Skip to content

CrewAI

Test CrewAI agents, crews, and tasks with Tenro.

Experimental

This framework integration is experimental. Examples may break when the framework updates. See Compatibility for support definitions.

Tested with: crewai==0.140

What you'll use

| Decorator | Purpose |
| --- | --- |
| `@link_tool` | Your custom tools (search, database, APIs) |
| `@link_agent` | Entry point for tracing |

No @link_llm needed. Tenro intercepts LLM calls at the HTTP level automatically.

Customer support example

A customer support crew that searches a knowledge base and generates responses.

"""Customer Support: Testing knowledge base retrieval with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Action:" and "Action Input:" in the
LLM's text output. Simulated responses must match this format.
"""

from __future__ import annotations

import json

from examples.experimental.crewai.myapp.agents import CustomerSupportAgent, search_kb

from tenro import Provider
from tenro.simulate import agent, llm, tool
from tenro.testing import tenro


def react_action(
    tool_name: str,
    tool_input: dict,
    thought: str = "I should search the knowledge base for refund information.",
) -> str:
    """Format a ReAct action that CrewAI's parser understands.

    CrewAI parses free-text LLM output for "Action:" / "Action Input:" markers,
    so simulated responses must reproduce that exact layout.

    Args:
        tool_name: Display name of the tool the agent should invoke.
        tool_input: Arguments for the tool; serialized to JSON for the
            "Action Input:" line.
        thought: The "Thought:" line content. Defaults to the original
            hard-coded reasoning so existing callers are unaffected.

    Returns:
        A newline-terminated ReAct action block.
    """
    return (
        f"Thought: {thought}\n"
        f"Action: {tool_name}\n"
        f"Action Input: {json.dumps(tool_input)}\n"
    )


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    thought_line = "Thought: I now know the final answer."
    return f"{thought_line}\nFinal Answer: {answer}\n"


@tenro
def test_customer_support_answers_question() -> None:
    """Test customer support agent uses knowledge base and LLM.

    Flow: simulate the tool and LLM first, run the agent, then verify the
    exact number of recorded calls. Simulations must be registered before
    the agent runs.
    """
    # Stub the knowledge-base tool. NOTE(review): `.func` presumably unwraps
    # the CrewAI tool wrapper to the raw function registered via @link_tool —
    # confirm against the agents module.
    tool.simulate(search_kb.func, result="Full refunds within 30 days.")

    # Two scripted LLM turns: the first emits a ReAct "Action:" block so
    # CrewAI invokes the tool, the second emits the "Final Answer:".
    llm.simulate(
        Provider.OPENAI,
        responses=[
            react_action("Search Knowledge Base", {"query": "refund"}),
            react_final("You can get a full refund within 30 days of purchase."),
        ],
    )

    result = CustomerSupportAgent().run("How do I get a refund?")

    # The final answer text must round-trip unchanged through CrewAI.
    assert result == "You can get a full refund within 30 days of purchase."
    agent.verify(CustomerSupportAgent)
    llm.verify_many(Provider.OPENAI, count=2)  # action turn + final-answer turn
    tool.verify_many(search_kb.func, count=1)  # exactly one KB lookup

RAG pipeline example

A retrieval-augmented generation crew with document search.

"""RAG Pipeline: Testing document retrieval with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Action:" and "Action Input:" in the
LLM's text output. Simulated responses must match this format.
"""

from __future__ import annotations

import json

from examples.experimental.crewai.myapp.agents import RAGPipeline, fetch_docs

from tenro import Provider
from tenro.simulate import agent, llm, tool
from tenro.testing import tenro


def react_action(
    tool_name: str,
    tool_input: dict,
    thought: str = "I should fetch relevant documents.",
) -> str:
    """Format a ReAct action that CrewAI's parser understands.

    CrewAI parses free-text LLM output for "Action:" / "Action Input:" markers,
    so simulated responses must reproduce that exact layout.

    Args:
        tool_name: Display name of the tool the agent should invoke.
        tool_input: Arguments for the tool; serialized to JSON for the
            "Action Input:" line.
        thought: The "Thought:" line content. Defaults to the original
            hard-coded reasoning so existing callers are unaffected.

    Returns:
        A newline-terminated ReAct action block.
    """
    return (
        f"Thought: {thought}\n"
        f"Action: {tool_name}\n"
        f"Action Input: {json.dumps(tool_input)}\n"
    )


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    parts = ("Thought: I now know the final answer.", f"Final Answer: {answer}")
    return "\n".join(parts) + "\n"


@tenro
def test_rag_pipeline_synthesizes_answer() -> None:
    """Test RAG pipeline fetches documents and generates answer.

    Flow: register tool and LLM simulations, run the pipeline, then verify
    call counts. Simulations must be registered before the pipeline runs.
    """
    # Stub the retrieval tool with two newline-separated "documents".
    # NOTE(review): `.func` presumably unwraps the CrewAI tool wrapper to the
    # raw function registered via @link_tool — confirm against the agents module.
    tool.simulate(
        fetch_docs.func,
        result="Machine learning uses algorithms to learn.\nDeep learning is a subset of ML.",
    )

    # Two scripted LLM turns: a ReAct "Action:" block that triggers the
    # document fetch, then the "Final Answer:" synthesis.
    llm.simulate(
        Provider.OPENAI,
        responses=[
            react_action("Fetch Documents", {"topic_query": "AI"}),
            react_final("Machine learning is a field where algorithms learn patterns."),
        ],
    )

    result = RAGPipeline().run("What is machine learning?", "AI")

    # The final answer text must round-trip unchanged through CrewAI.
    assert result == "Machine learning is a field where algorithms learn patterns."
    agent.verify(RAGPipeline)
    llm.verify_many(Provider.OPENAI, count=2)  # action turn + final-answer turn
    tool.verify_many(fetch_docs.func, count=1)  # exactly one document fetch

Multi-turn conversation example

A crew handling multi-turn conversations with context.

"""Multi-Turn Conversation: Testing sequential LLM calls with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Final Answer:" in the LLM's text output.
Simulated responses must match this format.
"""

from __future__ import annotations

from tenro import Provider
from tenro.simulate import llm
from tenro.testing import tenro


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    header = "Thought: I know the answer."
    return header + "\nFinal Answer: " + answer + "\n"


@tenro
def test_simple_crewai_task() -> None:
    """Test a simple CrewAI task without memory (no chromadb)."""
    # Local import — presumably so this module can be collected without
    # crewai installed; confirm against the project's test layout.
    from crewai import Agent, Crew, Task

    # One scripted turn: no tool use, so a single "Final Answer:" suffices.
    llm.simulate(
        Provider.OPENAI,
        responses=[react_final("Python lists are created with square brackets: [1, 2, 3]")],
    )

    # memory=False: CrewAI's memory triggers chromadb which makes additional
    # unpredictable LLM calls for embeddings, breaking deterministic simulation
    agent = Agent(
        role="Coding Assistant",
        goal="Help with Python questions",
        backstory="You are a Python expert.",
        llm="gpt-4o-mini",
        memory=False,
    )

    task = Task(
        description="How do I create a list in Python?",
        expected_output="A helpful response about Python lists",
        agent=agent,
    )

    # memory=False at the Crew level too, for the same chromadb reason.
    crew = Crew(agents=[agent], tasks=[task], memory=False)
    result = crew.kickoff()

    # kickoff() returns a CrewOutput-like object; compare via str().
    assert str(result) == "Python lists are created with square brackets: [1, 2, 3]"
    # No agent.verify() - this test uses inline CrewAI Agent, not @link_agent decorated class
    llm.verify(Provider.OPENAI)

Key patterns

Agentic loop (LLM calls tool)

When the LLM decides to call a tool, then responds with the result:

from tenro import Provider, ToolCall
from tenro.simulate import llm, tool
# Assuming search_knowledge_base is defined with @link_tool("search_kb")

# 1. Set up tool result (use function reference)
tool.simulate(search_knowledge_base, result={"content": "Full refunds within 30 days."})

# 2. Set up LLM responses: first triggers tool, second is final response
# NOTE(review): ToolCall presumably renders as a CrewAI-compatible ReAct
# action automatically — confirm against Tenro's CrewAI adapter docs.
llm.simulate(Provider.OPENAI, responses=[
    ToolCall(search_knowledge_base, query="refund policy"),
    "You can get a full refund within 30 days.",
])

Multi-agent crews

from tenro import Provider, ToolCall
from tenro.simulate import llm
# Sequential responses for different crew members
# (assumes `research` is a tool function registered with @link_tool)
llm.simulate(Provider.OPENAI, responses=[
    ToolCall(research, topic="AI"),
    "Agent 1: Here's my research.",
    "Agent 2: Based on the research..."
])

Verifying

from tenro import Provider
from tenro.simulate import llm, tool
# Verify exact call counts recorded during the run.
tool.verify_many(search_knowledge_base, count=1)
llm.verify_many(Provider.OPENAI, count=2)  # Tool request + final answer

See also