Skip to content

CrewAI

Test CrewAI agents, crews, and tasks with Tenro.

Experimental

This framework integration is experimental. Examples may break when the framework updates. See Compatibility for support definitions.

Tested with: crewai==0.140

What you'll use

| Decorator | Purpose |
| --- | --- |
| `@link_tool` | Your custom tools (search, database, APIs) |
| `@link_agent` | Entry point for tracing |

No @link_llm needed. Tenro intercepts LLM calls at the HTTP level automatically.

Customer support example

A customer support crew that searches a knowledge base and generates responses.

"""Customer Support: Testing knowledge base retrieval with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Action:" and "Action Input:" in the
LLM's text output. Simulated responses must match this format.
"""

from __future__ import annotations

import json

from examples.experimental.crewai.myapp.agents import CustomerSupportAgent, search_kb

from tenro import Provider
from tenro.simulate import agent, llm, tool
from tenro.testing import tenro


def react_action(
    tool_name: str,
    tool_input: dict,
    thought: str = "I should search the knowledge base for refund information.",
) -> str:
    """Format a ReAct action that CrewAI's parser understands.

    CrewAI parses free-text LLM output for "Action:" / "Action Input:" markers,
    so simulated responses must reproduce that exact layout.

    Args:
        tool_name: Display name of the tool the agent should invoke.
        tool_input: Arguments for the tool; serialized to JSON for the
            "Action Input:" line.
        thought: The "Thought:" line content. Defaults to the original
            hard-coded reasoning so existing callers are unaffected.

    Returns:
        A newline-terminated ReAct action block.
    """
    return (
        f"Thought: {thought}\n"
        f"Action: {tool_name}\n"
        f"Action Input: {json.dumps(tool_input)}\n"
    )


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    thought_line = "Thought: I now know the final answer."
    return f"{thought_line}\nFinal Answer: {answer}\n"


@tenro
def test_customer_support_answers_question() -> None:
    """Test customer support agent uses knowledge base and LLM.

    Flow: simulate the tool and LLM first, run the agent, then verify the
    exact number of recorded calls. Simulations must be registered before
    the agent runs.
    """
    # Stub the knowledge-base tool. NOTE(review): `.func` presumably unwraps
    # the CrewAI tool wrapper to the raw function registered via @link_tool —
    # confirm against the agents module.
    tool.simulate(search_kb.func, result="Full refunds within 30 days.")

    # Two scripted LLM turns: the first emits a ReAct "Action:" block so
    # CrewAI invokes the tool, the second emits the "Final Answer:".
    llm.simulate(
        Provider.OPENAI,
        responses=[
            react_action("Search Knowledge Base", {"query": "refund"}),
            react_final("You can get a full refund within 30 days of purchase."),
        ],
    )

    result = CustomerSupportAgent().run("How do I get a refund?")

    # The final answer text must round-trip unchanged through CrewAI.
    assert result == "You can get a full refund within 30 days of purchase."
    agent.verify(CustomerSupportAgent)
    llm.verify_many(Provider.OPENAI, count=2)  # action turn + final-answer turn
    tool.verify_many(search_kb.func, count=1)  # exactly one KB lookup

RAG pipeline example

A retrieval-augmented generation crew with document search.

"""RAG Pipeline: Testing document retrieval with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Action:" and "Action Input:" in the
LLM's text output. Simulated responses must match this format.
"""

from __future__ import annotations

import json

from examples.experimental.crewai.myapp.agents import RAGPipeline, fetch_docs

from tenro import Provider
from tenro.simulate import agent, llm, tool
from tenro.testing import tenro


def react_action(
    tool_name: str,
    tool_input: dict,
    thought: str = "I should fetch relevant documents.",
) -> str:
    """Format a ReAct action that CrewAI's parser understands.

    CrewAI parses free-text LLM output for "Action:" / "Action Input:" markers,
    so simulated responses must reproduce that exact layout.

    Args:
        tool_name: Display name of the tool the agent should invoke.
        tool_input: Arguments for the tool; serialized to JSON for the
            "Action Input:" line.
        thought: The "Thought:" line content. Defaults to the original
            hard-coded reasoning so existing callers are unaffected.

    Returns:
        A newline-terminated ReAct action block.
    """
    return (
        f"Thought: {thought}\n"
        f"Action: {tool_name}\n"
        f"Action Input: {json.dumps(tool_input)}\n"
    )


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    parts = ("Thought: I now know the final answer.", f"Final Answer: {answer}")
    return "\n".join(parts) + "\n"


@tenro
def test_rag_pipeline_synthesizes_answer() -> None:
    """Test RAG pipeline fetches documents and generates answer.

    Flow: register tool and LLM simulations, run the pipeline, then verify
    call counts. Simulations must be registered before the pipeline runs.
    """
    # Stub the retrieval tool with two newline-separated "documents".
    # NOTE(review): `.func` presumably unwraps the CrewAI tool wrapper to the
    # raw function registered via @link_tool — confirm against the agents module.
    tool.simulate(
        fetch_docs.func,
        result="Machine learning uses algorithms to learn.\nDeep learning is a subset of ML.",
    )

    # Two scripted LLM turns: a ReAct "Action:" block that triggers the
    # document fetch, then the "Final Answer:" synthesis.
    llm.simulate(
        Provider.OPENAI,
        responses=[
            react_action("Fetch Documents", {"topic_query": "AI"}),
            react_final("Machine learning is a field where algorithms learn patterns."),
        ],
    )

    result = RAGPipeline().run("What is machine learning?", "AI")

    # The final answer text must round-trip unchanged through CrewAI.
    assert result == "Machine learning is a field where algorithms learn patterns."
    agent.verify(RAGPipeline)
    llm.verify_many(Provider.OPENAI, count=2)  # action turn + final-answer turn
    tool.verify_many(fetch_docs.func, count=1)  # exactly one document fetch

Multi-turn conversation example

A crew handling multi-turn conversations with context.

"""Multi-Turn Conversation: Testing sequential LLM calls with CrewAI.

NOTE: CrewAI uses ReAct text parsing, not OpenAI's native `tool_calls` structure.
Unlike LangChain/OpenAI SDK, CrewAI expects "Final Answer:" in the LLM's text output.
Simulated responses must match this format.
"""

from __future__ import annotations

from tenro import Provider
from tenro.simulate import llm
from tenro.testing import tenro


def react_final(answer: str) -> str:
    """Build a ReAct final-answer block in the exact format CrewAI parses."""
    header = "Thought: I know the answer."
    return header + "\nFinal Answer: " + answer + "\n"


@tenro
def test_simple_crewai_task() -> None:
    """Test a simple CrewAI task without memory (no chromadb)."""
    # Local import — presumably so this module can be collected without
    # crewai installed; confirm against the project's test layout.
    from crewai import Agent, Crew, Task

    # One scripted turn: no tool use, so a single "Final Answer:" suffices.
    llm.simulate(
        Provider.OPENAI,
        responses=[react_final("Python lists are created with square brackets: [1, 2, 3]")],
    )

    # memory=False: CrewAI's memory triggers chromadb which makes additional
    # unpredictable LLM calls for embeddings, breaking deterministic simulation
    agent = Agent(
        role="Coding Assistant",
        goal="Help with Python questions",
        backstory="You are a Python expert.",
        llm="gpt-4o-mini",
        memory=False,
    )

    task = Task(
        description="How do I create a list in Python?",
        expected_output="A helpful response about Python lists",
        agent=agent,
    )

    # memory=False at the Crew level too, for the same chromadb reason.
    crew = Crew(agents=[agent], tasks=[task], memory=False)
    result = crew.kickoff()

    # kickoff() returns a CrewOutput-like object; compare via str().
    assert str(result) == "Python lists are created with square brackets: [1, 2, 3]"
    # No agent.verify() - this test uses inline CrewAI Agent, not @link_agent decorated class
    llm.verify(Provider.OPENAI)

Key patterns

Agentic loop (LLM calls tool)

When the LLM decides to call a tool, then responds with the result:

from tenro import Provider, ToolCall
from tenro.simulate import llm, tool
# Assuming search_knowledge_base is defined with @link_tool("search_kb")

# 1. Set up tool result (use function reference)
tool.simulate(search_knowledge_base, result={"content": "Full refunds within 30 days."})

# 2. Set up LLM responses: first triggers tool, second is final response
# NOTE(review): ToolCall presumably renders as a CrewAI-compatible ReAct
# action automatically — confirm against Tenro's CrewAI adapter docs.
llm.simulate(Provider.OPENAI, responses=[
    ToolCall(search_knowledge_base, query="refund policy"),
    "You can get a full refund within 30 days.",
])

Multi-agent crews

from tenro import Provider, ToolCall
from tenro.simulate import llm
# Sequential responses for different crew members
# (assumes `research` is a tool function registered with @link_tool)
llm.simulate(Provider.OPENAI, responses=[
    ToolCall(research, topic="AI"),
    "Agent 1: Here's my research.",
    "Agent 2: Based on the research..."
])

Verifying

from tenro import Provider
from tenro.simulate import llm, tool
# Verify exact call counts recorded during the run.
tool.verify_many(search_knowledge_base, count=1)
llm.verify_many(Provider.OPENAI, count=2)  # Tool request + final answer

See also