Skip to content

Pattern recipes

Common testing patterns with complete, runnable examples.

These patterns are tested in CI and can be copied directly into your project.

Simulating responses

Control what your tools and LLMs return during tests:

"""Pattern: Simulating tool and LLM responses.

Shows how to control what your tools and LLMs return during tests.
"""

from __future__ import annotations

from examples.myapp import (
    SearchAgent,
    TopicConversationAgent,
    call_openai,
    search_documents,
)

from tenro import Provider
from tenro.simulate import llm, tool
from tenro.testing import tenro


@tenro
def test_single_tool_result() -> None:
    """Simulate a fixed tool result that is returned on every invocation."""
    # The real vector DB is never touched; the stub supplies the documents.
    tool.simulate(search_documents, result=["doc1", "doc2", "doc3"])

    SearchAgent().run("machine learning")

    # The agent should have consulted the tool exactly once.
    tool.verify_many(search_documents, count=1)


@tenro
def test_sequential_tool_results() -> None:
    """Simulate a sequence of tool results, one per successive call."""
    # Empty list first, populated second - mimics a retry scenario.
    tool.simulate(search_documents, results=[[], ["doc1", "doc2"]])

    # Successive calls consume the sequence in order.
    search_documents("first query")
    search_documents("retry query")

    # Confirm the tool was invoked twice.
    tool.verify_many(search_documents, count=2)


@tenro
def test_single_llm_response() -> None:
    """Simulate a fixed LLM response for a provider."""
    # Every call routed to the OpenAI provider yields this text.
    llm.simulate(Provider.OPENAI, response="The document discusses...")

    call_openai("What does the document say?")

    # Assert on a substring of the produced output.
    llm.verify(Provider.OPENAI, output_contains="discusses")


@tenro
def test_sequential_llm_responses() -> None:
    """Simulate distinct responses for each turn of a conversation."""
    # Successive LLM calls consume these scripted replies in order.
    llm.simulate(
        Provider.OPENAI,
        responses=[
            "Let me search for that.",
            "Based on the documents, the answer is...",
            "Is there anything else I can help with?",
        ],
    )

    # Drive the multi-turn agent against the scripted conversation.
    TopicConversationAgent().run("machine learning")

    # At minimum one LLM exchange must have occurred.
    llm.verify_many(Provider.OPENAI, at_least=1)

Verifying calls

Verify your agent called the right tools:

"""Pattern: Verifying tool and LLM calls.

Shows how to verify your tools and LLMs were called the expected number of times.
Uses realistic LLM-driven agent patterns.
"""

from __future__ import annotations

from examples.myapp import (
    DataAgent,
    WeatherAgent,
    get_weather,
    search_database,
)

from tenro import Provider, ToolCall
from tenro.simulate import llm, tool
from tenro.testing import tenro


@tenro
def test_exact_call_count() -> None:
    """Assert a precise number of tool and LLM invocations."""
    # Scripted run: one tool-calling turn followed by a final text answer.
    tool.simulate(get_weather, result={"temp": 72, "condition": "sunny"})
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="NYC")],
            "NYC is 72°F and sunny.",
        ],
    )

    WeatherAgent().run("What's the weather in NYC?")

    # One tool invocation; two LLM turns (tool request + final answer).
    tool.verify_many(get_weather, count=1)
    llm.verify_many(Provider.OPENAI, count=2)


@tenro
def test_minimum_calls() -> None:
    """Assert a lower bound on the number of calls."""
    tool.simulate(get_weather, result={"temp": 70, "condition": "clear"})
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="A")],
            "Weather checked.",
        ],
    )

    WeatherAgent().run("Check weather")

    # at_least passes for N or more recorded calls.
    tool.verify_many(get_weather, at_least=1)
    llm.verify_many(Provider.OPENAI, at_least=1)


@tenro
def test_maximum_calls() -> None:
    """Assert an upper bound on the number of calls."""
    tool.simulate(search_database, result=[{"id": 1}])
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("search_database", query="test")],
            "Found 1 result.",
        ],
    )

    DataAgent().run("Search for test")

    # at_most passes for N or fewer recorded calls.
    tool.verify_many(search_database, at_most=5)
    llm.verify_many(Provider.OPENAI, at_most=5)


@tenro
def test_call_count_range() -> None:
    """Combine at_least and at_most to assert a call-count range."""
    tool.simulate(get_weather, result={"temp": 75})
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="LA")],
            "LA is 75°F.",
        ],
    )

    WeatherAgent().run("LA weather?")

    # Both bounds must hold simultaneously.
    tool.verify_many(get_weather, at_least=1, at_most=5)
    llm.verify_many(Provider.OPENAI, at_least=1, at_most=5)


@tenro
def test_verify_at_least_once() -> None:
    """verify() succeeds for one or more calls.

    Reach for verify() when the mere fact of a call matters,
    not its exact count.
    """
    tool.simulate(get_weather, result={"temp": 72})
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="NYC")],
            "It's 72°F.",
        ],
    )

    WeatherAgent().run("NYC weather?")

    # Passes whether the call happened once or many times.
    tool.verify(get_weather)
    llm.verify(Provider.OPENAI)


@tenro
def test_verify_exactly_once() -> None:
    """Pin the count to exactly one tool call.

    verify_many(count=1) fails for zero calls and for two or more.
    """
    tool.simulate(get_weather, result={"temp": 72})
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="NYC")],
            "It's 72°F.",
        ],
    )

    WeatherAgent().run("NYC weather?")

    # Tool ran once; the LLM took two turns (tool request + final answer).
    tool.verify_many(get_weather, count=1)
    llm.verify_many(Provider.OPENAI, count=2)


@tenro
def test_verify_never_called() -> None:
    """Assert that a tool was not invoked at all."""
    # The simulated LLM answers directly, so no tool call is issued.
    llm.simulate(Provider.OPENAI, response="I don't need to check the weather.")

    WeatherAgent().run("What color is the sky?")

    tool.verify_never(get_weather)
    llm.verify(Provider.OPENAI)

Verifying call sequence

Ensure operations happen in the correct order:

"""Pattern: Verifying call sequences.

Shows how to ensure tools and agents are called in the correct order.

Note: This demonstrates a pipeline/orchestration pattern where agents
coordinate in a fixed sequence. This is different from LLM-driven tool
calling - it tests that pipeline stages execute in order.
"""

from __future__ import annotations

from tenro import link_agent, link_tool
from tenro.simulate import agent, tool
from tenro.testing import tenro


# Tools for the pipeline
@link_tool
def validate_input(data: dict) -> bool:
    """Pipeline stage 1: validate the incoming payload (stub: always passes)."""
    return True


@link_tool
def process_data(data: dict) -> dict:
    """Pipeline stage 2: transform validated data (stub result)."""
    return {"processed": True}


@link_tool
def save_result(result: dict) -> str:
    """Pipeline stage 3: persist the processed result (stub: reports success)."""
    return "saved"


# Pipeline agents (orchestration pattern)
@link_agent
class ValidationAgent:
    """Pipeline agent wrapping the validation tool."""

    def run(self, data: dict) -> bool:
        return validate_input(data)


@link_agent
class ProcessingAgent:
    """Pipeline agent wrapping the processing tool."""

    def run(self, data: dict) -> dict:
        return process_data(data)


@link_agent
class PersistenceAgent:
    """Pipeline agent wrapping the persistence tool."""

    def run(self, result: dict) -> str:
        return save_result(result)


@link_agent
class PipelineAgent:
    """Runs the full pipeline: validate → process → save."""

    def run(self, data: dict) -> str:
        ValidationAgent().run(data)
        processed = ProcessingAgent().run(data)
        return PersistenceAgent().run(processed)


@tenro
def test_pipeline_tools_execute_in_order() -> None:
    """Each pipeline stage's tool runs exactly once, in sequence."""
    # Stub out all three stages.
    tool.simulate(validate_input, result=True)
    tool.simulate(process_data, result={"processed": True})
    tool.simulate(save_result, result="saved")

    outcome = PipelineAgent().run({"input": "data"})

    assert outcome == "saved"
    # One invocation per stage.
    tool.verify_many(validate_input, count=1)
    tool.verify_many(process_data, count=1)
    tool.verify_many(save_result, count=1)


@tenro
def test_pipeline_agents_called() -> None:
    """The orchestrator must delegate to every sub-agent."""
    tool.simulate(validate_input, result=True)
    tool.simulate(process_data, result={"processed": True})
    tool.simulate(save_result, result="saved")

    PipelineAgent().run({"input": "data"})

    # The orchestrator itself plus each stage agent ran.
    agent.verify(PipelineAgent)
    agent.verify(ValidationAgent)
    agent.verify(ProcessingAgent)
    agent.verify(PersistenceAgent)

Verifying content

Check what your LLM produced:

"""Pattern: Verifying LLM response content.

Shows how to check what the LLM responded in multi-turn conversations.
"""

from __future__ import annotations

from examples.myapp import TopicConversationAgent

from tenro import Provider
from tenro.simulate import llm
from tenro.testing import tenro


@tenro
def test_check_first_response() -> None:
    """By default, content verification targets the first response."""
    llm.simulate(
        Provider.OPENAI,
        responses=[
            "Machine learning is a subset of AI...",
            "For example, spam filters use ML to...",
            "You're welcome! Let me know if you have more questions.",
        ],
    )

    TopicConversationAgent().run("machine learning")

    # No call_index given: only the first response is inspected.
    llm.verify(output_contains="Machine learning")


@tenro
def test_check_specific_response() -> None:
    """Verify content that appears only in a middle response.

    Note: this uses call_index=None, which searches every recorded call.
    To pin the check to one particular response, pass its index instead
    (see test_check_last_response for an example with call_index).
    """
    llm.simulate(
        Provider.OPENAI,
        responses=[
            "First response",
            "Second response with ERROR",
            "Third response",
        ],
    )

    TopicConversationAgent().run("topic")

    # ERROR occurs only in the second response; call_index=None scans all calls.
    llm.verify(output_contains="ERROR", call_index=None)


@tenro
def test_check_last_response() -> None:
    """Negative call_index values count back from the final response."""
    llm.simulate(
        Provider.OPENAI,
        responses=[
            "Starting...",
            "Processing...",
            "Task completed successfully!",
        ],
    )

    TopicConversationAgent().run("task")

    # call_index=-1 targets the most recent response.
    llm.verify(output_contains="completed", call_index=-1)


@tenro
def test_check_any_response() -> None:
    """call_index=None matches the text against every response."""
    llm.simulate(
        Provider.OPENAI,
        responses=[
            "Looking into it...",
            "Found a critical security issue!",
            "Analysis complete.",
        ],
    )

    TopicConversationAgent().run("security audit")

    # Passes when any one of the responses contains the text.
    llm.verify(output_contains="security", call_index=None)

Simulating errors

Test error handling and retry logic:

"""Pattern: Simulating errors to test failure handling.

Shows how to simulate failures and test your agent's error recovery.
"""

from __future__ import annotations

import pytest
from examples.myapp import ResilientAgent, call_api

from tenro.simulate import tool
from tenro.testing import tenro


@tenro
def test_error_then_success() -> None:
    """A failed first attempt followed by a successful retry."""
    # Exception instances in the results list are raised instead of returned.
    tool.simulate(
        call_api,
        results=[
            ConnectionError("Network timeout"),  # raised on call 1
            {"status": "ok", "data": [1, 2, 3]},  # returned on call 2
        ],
    )

    outcome = ResilientAgent().run("fetch data")

    assert outcome["success"] is True
    tool.verify_many(call_api, count=2)  # one failure + one retry


@tenro
def test_all_retries_fail() -> None:
    """When every attempt raises, the error escalates to the caller."""
    tool.simulate(
        call_api,
        results=[
            ConnectionError("Attempt 1"),
            ConnectionError("Attempt 2"),
            ConnectionError("Attempt 3"),
        ],
    )

    # The final failure propagates out of the agent.
    with pytest.raises(ConnectionError):
        ResilientAgent().run("fetch data")

    tool.verify_many(call_api, count=3)  # every retry was attempted


@tenro
def test_specific_error_types() -> None:
    """Simulate a concrete exception type to exercise its handler."""
    tool.simulate(
        call_api,
        results=[
            ConnectionError("Connection refused"),
            {"status": "ok"},
        ],
    )

    outcome = ResilientAgent().run("fetch data")

    assert outcome["success"] is True
    tool.verify_many(call_api, count=2)

Optional simulations

Handle conditional tool calls:

"""Pattern: Optional simulations for conditional branches.

Use `optional=True` when a tool may or may not be called depending on the code
path. Without this flag, Tenro fails if a simulated tool is never invoked.
"""

from __future__ import annotations

from examples.myapp import (
    check_cache,
    fetch_from_api,
    get_data_with_cache,
)

from tenro.simulate import tool
from tenro.testing import tenro


@tenro
def test_cache_hit_skips_api() -> None:
    """A cache hit means the API tool is never reached."""
    tool.simulate(check_cache, result={"data": "cached"})
    # optional=True: Tenro tolerates this simulation going unused.
    tool.simulate(fetch_from_api, result={"data": "fresh"}, optional=True)

    data = get_data_with_cache("key123", use_cache=True)

    assert data == {"data": "cached"}
    tool.verify_many(check_cache, count=1)
    tool.verify_never(fetch_from_api)


@tenro
def test_cache_miss_calls_api() -> None:
    """A cache miss falls through to the API."""
    tool.simulate(check_cache, result=None)
    tool.simulate(fetch_from_api, result={"data": "fresh"})

    data = get_data_with_cache("key123", use_cache=True)

    assert data == {"data": "fresh"}
    # Both the cache probe and the API fetch ran once each.
    tool.verify_many(check_cache, count=1)
    tool.verify_many(fetch_from_api, count=1)


@tenro
def test_cache_bypass_skips_check() -> None:
    """Disabling the cache skips the cache probe entirely."""
    # optional=True keeps Tenro from failing when the cache stub goes unused.
    tool.simulate(check_cache, result={"data": "cached"}, optional=True)
    tool.simulate(fetch_from_api, result={"data": "fresh"})

    data = get_data_with_cache("key123", use_cache=False)

    assert data == {"data": "fresh"}
    tool.verify_never(check_cache)
    tool.verify_many(fetch_from_api, count=1)

Dynamic behavior

Compute results based on input:

"""Pattern: Dynamic behavior with side_effect.

Shows how to make simulated responses depend on input arguments.
Uses realistic LLM-driven agent patterns.
"""

from __future__ import annotations

from examples.myapp import WeatherAgent, get_weather

from tenro import Provider, ToolCall
from tenro.simulate import llm, tool
from tenro.testing import tenro


@tenro
def test_input_dependent_responses() -> None:
    """Use side_effect to derive the simulated result from the arguments."""

    def weather_by_city(city: str) -> dict:
        # Receives exactly the arguments the real tool would.
        known = {
            "San Francisco": {"temp": 65, "condition": "foggy"},
            "Miami": {"temp": 85, "condition": "sunny"},
        }
        return known.get(city, {"temp": 70, "condition": "unknown"})

    tool.simulate(get_weather, side_effect=weather_by_city)
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [ToolCall("get_weather", city="San Francisco")],
            "San Francisco is 65°F and foggy.",
        ],
    )

    answer = WeatherAgent().run("What's the weather in San Francisco?")

    assert answer == "San Francisco is 65°F and foggy."
    tool.verify_many(get_weather, count=1)
    llm.verify_many(Provider.OPENAI, count=2)


@tenro
def test_side_effect_with_state() -> None:
    """A side_effect closure can carry state between tool calls."""
    counter = {"n": 0}

    def counting_weather(city: str) -> dict:
        # Each invocation bumps the shared counter.
        counter["n"] += 1
        return {"temp": 70 + counter["n"], "call_number": counter["n"]}

    tool.simulate(get_weather, side_effect=counting_weather)
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [
                ToolCall("get_weather", city="NYC"),
                ToolCall("get_weather", city="LA"),
            ],
            "NYC is 71°F, LA is 72°F.",
        ],
    )

    answer = WeatherAgent().run("Weather in NYC and LA?")

    assert answer == "NYC is 71°F, LA is 72°F."
    tool.verify_many(get_weather, count=2)
    assert counter["n"] == 2

Verifying never called

Ensure dangerous operations are prevented:

"""Pattern: Verifying tools were never called.

Shows how to ensure dangerous or expensive operations didn't happen.
"""

from __future__ import annotations

from examples.myapp import (
    SafeCleanupAgent,
    SmartCacheAgent,
    delete_all_records,
    fetch_from_api,
    get_cached_data,
)

from tenro import Provider
from tenro.simulate import llm, tool
from tenro.testing import tenro


@tenro
def test_cache_hit_skips_api() -> None:
    """A cache hit must not trigger the expensive API tool."""
    # Only the cache is simulated; a cache hit never reaches the API.
    tool.simulate(get_cached_data, result={"data": "cached"})

    SmartCacheAgent().get_data("user:123")

    # The cache was probed once.
    tool.verify_many(get_cached_data, count=1)
    # The costly fetch never happened.
    tool.verify_never(fetch_from_api)


@tenro
def test_dangerous_operation_not_triggered() -> None:
    """Destructive tools must stay untouched without explicit confirmation."""
    # No simulation needed: the unconfirmed path never calls the tool.
    SafeCleanupAgent().cleanup(confirmed=False)

    tool.verify_never(delete_all_records)


@tenro
def test_llm_not_called_for_cached_response() -> None:
    """Assert that no LLM call occurred at all."""
    # Nothing is simulated - verifying absence needs no setup.
    llm.verify_never(Provider.OPENAI)

Simulating tool calls

Simulate LLM responses that include tool calls:

"""Pattern: Simulating LLM tool calls with ToolCall and LLMResponse.

Shows how to simulate LLM responses that include tool calls using the type-safe
ToolCall constructor across different providers.

Key concepts:
- ToolCall(func, **args): Create tool call from callable (type-safe, IDE autocomplete)
- ToolCall("name", **args): Create tool call from string name
- responses=[ToolCall(...)]: Single tool call response
- responses=[["text", ToolCall(...)]]: Text + tool call in one response (nested list)
- responses=[LLMResponse([...])] : Explicit ordered blocks (interleaving)
- Verification with llm.verify(), tool.verify(), llm.calls(), tool.calls()

Interleaving note:
  Text blocks in LLMResponse represent the model's reasoning BEFORE/WHILE making
  tool call requests - NOT commentary on tool results. Tool results arrive in a
  separate response after tools execute. Anthropic and Gemini preserve block order;
  OpenAI Chat flattens to content + tool_calls (order lost but still works).
"""

from __future__ import annotations

import unittest

from tenro import Construct, LLMResponse, Provider, ToolCall, link_agent, link_llm, link_tool
from tenro.simulate import llm, tool
from tenro.testing import tenro

# ============================================================================
# APPLICATION CODE - Tools
# ============================================================================


@link_tool("search")
def search(query: str, limit: int = 10) -> list[str]:
    """Search for matching results (stub: always empty)."""
    return []


@link_tool("get_weather")
def get_weather(city: str) -> dict:
    """Look up current weather for a city (stub: empty dict)."""
    return {}


@link_tool("send_email")
def send_email(to: str, subject: str, body: str) -> bool:
    """Deliver an email (stub: always reports success)."""
    return True


# ============================================================================
# APPLICATION CODE - LLM Client Wrappers
# ============================================================================


@link_llm(Provider.OPENAI)
def call_openai(prompt: str) -> str:
    """Send a single-message chat completion to OpenAI and return its text."""
    from examples.myapp.clients import get_openai_client

    client = get_openai_client()
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    # content may be None (e.g. tool-call-only responses); normalize to "".
    return completion.choices[0].message.content or ""


@link_llm(Provider.ANTHROPIC)
def call_anthropic(prompt: str) -> str:
    """Send a single-message request to Anthropic and return the first text block."""
    from anthropic.types import TextBlock
    from examples.myapp.clients import get_anthropic_client

    client = get_anthropic_client()
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    # Responses may interleave text and tool-use blocks; keep only text.
    texts = [block for block in message.content if isinstance(block, TextBlock)]
    return texts[0].text if texts else ""


@link_llm(Provider.GEMINI)
def call_gemini(prompt: str) -> str:
    """Send a prompt to Gemini and return the response text."""
    from examples.myapp.clients import get_gemini_client

    client = get_gemini_client()
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
    )
    # .text can be None; normalize to "".
    return reply.text or ""


# ============================================================================
# APPLICATION CODE - Class-based Agents
# ============================================================================


class SearchAgent:
    """Agent that searches and summarizes results using a single LLM provider."""

    def __init__(self, provider: str = "openai"):
        self.provider = provider

    @link_agent("SearchAgent")
    def run(self, query: str) -> str:
        """Search for the query, then ask the configured LLM to summarize."""
        hits = search(query)
        summary_prompt = f"Summarize these results: {hits}"

        # Dispatch to the provider selected at construction time.
        if self.provider == "openai":
            return call_openai(summary_prompt)
        if self.provider == "anthropic":
            return call_anthropic(summary_prompt)
        return call_gemini(summary_prompt)


class MultiToolAgent:
    """Agent that combines search and weather tools with a configurable LLM."""

    def __init__(self, provider: str = "openai"):
        self.provider = provider

    @link_agent("MultiToolAgent")
    def run(self, task: str) -> str:
        """Gather tool outputs, then ask the configured LLM for the answer."""
        hits = search(task)
        forecast = get_weather("NYC")
        prompt = f"Task: {task}\nSearch results: {hits}\nWeather: {forecast}"

        # Dispatch to the provider selected at construction time.
        if self.provider == "openai":
            return call_openai(prompt)
        if self.provider == "anthropic":
            return call_anthropic(prompt)
        return call_gemini(prompt)


class MultiTurnAgent:
    """Agent that holds a two-turn conversation with the LLM."""

    @link_agent("MultiTurnAgent")
    def run(self, topic: str) -> str:
        """Run two LLM turns with a tool call in between."""
        opening = call_openai(f"Start researching: {topic}")
        search("follow up query")
        closing = call_openai("Continue with the findings")
        return f"{opening} | {closing}"


class DefensiveAgent:
    """Agent that handles unknown tool calls gracefully."""

    @link_agent("DefensiveAgent")
    def run(self, query: str) -> str:
        """Forward the query to the LLM and return its answer."""
        return call_openai(query)


# Instantiate agents for each provider
# One SearchAgent per supported provider, shared by the tests below.
openai_search_agent = SearchAgent(provider="openai")
anthropic_search_agent = SearchAgent(provider="anthropic")
gemini_search_agent = SearchAgent(provider="gemini")

# One MultiToolAgent per supported provider.
openai_multi_tool_agent = MultiToolAgent(provider="openai")
anthropic_multi_tool_agent = MultiToolAgent(provider="anthropic")
gemini_multi_tool_agent = MultiToolAgent(provider="gemini")

# Provider-independent agents.
multi_turn_agent = MultiTurnAgent()
defensive_agent = DefensiveAgent()


# ============================================================================
# PYTEST - OpenAI Examples
# ============================================================================


@tenro
def test_openai_single_tool_call() -> None:
    """OpenAI: one tool call expressed with the ToolCall() helper."""
    # The response carries text plus a single tool-call request.
    llm.simulate(
        Provider.OPENAI,
        responses=[["Summary of results", ToolCall(search, query="AI research")]],
    )
    tool.simulate(search, result=["paper1", "paper2"])

    openai_search_agent.run("Find AI papers")

    llm.verify(Provider.OPENAI)
    tool.verify_many(search, count=1)

    # Inspect the recorded calls directly.
    assert len(llm.calls()) == 1
    assert len(tool.calls()) == 1
    assert tool.calls()[0].display_name == "search"


@tenro
def test_openai_multiple_tool_calls() -> None:
    """OpenAI: two tool calls requested in a single response."""
    llm.simulate(
        Provider.OPENAI,
        responses=[
            [
                "Summary with weather",
                ToolCall(search, query="news"),
                ToolCall(get_weather, city="NYC"),
            ]
        ],
    )
    tool.simulate(search, result=["headline"])
    tool.simulate(get_weather, result={"temp": 72})

    openai_multi_tool_agent.run("Get news and weather")

    # Two tool calls total, one per tool.
    tool.verify_many(count=2)
    tool.verify_many(search, count=1)
    tool.verify_many(get_weather, count=1)


@tenro
def test_openai_multi_turn() -> None:
    """OpenAI: a tool-calling turn followed by a plain-text turn."""
    llm.simulate(
        Provider.OPENAI,
        responses=[
            ["Starting search", ToolCall(search, query="first")],
            "Final summary",
        ],
    )
    tool.simulate(search, result=["r1"])

    multi_turn_agent.run("research topic")

    # Two LLM turns and one tool invocation in total.
    llm.verify_many(Provider.OPENAI, count=2)
    tool.verify_many(search, count=1)
    assert len(llm.calls()) == 2


@tenro
def test_openai_with_string_name() -> None:
    """OpenAI: ToolCall addressed by string name rather than a callable."""
    # The arguments dict mirrors the tool's keyword arguments.
    llm.simulate(
        Provider.OPENAI,
        responses=[["Found results", ToolCall(name="search", arguments={"query": "AI"})]],
    )
    tool.simulate(search, result=["result"])

    openai_search_agent.run("Search")

    tool.verify_many(search, count=1)
    llm.verify(Provider.OPENAI)


# ============================================================================
# PYTEST - Anthropic Examples
# ============================================================================


@tenro
def test_anthropic_single_tool_call() -> None:
    """Anthropic: one tool call via the ToolCall() helper."""
    llm.simulate(
        Provider.ANTHROPIC,
        responses=[["Summary of results", ToolCall(search, query="ML papers")]],
    )
    tool.simulate(search, result=["paper1", "paper2"])

    anthropic_search_agent.run("Find ML papers")

    llm.verify(Provider.ANTHROPIC)
    tool.verify_many(search, count=1)

    # The recorded call is tagged with its provider.
    assert len(llm.calls()) == 1
    assert llm.calls()[0].provider == "anthropic"


@tenro
def test_anthropic_multiple_tool_calls() -> None:
    """Anthropic: two tool calls issued from one response."""
    llm.simulate(
        Provider.ANTHROPIC,
        responses=[
            [
                "Results with weather",
                ToolCall(search, query="data"),
                ToolCall(get_weather, city="Paris"),
            ]
        ],
    )
    tool.simulate(search, result=["doc1"])
    tool.simulate(get_weather, result={"temp": 20})

    anthropic_multi_tool_agent.run("Find data with weather")

    llm.verify(Provider.ANTHROPIC)
    # Two tool invocations in total.
    tool.verify_many(count=2)


@tenro
def test_anthropic_with_string_name() -> None:
    """Anthropic: ToolCall addressed by string name."""
    # The name/arguments form avoids importing the tool callable.
    llm.simulate(
        Provider.ANTHROPIC,
        responses=[["Search complete", ToolCall(name="search", arguments={"query": "docs"})]],
    )
    tool.simulate(search, result=["doc1"])

    anthropic_search_agent.run("Find docs")

    tool.verify_many(search, count=1)
    llm.verify(Provider.ANTHROPIC)


# ============================================================================
# PYTEST - Gemini Examples
# ============================================================================


@tenro
def test_gemini_single_tool_call() -> None:
    """Gemini: one tool call via the ToolCall() helper."""
    llm.simulate(
        Provider.GEMINI,
        responses=[["Summary of results", ToolCall(search, query="AI trends")]],
    )
    tool.simulate(search, result=["trend1", "trend2"])

    gemini_search_agent.run("Find AI trends")

    llm.verify(Provider.GEMINI)
    tool.verify_many(search, count=1)
    # The recorded call carries the provider tag.
    assert llm.calls()[0].provider == "gemini"


@tenro
def test_gemini_multiple_tool_calls() -> None:
    """Gemini: two tool calls issued from one response."""
    llm.simulate(
        Provider.GEMINI,
        responses=[
            [
                "Weather and search results",
                ToolCall(search, query="news"),
                ToolCall(get_weather, city="Tokyo"),
            ]
        ],
    )
    tool.simulate(search, result=["news1"])
    tool.simulate(get_weather, result={"temp": 25})

    gemini_multi_tool_agent.run("Find news with weather")

    llm.verify(Provider.GEMINI)
    # Two tool invocations in total.
    tool.verify_many(count=2)


@tenro
def test_gemini_with_string_name() -> None:
    """Gemini: ToolCall addressed by string name."""
    llm.simulate(
        Provider.GEMINI,
        responses=[["Results found", ToolCall(name="search", arguments={"query": "gemini"})]],
    )
    tool.simulate(search, result=["result"])

    gemini_search_agent.run("Search gemini")

    # Name-based tool calls resolve to the linked tool.
    tool.verify_many(search, count=1)
    llm.verify(Provider.GEMINI)


# ============================================================================
# UNITTEST - OpenAI Examples
# ============================================================================


class TestOpenAIToolCalls(unittest.TestCase):
    """Unittest examples for OpenAI tool calls."""

    def test_single_tool_call(self) -> None:
        """OpenAI unittest: one tool call inside a Construct context."""
        with Construct() as ctx:
            llm.simulate(
                Provider.OPENAI,
                responses=[["Results found", ToolCall(search, query="unittest query")]],
            )
            tool.simulate(search, result=["result"])

            openai_search_agent.run("unittest search")

            tool.verify(search)
            ctx.verify_llm(Provider.OPENAI)
            self.assertEqual(len(ctx.tool_calls), 1)

    def test_multiple_tool_calls(self) -> None:
        """OpenAI unittest: two tool calls in one response."""
        with Construct() as ctx:
            llm.simulate(
                Provider.OPENAI,
                responses=[
                    [
                        "Summary",
                        ToolCall(search, query="test"),
                        ToolCall(get_weather, city="Boston"),
                    ]
                ],
            )
            tool.simulate(search, result=["r"])
            tool.simulate(get_weather, result={"temp": 60})

            openai_multi_tool_agent.run("test task")

            ctx.verify_tools(count=2)
            self.assertEqual(len(ctx.llm_calls), 1)


# ============================================================================
# UNITTEST - Anthropic Examples
# ============================================================================


class TestAnthropicToolCalls(unittest.TestCase):
    """Unittest-style examples covering Anthropic tool-call simulation."""

    def test_single_tool_call(self) -> None:
        """Anthropic unittest: Single tool call."""
        with Construct() as ctx:
            # One turn: reasoning text followed by a tool-call request.
            single_turn = ["Found results", ToolCall(search, query="anthropic test")]
            llm.simulate(Provider.ANTHROPIC, responses=[single_turn])
            tool.simulate(search, result=["result"])

            anthropic_search_agent.run("search anthropic")

            tool.verify(search)
            ctx.verify_llm(Provider.ANTHROPIC)

    def test_with_string_name(self) -> None:
        """Anthropic unittest: ToolCall with string name."""
        with Construct() as ctx:
            # ToolCall may also be built from a string name + argument dict.
            string_call = ToolCall(name="search", arguments={"query": "data"})
            llm.simulate(Provider.ANTHROPIC, responses=[["Done", string_call]])
            tool.simulate(search, result=["data1"])

            anthropic_search_agent.run("find data")

            tool.verify(search)
            self.assertEqual("anthropic", ctx.llm_calls[0].provider)


# ============================================================================
# UNITTEST - Gemini Examples
# ============================================================================


class TestGeminiToolCalls(unittest.TestCase):
    """Unittest-style examples covering Gemini tool-call simulation."""

    def test_single_tool_call(self) -> None:
        """Gemini unittest: Single tool call."""
        with Construct() as ctx:
            # One turn: summary text plus a single tool-call request.
            single_turn = ["Summary ready", ToolCall(search, query="gemini test")]
            llm.simulate(Provider.GEMINI, responses=[single_turn])
            tool.simulate(search, result=["result"])

            gemini_search_agent.run("search gemini")

            tool.verify(search)
            ctx.verify_llm(Provider.GEMINI)

    def test_with_string_name(self) -> None:
        """Gemini unittest: ToolCall with string name."""
        with Construct() as ctx:
            # ToolCall may also be built from a string name + argument dict.
            string_call = ToolCall(name="search", arguments={"query": "info"})
            llm.simulate(Provider.GEMINI, responses=[["Complete", string_call]])
            tool.simulate(search, result=["info1"])

            gemini_search_agent.run("find info")

            tool.verify(search)
            self.assertEqual("gemini", ctx.llm_calls[0].provider)


# ============================================================================
# Simulating LLM Hallucinations and Invalid Tool Calls
# ============================================================================


@tenro
def test_llm_requests_nonexistent_tool() -> None:
    """Agent must survive the LLM hallucinating a tool that was never registered.

    Defensive-programming pattern: the simulated LLM emits a tool call whose
    name does not correspond to any registered tool, and the agent should
    still complete the run gracefully.
    """
    # A tool call for a name that no registered tool matches.
    hallucinated_call = ToolCall(
        name="nonexistent_magic_tool", arguments={"spell": "abracadabra"}
    )
    llm.simulate(
        Provider.OPENAI,
        responses=[["I'll use the magic_tool to help", hallucinated_call]],
    )

    defensive_agent.run("Do something magical")

    llm.verify(Provider.OPENAI)

    first_call = llm.calls()[0]
    assert first_call.response is not None


@tenro
def test_llm_requests_tool_with_invalid_args() -> None:
    """Agent must cope with a real tool being called with bad arguments.

    Companion defensive pattern: the tool name exists, but the simulated LLM
    passes arguments that do not match the tool's expected schema.
    """
    # "limit" should be numeric; the simulated LLM sends a string instead.
    bad_args_call = ToolCall(
        name="search", arguments={"query": "test", "limit": "not_a_number"}
    )
    llm.simulate(
        Provider.OPENAI,
        responses=[["Searching with limit", bad_args_call]],
    )
    tool.simulate(search, result=["result"])

    openai_search_agent.run("Search with bad args")

    llm.verify(Provider.OPENAI)


# ============================================================================
# LLMResponse with Blocks (Interleaving)
# ============================================================================
# Text blocks represent the model's reasoning BEFORE/WHILE making tool call
# requests. The model hasn't seen tool results yet - those come in a later turn.
#
# KEY DISTINCTION:
#   responses=[A, B, C]           → 3 separate LLM calls (3 turns)
#   responses=[LLMResponse([A, B, C])]  → 1 LLM call with interleaved content


@tenro
def test_single_turn_vs_multiple_turns() -> None:
    """Outer list length = number of LLM calls; LLMResponse groups one call.

    The key distinction: responses=[A, B] simulates TWO separate LLM turns,
    whereas responses=[LLMResponse([A, B])] would pack both items into a
    single turn. This example exercises the two-turn side.
    """
    llm.simulate(Provider.OPENAI, responses=["First", "Second"])  # two turns
    multi_turn_agent.run("topic")
    llm.verify_many(Provider.OPENAI, count=2)


@tenro
def test_anthropic_interleaved_single_turn() -> None:
    """Anthropic: ONE call whose blocks interleave text and tool calls.

    Anthropic preserves block ordering, so the text blocks read as the
    model's reasoning while it decides which tools to invoke — all within a
    single atomic response.
    """
    interleaved_blocks = [
        "I'll search for info.",
        ToolCall(search, query="quantum"),
        "Also checking weather.",
        ToolCall(get_weather, city="NYC"),
    ]
    llm.simulate(Provider.ANTHROPIC, responses=[LLMResponse(interleaved_blocks)])
    tool.simulate(search, result=["quantum info"])
    tool.simulate(get_weather, result={"temp": 72})

    anthropic_multi_tool_agent.run("Research")

    llm.verify_many(Provider.ANTHROPIC, count=1)  # a single LLM call
    tool.verify_many(count=2)


@tenro
def test_gemini_interleaved_single_turn() -> None:
    """Gemini: ONE call containing interleaved content; block order is kept."""
    interleaved_turn = LLMResponse(["Searching now.", ToolCall(search, query="AI trends")])
    llm.simulate(Provider.GEMINI, responses=[interleaved_turn])
    tool.simulate(search, result=["trend1"])

    gemini_search_agent.run("Find trends")

    llm.verify_many(Provider.GEMINI, count=1)
    tool.verify(search)


@tenro
def test_openai_blocks_flattened() -> None:
    """OpenAI: blocks are flattened because Chat has no interleaving support.

    The OpenAI Chat API concatenates the text blocks and moves tool calls
    into a separate array, so in-turn ordering is lost — the pattern still
    works, only the interleaving is dropped.
    """
    mixed_turn = LLMResponse(["First.", ToolCall(search, query="test"), "Second."])
    llm.simulate(Provider.OPENAI, responses=[mixed_turn])
    tool.simulate(search, result=["result"])

    openai_search_agent.run("Search")

    llm.verify(Provider.OPENAI)
    tool.verify(search)


@tenro
def test_llmresponse_tool_calls_only() -> None:
    """An LLMResponse may hold only tool calls, with no text blocks at all."""
    silent_turn = LLMResponse([ToolCall(search, query="silent")])
    llm.simulate(Provider.ANTHROPIC, responses=[silent_turn])
    tool.simulate(search, result=["found"])

    anthropic_search_agent.run("Quick search")

    llm.verify(Provider.ANTHROPIC)
    tool.verify(search)


@tenro
def test_list_shorthand_equals_llmresponse() -> None:
    """A plain list of blocks is shorthand for LLMResponse(blocks=[...])."""
    # Shorthand form: the bare list below is equivalent to wrapping the same
    # blocks in an explicit LLMResponse.
    shorthand_turn = ["Reasoning", ToolCall(search, query="test")]
    llm.simulate(Provider.ANTHROPIC, responses=[shorthand_turn])
    tool.simulate(search, result=["result"])

    anthropic_search_agent.run("Test")

    llm.verify(Provider.ANTHROPIC)
    tool.verify(search)

Running examples

All patterns are runnable tests:

# Clone the repository
git clone https://github.com/tenro-ai/tenro-python
cd tenro-python

# Install dependencies
uv sync

# Run all pattern examples
uv run pytest examples/patterns/

# Run a specific pattern
uv run pytest examples/patterns/test_simulating_responses.py -v

See also