diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index ff40489d94..62cf026dd0 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -311,16 +311,22 @@ def _invoke_loop(self) -> AgentFinish: Final answer from the agent. """ # Check if model supports native function calling - use_native_tools = ( + supports_fc = ( hasattr(self.llm, "supports_function_calling") and callable(getattr(self.llm, "supports_function_calling", None)) and self.llm.supports_function_calling() - and self.original_tools ) - if use_native_tools: + if supports_fc and self.original_tools: return self._invoke_loop_native_tools() + # FC-capable LLM with no user-defined tools but with response_model + # and no internal tools (delegation, human input, etc.): use simple + # native call path which correctly passes response_model for structured + # output instead of dropping it in the ReAct path. + if supports_fc and not self.tools and self.response_model: + return self._invoke_loop_native_no_tools() + # Fall back to ReAct text-based pattern return self._invoke_loop_react() @@ -351,6 +357,13 @@ def _invoke_loop_react(self) -> AgentFinish: enforce_rpm_limit(self.request_within_rpm_limit) + # In the ReAct flow, do NOT pass response_model to the LLM call. + # When the LLM doesn't support function calling, passing response_model + # forces structured output (via instructor/tools mode) before the agent + # can reason through the Action/Observation loop. The output schema is + # already embedded in the prompt text for guidance, and the final + # conversion to pydantic/json happens in task._export_output(). 
+ # See: https://github.com/crewAIInc/crewAI/issues/4695 answer = get_llm_response( llm=self.llm, messages=self.messages, @@ -358,43 +371,16 @@ def _invoke_loop_react(self) -> AgentFinish: printer=self._printer, from_task=self.task, from_agent=self.agent, - response_model=self.response_model, + response_model=None, executor_context=self, verbose=self.agent.verbose, ) - # breakpoint() - if self.response_model is not None: - try: - if isinstance(answer, BaseModel): - output_json = answer.model_dump_json() - formatted_answer = AgentFinish( - thought="", - output=answer, - text=output_json, - ) - else: - self.response_model.model_validate_json(answer) - formatted_answer = AgentFinish( - thought="", - output=answer, - text=answer, - ) - except ValidationError: - # If validation fails, convert BaseModel to JSON string for parsing - answer_str = ( - answer.model_dump_json() - if isinstance(answer, BaseModel) - else str(answer) - ) - formatted_answer = process_llm_response( - answer_str, self.use_stop_words - ) # type: ignore[assignment] - else: - # When no response_model, answer should be a string - answer_str = str(answer) if not isinstance(answer, str) else answer - formatted_answer = process_llm_response( - answer_str, self.use_stop_words - ) # type: ignore[assignment] + + # When no response_model is passed, answer should be a string + answer_str = str(answer) if not isinstance(answer, str) else answer + formatted_answer = process_llm_response( + answer_str, self.use_stop_words + ) # type: ignore[assignment] if isinstance(formatted_answer, AgentAction): # Extract agent fingerprint if available @@ -1152,16 +1138,22 @@ async def _ainvoke_loop(self) -> AgentFinish: Final answer from the agent. 
""" # Check if model supports native function calling - use_native_tools = ( + supports_fc = ( hasattr(self.llm, "supports_function_calling") and callable(getattr(self.llm, "supports_function_calling", None)) and self.llm.supports_function_calling() - and self.original_tools ) - if use_native_tools: + if supports_fc and self.original_tools: return await self._ainvoke_loop_native_tools() + # FC-capable LLM with no user-defined tools but with response_model + # and no internal tools (delegation, human input, etc.): use simple + # native call path which correctly passes response_model for structured + # output instead of dropping it in the ReAct path. + if supports_fc and not self.tools and self.response_model: + return await self._ainvoke_loop_native_no_tools() + # Fall back to ReAct text-based pattern return await self._ainvoke_loop_react() @@ -1188,6 +1180,13 @@ async def _ainvoke_loop_react(self) -> AgentFinish: enforce_rpm_limit(self.request_within_rpm_limit) + # In the ReAct flow, do NOT pass response_model to the LLM call. + # When the LLM doesn't support function calling, passing response_model + # forces structured output (via instructor/tools mode) before the agent + # can reason through the Action/Observation loop. The output schema is + # already embedded in the prompt text for guidance, and the final + # conversion to pydantic/json happens in task._export_output(). 
+ # See: https://github.com/crewAIInc/crewAI/issues/4695 answer = await aget_llm_response( llm=self.llm, messages=self.messages, @@ -1195,43 +1194,16 @@ async def _ainvoke_loop_react(self) -> AgentFinish: printer=self._printer, from_task=self.task, from_agent=self.agent, - response_model=self.response_model, + response_model=None, executor_context=self, verbose=self.agent.verbose, ) - if self.response_model is not None: - try: - if isinstance(answer, BaseModel): - output_json = answer.model_dump_json() - formatted_answer = AgentFinish( - thought="", - output=answer, - text=output_json, - ) - else: - self.response_model.model_validate_json(answer) - formatted_answer = AgentFinish( - thought="", - output=answer, - text=answer, - ) - except ValidationError: - # If validation fails, convert BaseModel to JSON string for parsing - answer_str = ( - answer.model_dump_json() - if isinstance(answer, BaseModel) - else str(answer) - ) - formatted_answer = process_llm_response( - answer_str, self.use_stop_words - ) # type: ignore[assignment] - else: - # When no response_model, answer should be a string - answer_str = str(answer) if not isinstance(answer, str) else answer - formatted_answer = process_llm_response( - answer_str, self.use_stop_words - ) # type: ignore[assignment] + # When no response_model is passed, answer should be a string + answer_str = str(answer) if not isinstance(answer, str) else answer + formatted_answer = process_llm_response( + answer_str, self.use_stop_words + ) # type: ignore[assignment] if isinstance(formatted_answer, AgentAction): fingerprint_context = {} diff --git a/lib/crewai/tests/agents/test_react_output_pydantic.py b/lib/crewai/tests/agents/test_react_output_pydantic.py new file mode 100644 index 0000000000..bb70102664 --- /dev/null +++ b/lib/crewai/tests/agents/test_react_output_pydantic.py @@ -0,0 +1,510 @@ +"""Tests for output_pydantic behavior in ReAct flow when LLM doesn't support function calling. 
+ +Regression tests for https://github.com/crewAIInc/crewAI/issues/4695 + +When an LLM does NOT support function calling (supports_function_calling() returns False), +the executor should use the ReAct text-based pattern. In this path, response_model should +NOT be passed to the LLM call, because doing so forces structured output (via instructor/ +tools mode) before the agent can reason through the Action/Observation loop. + +The schema should still be embedded in the prompt text for guidance, and the final +conversion to pydantic/json should happen in task._export_output() after the ReAct loop. +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import BaseModel, Field + +from crewai.agents.crew_agent_executor import CrewAgentExecutor +from crewai.agents.parser import AgentFinish + + +# --------------------------------------------------------------------------- +# Pydantic models used as output_pydantic in tests +# --------------------------------------------------------------------------- + + +class PersonInfo(BaseModel): + """A simple pydantic model for testing output_pydantic.""" + + name: str = Field(description="Person's name") + age: int = Field(description="Person's age") + + +class WeatherReport(BaseModel): + """Another pydantic model for testing output_pydantic.""" + + city: str = Field(description="City name") + temperature: float = Field(description="Temperature in Fahrenheit") + condition: str = Field(description="Weather condition") + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +def _make_llm(*, supports_fc: bool) -> MagicMock: + """Create a mock LLM with configurable function-calling support.""" + llm = MagicMock() + llm.supports_function_calling.return_value = supports_fc + llm.supports_stop_words.return_value = True + 
llm.stop = [] + return llm + + +def _make_executor( + llm: MagicMock, + *, + response_model: type[BaseModel] | None = None, +) -> CrewAgentExecutor: + """Create a CrewAgentExecutor with the given LLM and response_model.""" + agent = MagicMock() + agent.role = "Test Agent" + agent.key = "test_agent_key" + agent.verbose = False + agent.id = "test_agent_id" + + task = MagicMock() + task.description = "Test task" + + crew = MagicMock() + crew.verbose = False + crew._train = False + + executor = CrewAgentExecutor( + llm=llm, + task=task, + crew=crew, + agent=agent, + prompt={"prompt": "Test prompt {input} {tool_names} {tools}"}, + max_iter=5, + tools=[], + tools_names="", + stop_words=["Observation:"], + tools_description="", + tools_handler=MagicMock(), + response_model=response_model, + ) + return executor + + +# =========================================================================== +# Sync tests +# =========================================================================== + + +class TestReActFlowDoesNotPassResponseModel: + """Verify that _invoke_loop_react does NOT pass response_model to LLM.""" + + def test_react_flow_passes_none_response_model_when_output_pydantic_set( + self, + ) -> None: + """When output_pydantic is set but LLM lacks function calling, + response_model must be None in the get_llm_response call.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=PersonInfo) + + with patch( + "crewai.agents.crew_agent_executor.get_llm_response", + return_value="Thought: I know the answer\nFinal Answer: John is 30 years old", + ) as mock_get_llm: + with patch.object(executor, "_show_logs"): + result = executor._invoke_loop() + + # The critical assertion: response_model must be None in ReAct flow + call_kwargs = mock_get_llm.call_args + assert call_kwargs.kwargs.get("response_model") is None, ( + "response_model should be None in ReAct flow, but got " + f"{call_kwargs.kwargs.get('response_model')}" + ) + assert 
isinstance(result, AgentFinish) + + def test_react_flow_does_not_use_instructor_for_non_fc_llm(self) -> None: + """Ensure InternalInstructor is never invoked in the ReAct path.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=WeatherReport) + + with patch( + "crewai.agents.crew_agent_executor.get_llm_response", + return_value="Thought: I found the weather\nFinal Answer: It is sunny in NYC at 72F", + ): + with patch.object(executor, "_show_logs"): + with patch( + "crewai.utilities.internal_instructor.InternalInstructor" + ) as mock_instructor: + executor._invoke_loop() + + mock_instructor.assert_not_called() + + def test_invoke_loop_routes_to_react_when_no_function_calling(self) -> None: + """Confirm _invoke_loop routes to _invoke_loop_react when + supports_function_calling() returns False.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=PersonInfo) + + with patch.object( + executor, + "_invoke_loop_react", + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_react: + with patch.object(executor, "_invoke_loop_native_tools") as mock_native: + executor._invoke_loop() + + mock_react.assert_called_once() + mock_native.assert_not_called() + + def test_invoke_loop_routes_to_native_when_function_calling_supported( + self, + ) -> None: + """Confirm _invoke_loop routes to _invoke_loop_native_tools when + supports_function_calling() returns True AND tools are present.""" + llm = _make_llm(supports_fc=True) + executor = _make_executor(llm, response_model=PersonInfo) + # Need at least one tool for native path + executor.original_tools = [MagicMock()] + + with patch.object( + executor, + "_invoke_loop_native_tools", + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_native: + with patch.object(executor, "_invoke_loop_react") as mock_react: + executor._invoke_loop() + + mock_native.assert_called_once() + 
mock_react.assert_not_called() + + def test_invoke_loop_routes_to_native_no_tools_when_fc_no_tools_with_response_model( + self, + ) -> None: + """When LLM supports FC, has no tools (including internal tools), + but HAS a response_model, route to _invoke_loop_native_no_tools + (which correctly passes response_model for structured output).""" + llm = _make_llm(supports_fc=True) + executor = _make_executor(llm, response_model=PersonInfo) + # No user-defined or internal tools + executor.original_tools = [] + executor.tools = [] + + with patch.object( + executor, + "_invoke_loop_native_no_tools", + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_native_no_tools: + with patch.object(executor, "_invoke_loop_react") as mock_react: + with patch.object(executor, "_invoke_loop_native_tools") as mock_native: + executor._invoke_loop() + + mock_native_no_tools.assert_called_once() + mock_react.assert_not_called() + mock_native.assert_not_called() + + def test_invoke_loop_routes_to_react_when_fc_no_orig_tools_but_internal_tools( + self, + ) -> None: + """When LLM supports FC, has no original_tools but HAS internal tools + (e.g. delegation), fall through to ReAct even with response_model. + Internal tools need the ReAct loop for Action/Observation cycles.""" + llm = _make_llm(supports_fc=True) + executor = _make_executor(llm, response_model=PersonInfo) + executor.original_tools = [] + # Internal tools present (e.g. 
delegation tool) + executor.tools = [MagicMock()] + + with patch.object( + executor, + "_invoke_loop_react", + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_react: + with patch.object(executor, "_invoke_loop_native_no_tools") as mock_native_no_tools: + with patch.object(executor, "_invoke_loop_native_tools") as mock_native: + executor._invoke_loop() + + mock_react.assert_called_once() + mock_native_no_tools.assert_not_called() + mock_native.assert_not_called() + + def test_invoke_loop_routes_to_react_when_fc_no_tools_no_response_model( + self, + ) -> None: + """When LLM supports FC, has no tools, and NO response_model, + fall through to ReAct path (no structured output to preserve).""" + llm = _make_llm(supports_fc=True) + executor = _make_executor(llm, response_model=None) + executor.original_tools = [] + executor.tools = [] + + with patch.object( + executor, + "_invoke_loop_react", + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_react: + with patch.object(executor, "_invoke_loop_native_no_tools") as mock_native_no_tools: + with patch.object(executor, "_invoke_loop_native_tools") as mock_native: + executor._invoke_loop() + + mock_react.assert_called_once() + mock_native_no_tools.assert_not_called() + mock_native.assert_not_called() + + def test_react_flow_still_works_with_tool_usage(self) -> None: + """Verify the ReAct loop still processes Action/Observation cycles + correctly even when output_pydantic is set.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=PersonInfo) + + call_count = 0 + + def mock_llm_response(*args: Any, **kwargs: Any) -> str: + nonlocal call_count + call_count += 1 + # Verify response_model is None on every call + assert kwargs.get("response_model") is None, ( + f"response_model should be None in ReAct flow (call {call_count})" + ) + if call_count == 1: + return ( + "Thought: I need to search for the 
person\n" + "Action: search_tool\n" + 'Action Input: {"query": "John Doe"}' + ) + return ( + "Thought: I found the person info\n" + "Final Answer: John Doe is 30 years old" + ) + + from crewai.tools.tool_types import ToolResult + + with patch( + "crewai.agents.crew_agent_executor.get_llm_response", + side_effect=mock_llm_response, + ): + with patch( + "crewai.agents.crew_agent_executor.execute_tool_and_check_finality", + return_value=ToolResult(result="John Doe, age 30", result_as_answer=False), + ): + with patch.object(executor, "_show_logs"): + with patch.object(executor, "_handle_agent_action") as mock_handle: + from crewai.agents.parser import AgentAction + + mock_handle.return_value = AgentAction( + text="Tool result", + tool="search_tool", + tool_input='{"query": "John Doe"}', + thought="Used tool", + result="John Doe, age 30", + ) + result = executor._invoke_loop() + + assert isinstance(result, AgentFinish) + assert call_count == 2, f"Expected 2 LLM calls, got {call_count}" + + def test_react_flow_without_response_model_unchanged(self) -> None: + """Verify the ReAct flow still works normally when no response_model is set.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=None) + + with patch( + "crewai.agents.crew_agent_executor.get_llm_response", + return_value="Thought: Simple answer\nFinal Answer: Hello world", + ) as mock_get_llm: + with patch.object(executor, "_show_logs"): + result = executor._invoke_loop() + + call_kwargs = mock_get_llm.call_args + assert call_kwargs.kwargs.get("response_model") is None + assert isinstance(result, AgentFinish) + + +# =========================================================================== +# Async tests +# =========================================================================== + + +class TestAsyncReActFlowDoesNotPassResponseModel: + """Verify that _ainvoke_loop_react does NOT pass response_model to LLM.""" + + @pytest.mark.asyncio + async def 
test_async_react_flow_passes_none_response_model(self) -> None: + """Async variant: response_model must be None in ReAct flow.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=PersonInfo) + + with patch( + "crewai.agents.crew_agent_executor.aget_llm_response", + new_callable=AsyncMock, + return_value="Thought: I know\nFinal Answer: John is 30", + ) as mock_aget_llm: + with patch.object(executor, "_show_logs"): + result = await executor._ainvoke_loop() + + call_kwargs = mock_aget_llm.call_args + assert call_kwargs.kwargs.get("response_model") is None, ( + "response_model should be None in async ReAct flow" + ) + assert isinstance(result, AgentFinish) + + @pytest.mark.asyncio + async def test_async_invoke_loop_routes_to_react_when_no_fc(self) -> None: + """Async: _ainvoke_loop routes to _ainvoke_loop_react when + supports_function_calling() returns False.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=WeatherReport) + + with patch.object( + executor, + "_ainvoke_loop_react", + new_callable=AsyncMock, + return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"), + ) as mock_react: + with patch.object(executor, "_ainvoke_loop_native_tools") as mock_native: + await executor._ainvoke_loop() + + mock_react.assert_called_once() + mock_native.assert_not_called() + + @pytest.mark.asyncio + async def test_async_react_flow_with_tool_usage(self) -> None: + """Async: ReAct loop processes tool calls correctly with output_pydantic.""" + llm = _make_llm(supports_fc=False) + executor = _make_executor(llm, response_model=PersonInfo) + + call_count = 0 + + async def mock_llm_response(*args: Any, **kwargs: Any) -> str: + nonlocal call_count + call_count += 1 + assert kwargs.get("response_model") is None + if call_count == 1: + return ( + "Thought: I need to search\n" + "Action: search_tool\n" + 'Action Input: {"query": "test"}' + ) + return "Thought: Done\nFinal Answer: Result found" 
+ + from crewai.tools.tool_types import ToolResult + + with patch( + "crewai.agents.crew_agent_executor.aget_llm_response", + new_callable=AsyncMock, + side_effect=mock_llm_response, + ): + with patch( + "crewai.agents.crew_agent_executor.aexecute_tool_and_check_finality", + new_callable=AsyncMock, + return_value=ToolResult(result="Found it", result_as_answer=False), + ): + with patch.object(executor, "_show_logs"): + with patch.object(executor, "_handle_agent_action") as mock_handle: + from crewai.agents.parser import AgentAction + + mock_handle.return_value = AgentAction( + text="Tool result", + tool="search_tool", + tool_input='{"query": "test"}', + thought="Searching", + result="Found it", + ) + result = await executor._ainvoke_loop() + + assert isinstance(result, AgentFinish) + assert call_count == 2 + + +# =========================================================================== +# Integration-style tests (Crew-level) +# =========================================================================== + + +class TestCrewLevelOutputPydanticWithNonFCModel: + """Higher-level tests verifying that a Crew with output_pydantic works + correctly when the LLM doesn't support function calling.""" + + def test_crew_output_pydantic_with_non_fc_llm_uses_react(self) -> None: + """A Crew with output_pydantic should still use ReAct flow and NOT + pass response_model to the LLM when it doesn't support FC.""" + from crewai import Agent, Crew, Task + + llm = MagicMock() + llm.supports_function_calling.return_value = False + llm.supports_stop_words.return_value = True + llm.stop = [] + llm.model = "ollama/llama3" + # Return a valid ReAct final answer + llm.call.return_value = ( + "Thought: I know the answer\n" + 'Final Answer: {"name": "John Doe", "age": 30}' + ) + + # Patch create_llm so Agent.__init__ doesn't try to instantiate a real LLM + with patch("crewai.agent.core.create_llm", return_value=llm): + agent = Agent( + role="Researcher", + goal="Find person info", + 
backstory="You research people.", + llm=llm, + verbose=False, + ) + + task = Task( + description="Find info about John Doe", + expected_output="Person info as JSON", + agent=agent, + output_pydantic=PersonInfo, + ) + + crew = Crew(agents=[agent], tasks=[task], verbose=False) + result = crew.kickoff() + + # Verify llm.call was invoked + assert llm.call.called + + # Verify response_model was NOT passed to llm.call + for call_args in llm.call.call_args_list: + rm = call_args.kwargs.get("response_model") + assert rm is None, ( + f"response_model should be None for non-FC LLM, got {rm}" + ) + + assert result is not None + + def test_crew_output_pydantic_with_fc_llm_uses_native_tools(self) -> None: + """A Crew with output_pydantic, an FC-capable LLM and no tools is + expected to take the native no-tools path, which CAN pass response_model.""" + from crewai import Agent, Crew, Task + + llm = MagicMock() + llm.supports_function_calling.return_value = True + llm.supports_stop_words.return_value = True + llm.stop = [] + llm.model = "gpt-4o-mini" + # Return a valid final answer (no tool calls) + llm.call.return_value = '{"name": "Jane Doe", "age": 25}' + + # Patch create_llm so Agent.__init__ doesn't try to instantiate a real LLM + with patch("crewai.agent.core.create_llm", return_value=llm): + agent = Agent( + role="Researcher", + goal="Find person info", + backstory="You research people.", + llm=llm, + verbose=False, + ) + + task = Task( + description="Find info about Jane Doe", + expected_output="Person info as JSON", + agent=agent, + output_pydantic=PersonInfo, + ) + + crew = Crew(agents=[agent], tasks=[task], verbose=False) + result = crew.kickoff() + + assert result is not None