From ef54ac6b7dcf9b91b2278a66d5699f1aa1eb70c1 Mon Sep 17 00:00:00 2001
From: Joel Gotsch <joel.gotsch@gmail.com>
Date: Tue, 19 Mar 2024 02:00:48 +0100
Subject: [PATCH] Undelete openai agent tests (#12057)

---
 .../llama-index-agent-openai/pyproject.toml   |   2 +-
 .../tests/test_openai_agent.py                | 341 ++++++++++++++++++
 2 files changed, 342 insertions(+), 1 deletion(-)
 create mode 100644 llama-index-integrations/agent/llama-index-agent-openai/tests/test_openai_agent.py

diff --git a/llama-index-integrations/agent/llama-index-agent-openai/pyproject.toml b/llama-index-integrations/agent/llama-index-agent-openai/pyproject.toml
index 24b1ee7799..a11750db87 100644
--- a/llama-index-integrations/agent/llama-index-agent-openai/pyproject.toml
+++ b/llama-index-integrations/agent/llama-index-agent-openai/pyproject.toml
@@ -28,7 +28,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-agent-openai"
 readme = "README.md"
-version = "0.1.5"
+version = "0.1.6"

 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/agent/llama-index-agent-openai/tests/test_openai_agent.py b/llama-index-integrations/agent/llama-index-agent-openai/tests/test_openai_agent.py
new file mode 100644
index 0000000000..1e860584c9
--- /dev/null
+++ b/llama-index-integrations/agent/llama-index-agent-openai/tests/test_openai_agent.py
@@ -0,0 +1,341 @@
+from typing import Any, AsyncGenerator, Generator, List, Sequence
+from unittest.mock import MagicMock, patch
+
+import pytest
+from llama_index.agent.openai.base import OpenAIAgent
+from llama_index.agent.openai.step import call_tool_with_error_handling
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
+from llama_index.core.chat_engine.types import (
+    AgentChatResponse,
+    StreamingAgentChatResponse,
+)
+from llama_index.core.llms.mock import MockLLM
+from llama_index.core.tools.function_tool import FunctionTool
+from llama_index.llms.openai import OpenAI
+
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, ChoiceDelta
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
+
+
+def mock_chat_completion(*args: Any, **kwargs: Any) -> ChatCompletion:
+    if "functions" in kwargs:
+        if not kwargs["functions"]:
+            raise ValueError("functions must not be empty")
+
+    # Example taken from https://platform.openai.com/docs/api-reference/chat/create
+    return ChatCompletion(
+        id="chatcmpl-abc123",
+        object="chat.completion",
+        created=1677858242,
+        model="gpt-3.5-turbo-0301",
+        usage={"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20},
+        choices=[
+            Choice(
+                message=ChatCompletionMessage(
+                    role="assistant", content="\n\nThis is a test!"
+                ),
+                finish_reason="stop",
+                index=0,
+                logprobs=None,
+            )
+        ],
+    )
+
+
+def mock_chat_stream(
+    *args: Any, **kwargs: Any
+) -> Generator[ChatCompletionChunk, None, None]:
+    if "functions" in kwargs:
+        if not kwargs["functions"]:
+            raise ValueError("functions must not be empty")
+
+    yield ChatCompletionChunk(
+        id="chatcmpl-abc123",
+        object="chat.completion.chunk",
+        created=1677858242,
+        model="gpt-3.5-turbo-0301",
+        choices=[
+            ChunkChoice(
+                delta=ChoiceDelta(role="assistant", content="\n\nThis is a test!"),
+                finish_reason="stop",
+                index=0,
+                logprobs=None,
+            )
+        ],
+    )
+
+
+async def mock_achat_completion(*args: Any, **kwargs: Any) -> ChatCompletion:
+    return mock_chat_completion(*args, **kwargs)
+
+
+async def mock_achat_stream(
+    *args: Any, **kwargs: Any
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    async def _mock_achat_stream(
+        *args: Any, **kwargs: Any
+    ) -> AsyncGenerator[ChatCompletionChunk, None]:
+        if "functions" in kwargs:
+            if not kwargs["functions"]:
+                raise ValueError("functions must not be empty")
+
+        yield ChatCompletionChunk(
+            id="chatcmpl-abc123",
+            object="chat.completion.chunk",
+            created=1677858242,
+            model="gpt-3.5-turbo-0301",
+            choices=[
+                ChunkChoice(
+                    delta=ChoiceDelta(role="assistant", content="\n\nThis is a test!"),
+                    finish_reason="stop",
+                    index=0,
+                    logprobs=None,
+                )
+            ],
+        )
+
+    return _mock_achat_stream(*args, **kwargs)
+
+
+@pytest.fixture()
+def add_tool() -> FunctionTool:
+    def add(a: int, b: int) -> int:
+        """Add two integers and return the result integer."""
+        return a + b
+
+    return FunctionTool.from_defaults(fn=add)
+
+
+class MockChatLLM(MockLLM):
+    def __init__(self, responses: List[ChatMessage]) -> None:
+        self._i = 0  # call counter, determines which response to return
+        self._responses = responses  # list of responses to return
+
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        del messages  # unused
+        response = ChatResponse(
+            message=self._responses[self._i],
+        )
+        self._i += 1
+        return response
+
+
+MOCK_ACTION_RESPONSE = """\
+Thought: I need to use a tool to help me answer the question.
+Action: add
+Action Input: {"a": 1, "b": 1}
+"""
+
+MOCK_FINAL_RESPONSE = """\
+Thought: I have enough information to answer the question without using any more tools.
+Answer: 2
+"""
+
+
+@patch("llama_index.llms.openai.base.SyncOpenAI")
+def test_chat_basic(MockSyncOpenAI: MagicMock, add_tool: FunctionTool) -> None:
+    mock_instance = MockSyncOpenAI.return_value
+    mock_instance.chat.completions.create.return_value = mock_chat_completion()
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    response = agent.chat("What is 1 + 1?")
+    assert isinstance(response, AgentChatResponse)
+    assert response.response == "\n\nThis is a test!"
+    assert len(agent.chat_history) == 2
+    assert agent.chat_history[0].content == "What is 1 + 1?"
+    assert agent.chat_history[1].content == "\n\nThis is a test!"
+
+
+@patch("llama_index.llms.openai.base.AsyncOpenAI")
+@pytest.mark.asyncio()
+async def test_achat_basic(MockAsyncOpenAI: MagicMock, add_tool: FunctionTool) -> None:
+    mock_instance = MockAsyncOpenAI.return_value
+    mock_instance.chat.completions.create.return_value = mock_achat_completion()
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    response = await agent.achat("What is 1 + 1?")
+    assert isinstance(response, AgentChatResponse)
+    assert response.response == "\n\nThis is a test!"
+    assert len(agent.chat_history) == 2
+    assert agent.chat_history[0].content == "What is 1 + 1?"
+    assert agent.chat_history[1].content == "\n\nThis is a test!"
+
+
+@patch("llama_index.llms.openai.base.SyncOpenAI")
+def test_stream_chat_basic(MockSyncOpenAI: MagicMock, add_tool: FunctionTool) -> None:
+    mock_instance = MockSyncOpenAI.return_value
+    mock_instance.chat.completions.create.side_effect = mock_chat_stream
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    response = agent.stream_chat("What is 1 + 1?")
+    assert isinstance(response, StreamingAgentChatResponse)
+    # str() strips newline values
+    assert str(response) == "This is a test!"
+    assert len(agent.chat_history) == 2
+    assert agent.chat_history[0].content == "What is 1 + 1?"
+    assert agent.chat_history[1].content == "This is a test!"
+
+
+@patch("llama_index.llms.openai.base.AsyncOpenAI")
+@pytest.mark.asyncio()
+async def test_astream_chat_basic(
+    MockAsyncOpenAI: MagicMock, add_tool: FunctionTool
+) -> None:
+    mock_instance = MockAsyncOpenAI.return_value
+    mock_instance.chat.completions.create.side_effect = mock_achat_stream
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    response_stream = await agent.astream_chat("What is 1 + 1?")
+    async for response in response_stream.async_response_gen():
+        pass
+    assert isinstance(response_stream, StreamingAgentChatResponse)
+    # raw streamed deltas keep their leading newlines; only memory is stripped
+    assert response == "\n\nThis is a test!"
+    assert len(agent.chat_history) == 2
+    assert agent.chat_history[0].content == "What is 1 + 1?"
+    assert agent.chat_history[1].content == "This is a test!"
+
+
+@patch("llama_index.llms.openai.base.SyncOpenAI")
+def test_chat_no_functions(MockSyncOpenAI: MagicMock) -> None:
+    mock_instance = MockSyncOpenAI.return_value
+    mock_instance.chat.completions.create.return_value = mock_chat_completion()
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+
+    agent = OpenAIAgent.from_tools(
+        llm=llm,
+    )
+    response = agent.chat("What is 1 + 1?")
+    assert isinstance(response, AgentChatResponse)
+    assert response.response == "\n\nThis is a test!"
+
+
+def test_call_tool_with_error_handling() -> None:
+    """Test call tool with error handling."""
+
+    def _add(a: int, b: int) -> int:
+        return a + b
+
+    tool = FunctionTool.from_defaults(fn=_add)
+
+    output = call_tool_with_error_handling(
+        tool, {"a": 1, "b": 1}, error_message="Error!"
+    )
+    assert output.content == "2"
+
+    # try error
+    output = call_tool_with_error_handling(
+        tool, {"a": "1", "b": 1}, error_message="Error!"
+    )
+    assert output.content == "Error!"
+
+
+@patch("llama_index.llms.openai.base.SyncOpenAI")
+def test_add_step(
+    MockSyncOpenAI: MagicMock,
+    add_tool: FunctionTool,
+) -> None:
+    """Test add step."""
+    mock_instance = MockSyncOpenAI.return_value
+    mock_instance.chat.completions.create.return_value = mock_chat_completion()
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+    # sync
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    ## NOTE: can only take a single step before finishing,
+    # since mocked chat output does not call any tools
+    task = agent.create_task("What is 1 + 1?")
+    step_output = agent.run_step(task.task_id)
+    assert str(step_output) == "\n\nThis is a test!"
+
+    # add human input (not used but should be in memory)
+    task = agent.create_task("What is 1 + 1?")
+    step_output = agent.run_step(task.task_id, input="tmp")
+    chat_history: List[ChatMessage] = task.extra_state["new_memory"].get_all()
+    assert "tmp" in [m.content for m in chat_history]
+
+    # # stream_step
+    # agent = OpenAIAgent.from_tools(
+    #     tools=[add_tool],
+    #     llm=llm,
+    # )
+    # task = agent.create_task("What is 1 + 1?")
+    # # first step
+    # step_output = agent.stream_step(task.task_id)
+    # # add human input (not used but should be in memory)
+    # step_output = agent.stream_step(task.task_id, input="tmp")
+    # chat_history: List[ChatMessage] = task.extra_state["new_memory"].get_all()
+    # assert "tmp" in [m.content for m in chat_history]
+
+
+@patch("llama_index.llms.openai.base.AsyncOpenAI")
+@pytest.mark.asyncio()
+async def test_async_add_step(
+    MockAsyncOpenAI: MagicMock,
+    add_tool: FunctionTool,
+) -> None:
+    mock_instance = MockAsyncOpenAI.return_value
+
+    llm = OpenAI(model="gpt-3.5-turbo")
+    # async
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    task = agent.create_task("What is 1 + 1?")
+    # first step
+    mock_instance.chat.completions.create.return_value = mock_achat_completion()
+    step_output = await agent.arun_step(task.task_id)
+    # add human input (not used but should be in memory)
+    task = agent.create_task("What is 1 + 1?")
+    mock_instance.chat.completions.create.return_value = mock_achat_completion()
+    step_output = await agent.arun_step(task.task_id, input="tmp")
+    chat_history: List[ChatMessage] = task.extra_state["new_memory"].get_all()
+    assert "tmp" in [m.content for m in chat_history]
+
+    # async stream step
+    agent = OpenAIAgent.from_tools(
+        tools=[add_tool],
+        llm=llm,
+    )
+    task = agent.create_task("What is 1 + 1?")
+    # first step
+    mock_instance.chat.completions.create.side_effect = mock_achat_stream
+    step_output = await agent.astream_step(task.task_id)
+    # add human input (not used but should be in memory)
+    task = agent.create_task("What is 1 + 1?")
+    mock_instance.chat.completions.create.side_effect = mock_achat_stream
+
+    # stream the output to ensure it gets written to memory
+    step_output = await agent.astream_step(task.task_id, input="tmp")
+    async for _ in step_output.output.async_response_gen():
+        pass
+
+    chat_history = task.memory.get_all()
+    assert "tmp" in [m.content for m in chat_history]
--
GitLab
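
Note on the mocking pattern used above: mock_achat_stream is a coroutine that,
once awaited, hands back an async generator. This mirrors how the async OpenAI
SDK call is awaited first and the resulting stream iterated afterwards. Below is
a minimal standalone sketch of that shape, stdlib only; the names mock_stream,
_gen, and main are illustrative, and a plain string stands in for
ChatCompletionChunk:

    import asyncio
    from typing import Any, AsyncGenerator


    async def mock_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[str, None]:
        # Outer coroutine: awaited like the real `create(..., stream=True)` call.
        async def _gen() -> AsyncGenerator[str, None]:
            # Inner async generator: iterated like the real chunk stream.
            yield "\n\nThis is a test!"

        return _gen()


    async def main() -> None:
        stream = await mock_stream()  # await first, exactly as the agent does
        chunks = [chunk async for chunk in stream]
        assert chunks == ["\n\nThis is a test!"]


    asyncio.run(main())

Defining the outer function as a coroutine rather than an async generator is
what lets a MagicMock side_effect satisfy both the "awaitable" and the
"async-iterable" halves of the SDK contract in these tests.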