From a47ec2388446e842bf685d3390bc11d92e62e389 Mon Sep 17 00:00:00 2001
From: Jerry Liu <jerryjliu98@gmail.com>
Date: Fri, 2 Feb 2024 10:00:41 -0800
Subject: [PATCH] fix ollama stream_chat, explicitly raise error in `CondenseQuestionQueryEngine` (#10407)

* cr

* cr
---
 llama_index/chat_engine/condense_question.py |  2 +-
 llama_index/chat_engine/types.py             | 11 +++++++++--
 llama_index/llms/ollama.py                   |  2 ++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/llama_index/chat_engine/condense_question.py b/llama_index/chat_engine/condense_question.py
index 560718955d..4c7de0355d 100644
--- a/llama_index/chat_engine/condense_question.py
+++ b/llama_index/chat_engine/condense_question.py
@@ -249,7 +249,7 @@ class CondenseQuestionChatEngine(BaseChatEngine):
                 sources=[tool_output],
             )
             thread = Thread(
-                target=response.write_response_to_history, args=(self._memory,)
+                target=response.write_response_to_history, args=(self._memory, True)
             )
             thread.start()
         else:
diff --git a/llama_index/chat_engine/types.py b/llama_index/chat_engine/types.py
index 217fb65d7c..52848e1611 100644
--- a/llama_index/chat_engine/types.py
+++ b/llama_index/chat_engine/types.py
@@ -98,7 +98,9 @@ class StreamingAgentChatResponse:
         self._aqueue.put_nowait(delta)
         self._new_item_event.set()
 
-    def write_response_to_history(self, memory: BaseMemory) -> None:
+    def write_response_to_history(
+        self, memory: BaseMemory, raise_error: bool = False
+    ) -> None:
         if self.chat_stream is None:
             raise ValueError(
                 "chat_stream is None. Cannot write to history without chat_stream."
@@ -117,7 +119,12 @@
                 chat.message.content = final_text.strip()  # final message
             memory.put(chat.message)
         except Exception as e:
-            logger.warning(f"Encountered exception writing response to history: {e}")
+            if not raise_error:
+                logger.warning(
+                    f"Encountered exception writing response to history: {e}"
+                )
+            else:
+                raise
 
         self._is_done = True
 
diff --git a/llama_index/llms/ollama.py b/llama_index/llms/ollama.py
index b0a638f774..93ff606358 100644
--- a/llama_index/llms/ollama.py
+++ b/llama_index/llms/ollama.py
@@ -148,6 +148,8 @@ class Ollama(CustomLLM):
                 for line in response.iter_lines():
                     if line:
                         chunk = json.loads(line)
+                        if "done" in chunk and chunk["done"]:
+                            break
                         message = chunk["message"]
                         delta = message.get("content")
                         text += delta
--
GitLab
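
Note on the ollama.py hunk: the /api/chat endpoint streams newline-delimited JSON, and the final chunk reports "done": true with summary fields rather than another content delta, which appears to be what made the previous loop fail at end of stream. Below is a minimal sketch of the guarded parsing loop run against simulated chunks rather than a live Ollama server; the sample lines are illustrative, not captured output.

    import json

    # Simulated NDJSON stream shaped like Ollama's /api/chat responses;
    # the last chunk is the "done" summary and carries no content delta.
    lines = [
        '{"message": {"role": "assistant", "content": "Hel"}, "done": false}',
        '{"message": {"role": "assistant", "content": "lo"}, "done": false}',
        '{"done": true, "total_duration": 123456}',
    ]

    text = ""
    for line in lines:
        if line:
            chunk = json.loads(line)
            if "done" in chunk and chunk["done"]:
                # Without this break the loop would index chunk["message"] on the
                # summary chunk and fail before the stream finishes cleanly.
                break
            message = chunk["message"]
            delta = message.get("content")
            text += delta

    print(text)  # Hello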
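Note on the raise_error change: write_response_to_history previously downgraded any failure while draining the chat stream to a log warning, so the writer thread started by CondenseQuestionChatEngine could die silently. The hunks above add an opt-in raise_error flag and pass True through the Thread args. A rough standalone sketch of the toggle follows; the names write_to_history, broken_stream, and history are illustrative and not part of the library.

    import logging

    logger = logging.getLogger(__name__)

    def write_to_history(history: list, chunks, raise_error: bool = False) -> None:
        # Same pattern as the patched write_response_to_history:
        # swallow-and-warn by default, re-raise when the caller opts in.
        try:
            text = ""
            for chunk in chunks:
                text += chunk
            history.append(text)
        except Exception as e:
            if not raise_error:
                logger.warning(f"Encountered exception writing response to history: {e}")
            else:
                raise

    def broken_stream():
        yield "partial "
        raise RuntimeError("stream died")

    history: list = []
    write_to_history(history, broken_stream())  # old behavior: warn and continue
    try:
        write_to_history(history, broken_stream(), raise_error=True)
    except RuntimeError as e:
        print(f"caller now sees the failure: {e}")  # behavior the engine opts into

When the writer runs on the background Thread as in condense_question.py, the re-raised exception ends that thread with a visible traceback instead of a quiet warning; it does not propagate to the spawning thread.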