From a47ec2388446e842bf685d3390bc11d92e62e389 Mon Sep 17 00:00:00 2001
From: Jerry Liu <jerryjliu98@gmail.com>
Date: Fri, 2 Feb 2024 10:00:41 -0800
Subject: [PATCH] fix ollama stream_chat, explicitly raise error in `CondenseQuestionQueryEngine` (#10407)

* cr

* cr
---
 llama_index/chat_engine/condense_question.py |  2 +-
 llama_index/chat_engine/types.py             | 11 +++++++++--
 llama_index/llms/ollama.py                   |  2 ++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/llama_index/chat_engine/condense_question.py b/llama_index/chat_engine/condense_question.py
index 560718955d..4c7de0355d 100644
--- a/llama_index/chat_engine/condense_question.py
+++ b/llama_index/chat_engine/condense_question.py
@@ -249,7 +249,7 @@ class CondenseQuestionChatEngine(BaseChatEngine):
                 sources=[tool_output],
             )
             thread = Thread(
-                target=response.write_response_to_history, args=(self._memory,)
+                target=response.write_response_to_history, args=(self._memory, True)
             )
             thread.start()
         else:
diff --git a/llama_index/chat_engine/types.py b/llama_index/chat_engine/types.py
index 217fb65d7c..52848e1611 100644
--- a/llama_index/chat_engine/types.py
+++ b/llama_index/chat_engine/types.py
@@ -98,7 +98,9 @@ class StreamingAgentChatResponse:
         self._aqueue.put_nowait(delta)
         self._new_item_event.set()
 
-    def write_response_to_history(self, memory: BaseMemory) -> None:
+    def write_response_to_history(
+        self, memory: BaseMemory, raise_error: bool = False
+    ) -> None:
         if self.chat_stream is None:
             raise ValueError(
                 "chat_stream is None. Cannot write to history without chat_stream."
@@ -117,7 +119,12 @@
                 chat.message.content = final_text.strip()  # final message
             memory.put(chat.message)
         except Exception as e:
-            logger.warning(f"Encountered exception writing response to history: {e}")
+            if not raise_error:
+                logger.warning(
+                    f"Encountered exception writing response to history: {e}"
+                )
+            else:
+                raise
 
         self._is_done = True
 
diff --git a/llama_index/llms/ollama.py b/llama_index/llms/ollama.py
index b0a638f774..93ff606358 100644
--- a/llama_index/llms/ollama.py
+++ b/llama_index/llms/ollama.py
@@ -148,6 +148,8 @@ class Ollama(CustomLLM):
                 for line in response.iter_lines():
                     if line:
                         chunk = json.loads(line)
+                        if "done" in chunk and chunk["done"]:
+                            break
                         message = chunk["message"]
                         delta = message.get("content")
                         text += delta
--
GitLab
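
Note on the ollama.py hunk: the /api/chat endpoint streams newline-delimited JSON, and the final chunk reports "done": true with summary fields rather than another content delta, which appears to be what made the previous loop fail at end of stream. Below is a minimal sketch of the guarded parsing loop run against simulated chunks rather than a live Ollama server; the sample lines are illustrative, not captured output.

    import json

    # Simulated NDJSON stream shaped like Ollama's /api/chat responses;
    # the last chunk is the "done" summary and carries no content delta.
    lines = [
        '{"message": {"role": "assistant", "content": "Hel"}, "done": false}',
        '{"message": {"role": "assistant", "content": "lo"}, "done": false}',
        '{"done": true, "total_duration": 123456}',
    ]

    text = ""
    for line in lines:
        if line:
            chunk = json.loads(line)
            if "done" in chunk and chunk["done"]:
                # Without this break the loop would index chunk["message"] on the
                # summary chunk and fail before the stream finishes cleanly.
                break
            message = chunk["message"]
            delta = message.get("content")
            text += delta

    print(text)  # Hello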
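Note on the raise_error change: write_response_to_history previously downgraded any failure while draining the chat stream to a log warning, so the writer thread started by CondenseQuestionChatEngine could die silently. The hunks above add an opt-in raise_error flag and pass True through the Thread args. A rough standalone sketch of the toggle follows; the names write_to_history, broken_stream, and history are illustrative and not part of the library.

    import logging

    logger = logging.getLogger(__name__)

    def write_to_history(history: list, chunks, raise_error: bool = False) -> None:
        # Same pattern as the patched write_response_to_history:
        # swallow-and-warn by default, re-raise when the caller opts in.
        try:
            text = ""
            for chunk in chunks:
                text += chunk
            history.append(text)
        except Exception as e:
            if not raise_error:
                logger.warning(f"Encountered exception writing response to history: {e}")
            else:
                raise

    def broken_stream():
        yield "partial "
        raise RuntimeError("stream died")

    history: list = []
    write_to_history(history, broken_stream())  # old behavior: warn and continue
    try:
        write_to_history(history, broken_stream(), raise_error=True)
    except RuntimeError as e:
        print(f"caller now sees the failure: {e}")  # behavior the engine opts into

When the writer runs on the background Thread as in condense_question.py, the re-raised exception ends that thread with a visible traceback instead of a quiet warning; it does not propagate to the spawning thread.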