From 58156133359456aa42bb5dfba55c944bd06e6fcc Mon Sep 17 00:00:00 2001
From: Logan <logan.markewich@live.com>
Date: Fri, 28 Feb 2025 20:32:58 -0600
Subject: [PATCH] v0.12.22 (#17969)

---
 CHANGELOG.md                                  | 14 +++++++++
 docs/docs/CHANGELOG.md                        | 16 +++++++++-
 llama-index-core/llama_index/core/__init__.py |  2 +-
 llama-index-core/pyproject.toml               |  2 +-
 .../llama_index/llms/huggingface_api/base.py  | 13 +++++----
 .../pyproject.toml                            |  2 +-
 .../tests/test_huggingface_api.py             | 29 ++++++++++++-------
 poetry.lock                                   |  8 ++---
 pyproject.toml                                |  4 +--
 9 files changed, 65 insertions(+), 25 deletions(-)
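
Note for reviewers: the base.py change makes the context-window lookup
best-effort, so constructing HuggingFaceInferenceAPI no longer fails when the
endpoint metadata cannot be fetched (e.g. no network access, or a model with
no endpoint info). The annotation change from `ToolSelection | str` to
`Union[ToolSelection, str]` restores Python 3.9 compatibility, since PEP 604
unions require Python 3.10 when evaluated at runtime and this package supports
`>=3.9`. A minimal sketch of the intended constructor behavior; the model name
and context_window value below are illustrative only, not part of this patch:

    from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

    # If get_endpoint_info() succeeds and no context_window was passed, the
    # endpoint's max_input_tokens is used. If the lookup raises (offline,
    # missing endpoint), the default or explicitly passed value is kept
    # instead of the constructor aborting.
    llm = HuggingFaceInferenceAPI(
        model_name="microsoft/Phi-4-multimodal-instruct",  # illustrative
        context_window=4096,  # an explicit value always wins over endpoint info
    )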

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d721b1040..952e20e0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- Fix AgentWorkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that the GitHub reader uses the timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support `extract` (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md
index af427b549..952e20e0d 100644
--- a/docs/docs/CHANGELOG.md
+++ b/docs/docs/CHANGELOG.md
@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- Fix AgentWorkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that the GitHub reader uses the timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support `extract` (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
@@ -10,7 +24,7 @@
 - Feature/remove retriever tool template override (#17909)
 - only modify delta if 'Answer:' was actually detected (#17901)
 - Fix CitationQueryEngine init function for response_synthesizer (#17897)
-- fix ChatSummaryMemoryBuffer._summarize_oldest_chat_history (#17845)
+- fix ChatSummaryMemoryBuffer.\_summarize_oldest_chat_history (#17845)
 - fix: make base64 detection more robust across the board (#17930)
 - fix: stepwise execution breaks when steps do async work (#17914)
 - safer workflow cancel + fix restored context bug (#17938)
diff --git a/llama-index-core/llama_index/core/__init__.py b/llama-index-core/llama_index/core/__init__.py
index 7cc9d89b5..31e99d656 100644
--- a/llama-index-core/llama_index/core/__init__.py
+++ b/llama-index-core/llama_index/core/__init__.py
@@ -1,6 +1,6 @@
 """Init file of LlamaIndex."""
 
-__version__ = "0.12.21"
+__version__ = "0.12.22"
 
 import logging
 from logging import NullHandler
diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml
index 4bb4aa407..140b51f7a 100644
--- a/llama-index-core/pyproject.toml
+++ b/llama-index-core/pyproject.toml
@@ -46,7 +46,7 @@ name = "llama-index-core"
 packages = [{include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py b/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
index a20e1bf39..4b41e98a6 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
@@ -173,10 +173,13 @@ class HuggingFaceInferenceAPI(FunctionCallingLLM):
         self._sync_client = InferenceClient(**self._get_inference_client_kwargs())
         self._async_client = AsyncInferenceClient(**self._get_inference_client_kwargs())
 
-        # set context window if not provided
-        info = self._sync_client.get_endpoint_info()
-        if "max_input_tokens" in info and kwargs.get("context_window") is None:
-            self.context_window = info["max_input_tokens"]
+        # set context window if not provided and the endpoint info is available
+        try:
+            info = self._sync_client.get_endpoint_info()
+            if "max_input_tokens" in info and kwargs.get("context_window") is None:
+                self.context_window = info["max_input_tokens"]
+        except Exception:
+            pass  # endpoint info unavailable; keep the default/explicit context window
 
     def _get_inference_client_kwargs(self) -> Dict[str, Any]:
         """Extract the Hugging Face InferenceClient construction parameters."""
@@ -224,7 +227,7 @@ class HuggingFaceInferenceAPI(FunctionCallingLLM):
 
     def _parse_streaming_tool_calls(
         self, tool_call_strs: List[str]
-    ) -> List[ToolSelection | str]:
+    ) -> List[Union[ToolSelection, str]]:
         tool_calls = []
         # Try to parse into complete objects, otherwise keep as strings
         for tool_call_str in tool_call_strs:
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
index fcd5d95b5..db69e25e5 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-huggingface-api"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py b/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
index 0dfd2f53f..b765d7d67 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
@@ -3,8 +3,9 @@ from unittest.mock import MagicMock, patch
 import pytest
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from huggingface_hub.inference._generated.types import ChatCompletionOutput
 
-STUB_MODEL_NAME = "placeholder_model"
+STUB_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
 
 
 @pytest.fixture(name="hf_inference_api")
@@ -45,15 +46,17 @@ class TestHuggingFaceInferenceAPI:
         generated_response = (
             " It's based on the book of the same name by James Fenimore Cooper."
         )
-        conversational_return = {
-            "choices": [
-                {
-                    "message": {
-                        "content": generated_response,
+        conversational_return = ChatCompletionOutput.parse_obj(
+            {
+                "choices": [
+                    {
+                        "message": {
+                            "content": generated_response,
+                        }
                     }
-                }
-            ],
-        }
+                ],
+            }
+        )
 
         with patch.object(
             hf_inference_api._sync_client,
@@ -67,6 +70,8 @@ class TestHuggingFaceInferenceAPI:
         mock_conversational.assert_called_once_with(
             messages=[{"role": m.role.value, "content": m.content} for m in messages],
             model=STUB_MODEL_NAME,
+            temperature=0.1,
+            max_tokens=256,
         )
 
     def test_chat_text_generation(
@@ -97,6 +102,8 @@ class TestHuggingFaceInferenceAPI:
         assert response.message.content == conversational_return
         mock_complete.assert_called_once_with(
             "System: You are an expert movie reviewer\nUser: Which movie is the best?\nAssistant:",
+            model=STUB_MODEL_NAME,
+            temperature=0.1,
             max_new_tokens=256,
         )
 
@@ -109,5 +116,7 @@ class TestHuggingFaceInferenceAPI:
             return_value=generated_text,
         ) as mock_text_generation:
             response = hf_inference_api.complete(prompt)
-        mock_text_generation.assert_called_once_with(prompt, max_new_tokens=256)
+        mock_text_generation.assert_called_once_with(
+            prompt, model=STUB_MODEL_NAME, temperature=0.1, max_new_tokens=256
+        )
         assert response.text == generated_text
diff --git a/poetry.lock b/poetry.lock
index 443fc4d34..5844ab8d5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1695,13 +1695,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"
 
 [[package]]
 name = "llama-index-core"
-version = "0.12.21"
+version = "0.12.22"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_core-0.12.21-py3-none-any.whl", hash = "sha256:8583c781263a883f91c5575d533a5c3c1c27f923ee8913741e1598052370495a"},
-    {file = "llama_index_core-0.12.21.tar.gz", hash = "sha256:bd51521197231b767e90394f1df9e8869016cfeb9bbe6599fa56a3c32ddd8ccc"},
+    {file = "llama_index_core-0.12.22-py3-none-any.whl", hash = "sha256:d238eeb26e81f89b49453bb7c3c691d19ebc89dc51a5c3ed37609a619f81bd27"},
+    {file = "llama_index_core-0.12.22.tar.gz", hash = "sha256:49d4a32d0268eb719693a63ba49ce831076c2150c3cc9ed787ce1d65ecd71c0c"},
 ]
 
 [package.dependencies]
@@ -4876,4 +4876,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4.0"
-content-hash = "e148dc9566369578a8afc7ba00606f8b6fd6d1c9f4801a780c6d580a549825c0"
+content-hash = "eeb02964193aaf0a1c5cfb6e2e4d8632985e5f27fdf53a7126fb63df57cbe4a9"
diff --git a/pyproject.toml b/pyproject.toml
index 398c0d097..72656e586 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ name = "llama-index"
 packages = [{from = "_llama-index", include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
@@ -57,7 +57,7 @@ llama-index-agent-openai = "^0.4.0"
 llama-index-readers-file = "^0.4.0"
 llama-index-readers-llama-parse = ">=0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
-llama-index-core = "^0.12.21"
+llama-index-core = "^0.12.22"
 llama-index-multi-modal-llms-openai = "^0.4.0"
 llama-index-cli = "^0.4.1"
 nltk = ">3.8.1"  # avoids a CVE, temp until next release, should be in llama-index-core
-- 
GitLab