From 58156133359456aa42bb5dfba55c944bd06e6fcc Mon Sep 17 00:00:00 2001
From: Logan <logan.markewich@live.com>
Date: Fri, 28 Feb 2025 20:32:58 -0600
Subject: [PATCH] v0.12.22 (#17969)

---
 CHANGELOG.md                                  | 14 +++++++++
 docs/docs/CHANGELOG.md                        | 16 +++++++++-
 llama-index-core/llama_index/core/__init__.py |  2 +-
 llama-index-core/pyproject.toml               |  2 +-
 .../llama_index/llms/huggingface_api/base.py  | 13 +++++----
 .../pyproject.toml                            |  2 +-
 .../tests/test_huggingface_api.py             | 29 ++++++++++++-------
 poetry.lock                                   |  8 ++---
 pyproject.toml                                |  4 +--
 9 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d721b1040..952e20e0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- fix agentworkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that Github reader uses timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support extract (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md
index af427b549..952e20e0d 100644
--- a/docs/docs/CHANGELOG.md
+++ b/docs/docs/CHANGELOG.md
@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- fix agentworkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that Github reader uses timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support extract (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
@@ -10,7 +24,7 @@
 - Feature/remove retriever tool template override (#17909)
 - only modify delta if 'Answer:' was actually detected (#17901)
 - Fix CitationQueryEngine init function for response_synthesizer (#17897)
-- fix ChatSummaryMemoryBuffer._summarize_oldest_chat_history (#17845)
+- fix ChatSummaryMemoryBuffer.\_summarize_oldest_chat_history (#17845)
 - fix: make base64 detection more robust across the board (#17930)
 - fix: stepwise execution breaks when steps do async work (#17914)
 - safer workflow cancel + fix restored context bug (#17938)
diff --git a/llama-index-core/llama_index/core/__init__.py b/llama-index-core/llama_index/core/__init__.py
index 7cc9d89b5..31e99d656 100644
--- a/llama-index-core/llama_index/core/__init__.py
+++ b/llama-index-core/llama_index/core/__init__.py
@@ -1,6 +1,6 @@
 """Init file of LlamaIndex."""
 
-__version__ = "0.12.21"
+__version__ = "0.12.22"
 
 import logging
 from logging import NullHandler
diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml
index 4bb4aa407..140b51f7a 100644
--- a/llama-index-core/pyproject.toml
+++ b/llama-index-core/pyproject.toml
@@ -46,7 +46,7 @@ name = "llama-index-core"
 packages = [{include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py b/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
index a20e1bf39..4b41e98a6 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py
@@ -173,10 +173,13 @@ class HuggingFaceInferenceAPI(FunctionCallingLLM):
         self._sync_client = InferenceClient(**self._get_inference_client_kwargs())
         self._async_client = AsyncInferenceClient(**self._get_inference_client_kwargs())
 
-        # set context window if not provided
-        info = self._sync_client.get_endpoint_info()
-        if "max_input_tokens" in info and kwargs.get("context_window") is None:
-            self.context_window = info["max_input_tokens"]
+        # set context window if not provided, if we can get the endpoint info
+        try:
+            info = self._sync_client.get_endpoint_info()
+            if "max_input_tokens" in info and kwargs.get("context_window") is None:
+                self.context_window = info["max_input_tokens"]
+        except Exception:
+            pass
 
     def _get_inference_client_kwargs(self) -> Dict[str, Any]:
         """Extract the Hugging Face InferenceClient construction parameters."""
@@ -224,7 +227,7 @@ class HuggingFaceInferenceAPI(FunctionCallingLLM):
 
     def _parse_streaming_tool_calls(
         self, tool_call_strs: List[str]
-    ) -> List[ToolSelection | str]:
+    ) -> List[Union[ToolSelection, str]]:
         tool_calls = []
         # Try to parse into complete objects, otherwise keep as strings
         for tool_call_str in tool_call_strs:
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
index fcd5d95b5..db69e25e5 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-huggingface-api"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py b/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
index 0dfd2f53f..b765d7d67 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py
@@ -3,8 +3,9 @@ from unittest.mock import MagicMock, patch
 import pytest
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from huggingface_hub.inference._generated.types import ChatCompletionOutput
 
-STUB_MODEL_NAME = "placeholder_model"
+STUB_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
 
 
 @pytest.fixture(name="hf_inference_api")
@@ -45,15 +46,17 @@ class TestHuggingFaceInferenceAPI:
         generated_response = (
             " It's based on the book of the same name by James Fenimore Cooper."
         )
-        conversational_return = {
-            "choices": [
-                {
-                    "message": {
-                        "content": generated_response,
+        conversational_return = ChatCompletionOutput.parse_obj(
+            {
+                "choices": [
+                    {
+                        "message": {
+                            "content": generated_response,
+                        }
                     }
-                }
-            ],
-        }
+                ],
+            }
+        )
 
         with patch.object(
             hf_inference_api._sync_client,
@@ -67,6 +70,8 @@
         mock_conversational.assert_called_once_with(
             messages=[{"role": m.role.value, "content": m.content} for m in messages],
             model=STUB_MODEL_NAME,
+            temperature=0.1,
+            max_tokens=256,
         )
 
     def test_chat_text_generation(
@@ -97,6 +102,8 @@
         assert response.message.content == conversational_return
         mock_complete.assert_called_once_with(
             "System: You are an expert movie reviewer\nUser: Which movie is the best?\nAssistant:",
+            model=STUB_MODEL_NAME,
+            temperature=0.1,
             max_new_tokens=256,
         )
 
@@ -109,5 +116,7 @@
             return_value=generated_text,
         ) as mock_text_generation:
             response = hf_inference_api.complete(prompt)
-        mock_text_generation.assert_called_once_with(prompt, max_new_tokens=256)
+        mock_text_generation.assert_called_once_with(
+            prompt, model=STUB_MODEL_NAME, temperature=0.1, max_new_tokens=256
+        )
         assert response.text == generated_text
diff --git a/poetry.lock b/poetry.lock
index 443fc4d34..5844ab8d5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1695,13 +1695,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"
 
 [[package]]
 name = "llama-index-core"
-version = "0.12.21"
+version = "0.12.22"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_core-0.12.21-py3-none-any.whl", hash = "sha256:8583c781263a883f91c5575d533a5c3c1c27f923ee8913741e1598052370495a"},
-    {file = "llama_index_core-0.12.21.tar.gz", hash = "sha256:bd51521197231b767e90394f1df9e8869016cfeb9bbe6599fa56a3c32ddd8ccc"},
+    {file = "llama_index_core-0.12.22-py3-none-any.whl", hash = "sha256:d238eeb26e81f89b49453bb7c3c691d19ebc89dc51a5c3ed37609a619f81bd27"},
+    {file = "llama_index_core-0.12.22.tar.gz", hash = "sha256:49d4a32d0268eb719693a63ba49ce831076c2150c3cc9ed787ce1d65ecd71c0c"},
 ]
 
 [package.dependencies]
@@ -4876,4 +4876,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4.0"
-content-hash = "e148dc9566369578a8afc7ba00606f8b6fd6d1c9f4801a780c6d580a549825c0"
+content-hash = "eeb02964193aaf0a1c5cfb6e2e4d8632985e5f27fdf53a7126fb63df57cbe4a9"
diff --git a/pyproject.toml b/pyproject.toml
index 398c0d097..72656e586 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ name = "llama-index"
 packages = [{from = "_llama-index", include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
@@ -57,7 +57,7 @@ llama-index-agent-openai = "^0.4.0"
 llama-index-readers-file = "^0.4.0"
 llama-index-readers-llama-parse = ">=0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
-llama-index-core = "^0.12.21"
+llama-index-core = "^0.12.22"
 llama-index-multi-modal-llms-openai = "^0.4.0"
 llama-index-cli = "^0.4.1"
 nltk = ">3.8.1" # avoids a CVE, temp until next release, should be in llama-index-core
--
GitLab