From 864716d6666efd19543040c12cc8748827d15770 Mon Sep 17 00:00:00 2001
From: Logan <logan.markewich@live.com>
Date: Mon, 8 Apr 2024 13:30:41 -0600
Subject: [PATCH] Fix many GitHub issues (#12655)

fixes
---
 .../core/node_parser/text/sentence.py            |  2 +-
 .../core/response_synthesizers/__init__.py       |  6 +++++-
 .../response_synthesizers/simple_summarize.py    | 16 ++++++++--------
 .../llama_index/llms/openllm/base.py             |  2 +-
 .../llms/llama-index-llms-openllm/pyproject.toml |  2 +-
 .../pyproject.toml                               |  4 ++--
 .../llama_index/readers/file/docs/base.py        |  3 +++
 .../llama-index-readers-file/pyproject.toml      |  2 +-
 .../packs/llama_dataset_metadata/base.py         |  9 ++++-----
 .../pyproject.toml                               |  2 +-
 10 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/llama-index-core/llama_index/core/node_parser/text/sentence.py b/llama-index-core/llama_index/core/node_parser/text/sentence.py
index 550e0d146..79c5dd5e0 100644
--- a/llama-index-core/llama_index/core/node_parser/text/sentence.py
+++ b/llama-index-core/llama_index/core/node_parser/text/sentence.py
@@ -199,7 +199,7 @@ class SentenceSplitter(MetadataAwareTextSplitter):
 
         """
         token_size = self._token_size(text)
-        if self._token_size(text) <= chunk_size:
+        if token_size <= chunk_size:
             return [_Split(text, is_sentence=True, token_size=token_size)]
 
         text_splits_by_fns, is_sentence = self._get_splits_by_fns(text)
diff --git a/llama-index-core/llama_index/core/response_synthesizers/__init__.py b/llama-index-core/llama_index/core/response_synthesizers/__init__.py
index 35d10e1df..4c488b70d 100644
--- a/llama-index-core/llama_index/core/response_synthesizers/__init__.py
+++ b/llama-index-core/llama_index/core/response_synthesizers/__init__.py
@@ -1,7 +1,10 @@
 """Init file."""
 
 from llama_index.core.response_synthesizers.accumulate import Accumulate
-from llama_index.core.response_synthesizers.base import BaseSynthesizer
+from llama_index.core.response_synthesizers.base import (
+    BaseSynthesizer,
+    SynthesizerComponent,
+)
 from llama_index.core.response_synthesizers.compact_and_refine import (
     CompactAndRefine,
 )
@@ -15,6 +18,7 @@ from llama_index.core.response_synthesizers.type import ResponseMode
 __all__ = [
     "ResponseMode",
     "BaseSynthesizer",
+    "SynthesizerComponent",
     "Refine",
     "SimpleSummarize",
     "TreeSummarize",
diff --git a/llama-index-core/llama_index/core/response_synthesizers/simple_summarize.py b/llama-index-core/llama_index/core/response_synthesizers/simple_summarize.py
index 152459df1..27d92e151 100644
--- a/llama-index-core/llama_index/core/response_synthesizers/simple_summarize.py
+++ b/llama-index-core/llama_index/core/response_synthesizers/simple_summarize.py
@@ -52,23 +52,23 @@ class SimpleSummarize(BaseSynthesizer):
         **response_kwargs: Any,
     ) -> RESPONSE_TEXT_TYPE:
         text_qa_template = self._text_qa_template.partial_format(query_str=query_str)
+        single_text_chunk = "\n".join(text_chunks)
         truncated_chunks = self._prompt_helper.truncate(
             prompt=text_qa_template,
-            text_chunks=text_chunks,
+            text_chunks=[single_text_chunk],
         )
-        node_text = "\n".join(truncated_chunks)
 
         response: RESPONSE_TEXT_TYPE
         if not self._streaming:
             response = await self._llm.apredict(
                 text_qa_template,
-                context_str=node_text,
+                context_str=truncated_chunks,
                 **response_kwargs,
             )
         else:
             response = self._llm.stream(
                 text_qa_template,
-                context_str=node_text,
+                context_str=truncated_chunks,
                 **response_kwargs,
             )
 
@@ -86,23 +86,23 @@ class SimpleSummarize(BaseSynthesizer):
         **kwargs: Any,
     ) -> RESPONSE_TEXT_TYPE:
         text_qa_template = self._text_qa_template.partial_format(query_str=query_str)
+        single_text_chunk = "\n".join(text_chunks)
         truncated_chunks = self._prompt_helper.truncate(
             prompt=text_qa_template,
-            text_chunks=text_chunks,
+            text_chunks=[single_text_chunk],
         )
-        node_text = "\n".join(truncated_chunks)
 
         response: RESPONSE_TEXT_TYPE
         if not self._streaming:
             response = self._llm.predict(
                 text_qa_template,
-                context_str=node_text,
+                context_str=truncated_chunks,
                 **kwargs,
             )
         else:
             response = self._llm.stream(
                 text_qa_template,
-                context_str=node_text,
+                context_str=truncated_chunks,
                 **kwargs,
             )
 
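Note: the simple_summarize.py hunks above move the join ahead of the truncation. Previously each chunk was truncated independently and the results were joined afterwards, so the combined context could still exceed the prompt's token budget; now the chunks are joined into one string that is truncated once. A minimal pure-Python sketch of the difference, using a toy word-budget truncator as a stand-in for PromptHelper.truncate (not the real implementation):

    # Toy stand-in for PromptHelper.truncate: cap each chunk at `budget` words.
    def truncate(chunks, budget=8):
        return [" ".join(chunk.split()[:budget]) for chunk in chunks]

    chunks = ["one two three four five six", "seven eight nine ten eleven twelve"]

    # Old behavior: truncate each chunk, then join. Each chunk fits on its
    # own, but the joined text overflows the budget.
    old = "\n".join(truncate(chunks))

    # New behavior: join first, then truncate once. The result fits.
    new = truncate(["\n".join(chunks)])[0]

    print(len(old.split()), len(new.split()))  # 12 vs. 8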
diff --git a/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py b/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
index 1781326a6..e0923d268 100644
--- a/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
@@ -94,7 +94,7 @@ class OpenLLM(LLM):
     trust_remote_code: bool = Field(
         description="Optional flag to trust remote code. This is synonymous to Transformers' `trust_remote_code`. Default to False."
     )
-    _llm: openllm.LLM[Any, Any]
+    _llm: openllm.LLM[Any, Any] = PrivateAttr()
 
     def __init__(
         self,
diff --git a/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
index 3d3bb0c08..518e778e0 100644
--- a/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-openllm/pyproject.toml
@@ -28,7 +28,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openllm"
 readme = "README.md"
-version = "0.1.3"
+version = "0.1.4"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-anthropic/pyproject.toml b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-anthropic/pyproject.toml
index d367dada5..53742840a 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-anthropic/pyproject.toml
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-anthropic/pyproject.toml
@@ -27,12 +27,12 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-multi-modal-llms-anthropic"
 readme = "README.md"
-version = "0.1.3"
+version = "0.1.4"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
 llama-index-core = "^0.10.1"
-anthropic = "0.20.0"
+anthropic = "^0.23.1"
 
 [tool.poetry.group.dev.dependencies]
 ipython = "8.10.0"
diff --git a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
index a3c168d61..441ee5e05 100644
--- a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
@@ -34,6 +34,9 @@ class PDFReader(BaseReader):
         fs: Optional[AbstractFileSystem] = None,
     ) -> List[Document]:
         """Parse file."""
+        if not isinstance(file, Path):
+            file = Path(file)
+
         try:
             import pypdf
         except ImportError:
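Note: the PDFReader hunk above normalizes `file` to a `pathlib.Path` before parsing, so passing a plain string path no longer fails. A short usage sketch under that assumption (the file name is hypothetical):

    from pathlib import Path

    from llama_index.readers.file import PDFReader

    reader = PDFReader()

    # Both call styles now behave the same: the reader coerces its input
    # to a Path internally before handing it to pypdf.
    docs_from_str = reader.load_data("paper.pdf")  # hypothetical file
    docs_from_path = reader.load_data(Path("paper.pdf"))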
diff --git a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml
index b00813109..354f59e22 100644
--- a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml
+++ b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml
@@ -50,7 +50,7 @@ license = "MIT"
 maintainers = ["FarisHijazi", "Haowjy", "ephe-meral", "hursh-desai", "iamarunbrahma", "jon-chuang", "mmaatouk", "ravi03071991", "sangwongenip", "thejessezhang"]
 name = "llama-index-readers-file"
 readme = "README.md"
-version = "0.1.13"
+version = "0.1.14"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-packs/llama-index-packs-llama-dataset-metadata/llama_index/packs/llama_dataset_metadata/base.py b/llama-index-packs/llama-index-packs-llama-dataset-metadata/llama_index/packs/llama_dataset_metadata/base.py
index 66cd6e4cb..09ade2600 100644
--- a/llama-index-packs/llama-index-packs-llama-dataset-metadata/llama_index/packs/llama_dataset_metadata/base.py
+++ b/llama-index-packs/llama-index-packs-llama-dataset-metadata/llama_index/packs/llama_dataset_metadata/base.py
@@ -7,6 +7,7 @@ from llama_index.core.download.module import LLAMA_HUB_URL
 from llama_index.core.download.utils import get_file_content
 from llama_index.core.indices.base import BaseIndex
 from llama_index.core.llama_pack.base import BaseLlamaPack
+from llama_index.core.settings import Settings
 
 if TYPE_CHECKING:
     from llama_index.core.llama_dataset import LabelledRagDataset
@@ -143,11 +144,9 @@ class DatasetCard(BaseMetadata):
         )
 
         # extract baseline config info from index
-        llm = index.service_context.llm.model
-        embed_model = index.as_retriever().get_service_context().embed_model.model_name
-        chunk_size = (
-            index.as_retriever().get_service_context().transformations[0].chunk_size
-        )
+        llm = Settings.llm.metadata.model_name
+        embed_model = Settings.embed_model.model_name
+        chunk_size = index._transformations[0].chunk_size
         similarity_top_k = index.as_retriever()._similarity_top_k
         baseline_config = BaselineConfig(
             llm=llm,
diff --git a/llama-index-packs/llama-index-packs-llama-dataset-metadata/pyproject.toml b/llama-index-packs/llama-index-packs-llama-dataset-metadata/pyproject.toml
index fcbc6ff08..9d3d880b3 100644
--- a/llama-index-packs/llama-index-packs-llama-dataset-metadata/pyproject.toml
+++ b/llama-index-packs/llama-index-packs-llama-dataset-metadata/pyproject.toml
@@ -29,7 +29,7 @@ license = "MIT"
 maintainers = ["nerdai"]
 name = "llama-index-packs-llama-dataset-metadata"
 readme = "README.md"
-version = "0.1.3"
+version = "0.1.4"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
-- 
GitLab
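Note: the DatasetCard hunk above replaces the deprecated per-index `service_context` accessors with the global `Settings` singleton from llama-index 0.10; for the same reason, `chunk_size` is now read from the index's private `_transformations` list. A minimal sketch of the new lookups, using mock models from llama-index-core so no API keys are required (the mock configuration is illustrative, not part of this patch):

    from llama_index.core import Settings
    from llama_index.core.embeddings import MockEmbedding
    from llama_index.core.llms import MockLLM

    # Configure the global singleton explicitly; real code would set an
    # actual LLM and embedding model here.
    Settings.llm = MockLLM()
    Settings.embed_model = MockEmbedding(embed_dim=8)

    # These are the lookups the pack now performs instead of reading a
    # ServiceContext off the index.
    print(Settings.llm.metadata.model_name)
    print(Settings.embed_model.model_name)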