diff --git a/benchmarks/agent/math_tasks.py b/benchmarks/agent/math_tasks.py index 76b6e09552bc6f7b6d5d470570843dc219690788..bf798763371c25dcc88d27ada6f7838ee9e6b41b 100644 --- a/benchmarks/agent/math_tasks.py +++ b/benchmarks/agent/math_tasks.py @@ -7,12 +7,12 @@ from llama_index.tools.function_tool import FunctionTool def add(a: int, b: int) -> int: - """Add two integers and returns the result integer""" + """Add two integers and return the result integer.""" return a + b def multiply(a: int, b: int) -> int: - """Multiple two integers and returns the result integer""" + """Multiply two integers and return the result integer.""" return a * b diff --git a/benchmarks/embeddings/bench_embeddings.py b/benchmarks/embeddings/bench_embeddings.py index b3498cd04badfb29801848d102c17a62d57fee0e..1d0320ecb5778a5b9ef44129b769c91c6ed4dd1d 100644 --- a/benchmarks/embeddings/bench_embeddings.py +++ b/benchmarks/embeddings/bench_embeddings.py @@ -16,7 +16,7 @@ def generate_strings(num_strings: int = 100, string_length: int = 10) -> List[st offset 0: [0:string_length], [string_length:2*string_length], ... offset 1: [1:1+string_length], [1+string_length:1+2*string_length],... ... - """ + """ # noqa: D415 content = ( SimpleDirectoryReader("../../examples/paul_graham_essay/data") .load_data()[0] diff --git a/experimental/cli/cli_add.py b/experimental/cli/cli_add.py index aecb98d9ba8371788bf2dec6ffb143bfd49aa251..076c52b26d128cade52d5833e8d285e4c10cd6e0 100644 --- a/experimental/cli/cli_add.py +++ b/experimental/cli/cli_add.py @@ -7,7 +7,7 @@ from .configuration import load_index, save_index def add_cli(args: Namespace) -> None: - """Handle subcommand "add" """ + """Handle subcommand "add".""" index = load_index() for p in args.files: @@ -26,7 +26,7 @@ def add_cli(args: Namespace) -> None: def register_add_cli(subparsers: _SubParsersAction) -> None: - """Register subcommand "add" to ArgumentParser""" + """Register subcommand "add" to ArgumentParser.""" parser = subparsers.add_parser("add") parser.add_argument( "files", diff --git a/experimental/cli/cli_init.py b/experimental/cli/cli_init.py index 2d1b68fbe63f3662100a57bc9e290ab31a2146aa..65728d31ed1cb7f83c03ea604b59a8caf7a4c155 100644 --- a/experimental/cli/cli_init.py +++ b/experimental/cli/cli_init.py @@ -4,13 +4,13 @@ from .configuration import load_config, save_config def init_cli(args: Namespace) -> None: - """Handle subcommand "init" """ + """Handle subcommand "init".""" config = load_config(args.directory) save_config(config, args.directory) def register_init_cli(subparsers: _SubParsersAction) -> None: - """Register subcommand "init" to ArgumentParser""" + """Register subcommand "init" to ArgumentParser.""" parser = subparsers.add_parser("init") parser.add_argument( "directory", diff --git a/experimental/cli/cli_query.py b/experimental/cli/cli_query.py index 3550f73a71a56d2240811534cc839fc0ebd43641..dc4543fa129c1dbec8d7250b4ab3221ebd3b9bd0 100644 --- a/experimental/cli/cli_query.py +++ b/experimental/cli/cli_query.py @@ -4,14 +4,14 @@ from .configuration import load_index def query_cli(args: Namespace) -> None: - """Handle subcommand "query" """ + """Handle subcommand "query".""" index = load_index() query_engine = index.as_query_engine() print(query_engine.query(args.query)) def register_query_cli(subparsers: _SubParsersAction) -> None: - """Register subcommand "query" to ArgumentParser""" + """Register subcommand "query" to ArgumentParser.""" parser = subparsers.add_parser("query") parser.add_argument( "query", diff --git 
a/experimental/cli/configuration.py b/experimental/cli/configuration.py index 395393b48b9d9d78c2373702d34e374287e4131e..4d2d8516cbaa9fac25681f6d3086623eb6ef2c41 100644 --- a/experimental/cli/configuration.py +++ b/experimental/cli/configuration.py @@ -28,7 +28,7 @@ DEFAULT_CONFIG = { def load_config(root: str = ".") -> ConfigParser: - """Load configuration from file""" + """Load configuration from file.""" config = ConfigParser() config.read_dict(DEFAULT_CONFIG) config.read(os.path.join(root, CONFIG_FILE_NAME)) @@ -36,13 +36,13 @@ def load_config(root: str = ".") -> ConfigParser: def save_config(config: ConfigParser, root: str = ".") -> None: - """Load configuration to file""" + """Save configuration to file.""" with open(os.path.join(root, CONFIG_FILE_NAME), "w") as fd: config.write(fd) def load_index(root: str = ".") -> BaseIndex[Any]: - """Load existing index file""" + """Load existing index file.""" config = load_config(root) service_context = _load_service_context(config) @@ -69,14 +69,14 @@ def load_index(root: str = ".") -> BaseIndex[Any]: def save_index(index: BaseIndex[Any], root: str = ".") -> None: - """Save index to file""" + """Save index to file.""" config = load_config(root) persist_dir = config["store"]["persist_dir"] index.storage_context.persist(persist_dir=persist_dir) def _load_service_context(config: ConfigParser) -> ServiceContext: - """Internal function to load service context based on configuration""" + """Internal function to load service context based on configuration.""" embed_model = _load_embed_model(config) llm_predictor = _load_llm_predictor(config) return ServiceContext.from_defaults( @@ -90,7 +90,7 @@ def _load_storage_context(config: ConfigParser) -> StorageContext: def _load_llm_predictor(config: ConfigParser) -> LLMPredictor: - """Internal function to load LLM predictor based on configuration""" + """Internal function to load LLM predictor based on configuration.""" model_type = config["llm_predictor"]["type"].lower() if model_type == "default": llm = _load_llm(config["llm_predictor"]) @@ -110,7 +110,7 @@ def _load_llm(section: SectionProxy) -> LLM: def _load_embed_model(config: ConfigParser) -> BaseEmbedding: - """Internal function to load embedding model based on configuration""" + """Internal function to load embedding model based on configuration.""" model_type = config["embed_model"]["type"] if model_type == "default": return OpenAIEmbedding() diff --git a/experimental/colbert_index/base.py b/experimental/colbert_index/base.py index 487a1ba2cc7231d59030e475316237f5b6b19089..c0d3bfa143c0889cd785b4b1b4f132b3b6fb559d 100644 --- a/experimental/colbert_index/base.py +++ b/experimental/colbert_index/base.py @@ -145,7 +145,7 @@ class ColbertIndex(BaseIndex[IndexDict]): """ doc_ids, _, scores = self.store.search(text=query_str, k=top_k) - node_doc_ids = list(map(lambda id: self._docs_pos_to_node_id[id], doc_ids)) + node_doc_ids = [self._docs_pos_to_node_id[doc_id] for doc_id in doc_ids] nodes = self.docstore.get_nodes(node_doc_ids) nodes_with_score = [] diff --git a/llama_index/agent/react/formatter.py b/llama_index/agent/react/formatter.py index 97d131fe310b7c793458f2aef6585272f76c506e..7680521239c2f640ed93b211131d9d331d99a283 100644 --- a/llama_index/agent/react/formatter.py +++ b/llama_index/agent/react/formatter.py @@ -11,7 +11,7 @@ from llama_index.tools import BaseTool def get_react_tool_descriptions(tools: Sequence[BaseTool]) -> List[str]: - """Tool""" + """Get tool descriptions.""" tool_descs = [] for tool in tools: tool_desc = ( diff --git 
a/llama_index/callbacks/llama_debug.py b/llama_index/callbacks/llama_debug.py index 1fff6895146c6a279e3518fb242dfd85b4bc2c0d..dd1073ec8eafbe4e0a8c3e85c905e6ef9330c94c 100644 --- a/llama_index/callbacks/llama_debug.py +++ b/llama_index/callbacks/llama_debug.py @@ -116,7 +116,7 @@ class LlamaDebugHandler(BaseCallbackHandler): def _get_time_stats_from_event_pairs( self, event_pairs: List[List[CBEvent]] ) -> EventStats: - """Calculate time-based stats for a set of event pairs""" + """Calculate time-based stats for a set of event pairs.""" total_secs = 0.0 for event_pair in event_pairs: start_time = datetime.strptime(event_pair[0].time, TIMESTAMP_FORMAT) diff --git a/llama_index/callbacks/open_inference_callback.py b/llama_index/callbacks/open_inference_callback.py index 0e6a0ff50fbe11c7a6218fff95a1dd7a909fb9a4..9b105ca24193b3b7032750f087366411d166f8fd 100644 --- a/llama_index/callbacks/open_inference_callback.py +++ b/llama_index/callbacks/open_inference_callback.py @@ -112,7 +112,7 @@ def as_dataframe(data: Iterable[BaseDataType]) -> "DataFrame": @dataclass class TraceData: - """Trace data""" + """Trace data.""" query_data: QueryData = field(default_factory=QueryData) node_datas: List[NodeData] = field(default_factory=list) diff --git a/llama_index/callbacks/simple_llm_handler.py b/llama_index/callbacks/simple_llm_handler.py index 3a89f34faa2aea0d7448dbaebca2debef6fcaee5..cc53a35960a52be0e56113e75b6c7c9eeaafbf71 100644 --- a/llama_index/callbacks/simple_llm_handler.py +++ b/llama_index/callbacks/simple_llm_handler.py @@ -5,7 +5,7 @@ from llama_index.callbacks.schema import CBEventType, EventPayload class SimpleLLMHandler(BaseCallbackHandler): - """Callback handler for printing llms inputs/outputs""" + """Callback handler for printing LLM inputs/outputs.""" def __init__(self) -> None: super().__init__(event_starts_to_ignore=[], event_ends_to_ignore=[]) diff --git a/llama_index/chat_engine/context.py b/llama_index/chat_engine/context.py index 10ee892c04bba8b2414dfd4019cb8706819e6409..dde6e5698da214ab8d92c9e586a0eb1415cc0b0d 100644 --- a/llama_index/chat_engine/context.py +++ b/llama_index/chat_engine/context.py @@ -127,7 +127,7 @@ class ContextChatEngine(BaseChatEngine): return self._context_template.format(context_str=context_str), nodes def _get_prefix_messages_with_context(self, context_str: str) -> List[ChatMessage]: - """Get the prefix messages with context""" + """Get the prefix messages with context.""" # ensure we grab the user-configured system prompt system_prompt = "" prefix_messages = self._prefix_messages diff --git a/llama_index/chat_engine/types.py b/llama_index/chat_engine/types.py index 6b96ba91cbda0ed164b56249ca59556875ff4f67..5b73cb5da77de8e2d9d30b439b2734abd8863899 100644 --- a/llama_index/chat_engine/types.py +++ b/llama_index/chat_engine/types.py @@ -18,12 +18,12 @@ logger.setLevel(logging.WARNING) def is_function(message: ChatMessage) -> bool: - """Utility for ChatMessage responses from OpenAI models""" + """Utility for ChatMessage responses from OpenAI models.""" return "function_call" in message.additional_kwargs class ChatResponseMode(str, Enum): - """Flag toggling waiting/streaming in `Agent._chat`""" + """Flag toggling waiting/streaming in `Agent._chat`.""" WAIT = "wait" STREAM = "stream" diff --git a/llama_index/embeddings/adapter.py b/llama_index/embeddings/adapter.py index e27e0e3144fc27dc4b804a9c29987386f7d42c55..a02516225e2d2232173fb4e9fad0e48efb2b350c 100644 --- a/llama_index/embeddings/adapter.py +++ b/llama_index/embeddings/adapter.py @@ -47,7 +47,7 @@ class 
AdapterEmbeddingModel(BaseEmbedding): embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE, callback_manager: Optional[CallbackManager] = None, ) -> None: - """Init params""" + """Init params.""" import torch from llama_index.embeddings.adapter_utils import BaseAdapter, LinearLayer diff --git a/llama_index/evaluation/benchmarks/hotpotqa.py b/llama_index/evaluation/benchmarks/hotpotqa.py index f09ef85629c733cfcfaa3187c0b694ecd732f2ef..4651fb70f78a13e2562fd326c52a7e600b1f88ed 100644 --- a/llama_index/evaluation/benchmarks/hotpotqa.py +++ b/llama_index/evaluation/benchmarks/hotpotqa.py @@ -22,7 +22,7 @@ hotpot/hotpot_dev_distractor_v1.json""" class HotpotQAEvaluator: """ - Refer to https://hotpotqa.github.io/ for more details on the dataset + Refer to https://hotpotqa.github.io/ for more details on the dataset. """ def _download_datasets(self) -> Dict[str, str]: diff --git a/llama_index/evaluation/dataset_generation.py b/llama_index/evaluation/dataset_generation.py index 299e24c1592355703e71d9e23a3cbfd7e1ae648d..3232f5d7fd5496f43f87a166a8baf2868060d368 100644 --- a/llama_index/evaluation/dataset_generation.py +++ b/llama_index/evaluation/dataset_generation.py @@ -1,4 +1,4 @@ -"""Dataset generation from documents""" +"""Dataset generation from documents.""" from __future__ import annotations import asyncio diff --git a/llama_index/indices/base_retriever.py b/llama_index/indices/base_retriever.py index 2885698a53271300263df72536f20e3fb7d0e21f..3d2f52b5b925a9a73ed1dc08030a0d1935f15c28 100644 --- a/llama_index/indices/base_retriever.py +++ b/llama_index/indices/base_retriever.py @@ -47,7 +47,7 @@ class BaseRetriever(ABC): def get_service_context(self) -> Optional[ServiceContext]: """Attempts to resolve a service context. Short-circuits at self.service_context, self._service_context, - or self._index.service_context + or self._index.service_context. """ if hasattr(self, "service_context"): return self.service_context diff --git a/llama_index/indices/loading.py b/llama_index/indices/loading.py index b917f93e33790f4f33dea1a6af92d5dace8efe69..4c65946e70978fbfc5f9b42886171b429ff68a31 100644 --- a/llama_index/indices/loading.py +++ b/llama_index/indices/loading.py @@ -50,7 +50,7 @@ def load_indices_from_storage( index_ids: Optional[Sequence[str]] = None, **kwargs: Any, ) -> List[BaseIndex]: - """Load multiple indices from storage context + """Load multiple indices from storage context. Args: storage_context (StorageContext): storage context containing diff --git a/llama_index/indices/managed/base.py b/llama_index/indices/managed/base.py index ac649407bbe177cb0b25f394d1c07db2f5d0bb6b..928ba66d50c5ba3f59a020218af2dfffea761176 100644 --- a/llama_index/indices/managed/base.py +++ b/llama_index/indices/managed/base.py @@ -20,7 +20,7 @@ class BaseManagedIndex(BaseIndex[IndexDict], ABC): The managed service can index documents into a managed service. How documents are structured into nodes is a detail for the managed service, and not exposed in this interface (although could be controlled by - configuration parameters) + configuration parameters). Args: show_progress (bool): Whether to show tqdm progress bars. Defaults to False. 
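Most of the docstring hunks in this patch are mechanical fixes for ruff's pydocstyle checks: D400/D415 require a docstring's first line to end in a period, and the `# noqa: D415` added in bench_embeddings.py opts one docstring out where a trailing period would be wrong. A minimal sketch of the convention, assuming ruff's documented D400/D415 behavior (function names here are illustrative, not from the patch):

    def load_data_bad() -> None:
        """Load data from disk"""  # D400/D415: first line should end with a period

    def load_data_good() -> None:
        """Load data from disk."""  # compliant

    def generate_strings_doc() -> None:
        """Generate batched string slices:
        offset 0: [0:string_length], [string_length:2*string_length], ...
        """  # noqa: D415 - suppress when the summary line is a lead-in, not a sentence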
diff --git a/llama_index/indices/managed/vectara/base.py b/llama_index/indices/managed/vectara/base.py index 21db9ccc4b1e97299a5060de065e1e4293a7abd5..abd6c1c491c642eee4442cc63b58a407c7838f1b 100644 --- a/llama_index/indices/managed/vectara/base.py +++ b/llama_index/indices/managed/vectara/base.py @@ -203,7 +203,7 @@ class VectaraIndex(BaseManagedIndex): ) -> Optional[str]: """Vectara provides a way to add files (binary or text) directly via our API where pre-processing and chunking occurs internally in an optimal way - This method provides a way to use that API in Llama_index + This method provides a way to use that API in Llama_index. # ruff: noqa: E501 Full API Docs: https://docs.vectara.com/docs/api-reference/indexing-apis/ diff --git a/llama_index/indices/managed/vectara/retriever.py b/llama_index/indices/managed/vectara/retriever.py index 25155d1b64763aa362d987c50dbd2aaf31ae9e0f..5fb08c263718ee5d8fd4701517edc755ffd2b839 100644 --- a/llama_index/indices/managed/vectara/retriever.py +++ b/llama_index/indices/managed/vectara/retriever.py @@ -1,5 +1,5 @@ """Vectara index. -An index that that is built on top of Vectara +An index that is built on top of Vectara. """ import json diff --git a/llama_index/indices/postprocessor/node.py b/llama_index/indices/postprocessor/node.py index 6a32ab1260e4eb88346ce7fe502bada39969b874..b1011097367b7fccdd626e85e86da680e25f1036 100644 --- a/llama_index/indices/postprocessor/node.py +++ b/llama_index/indices/postprocessor/node.py @@ -362,7 +362,7 @@ class LongContextReorder(BaseNodePostprocessor): performance typically arises when crucial data is positioned at the start or conclusion of the input context. Additionally, as the input context lengthens, performance drops notably, even - in models designed for long contexts." + in models designed for long contexts." """ @classmethod diff --git a/llama_index/llm_predictor/base.py b/llama_index/llm_predictor/base.py index 776226587c53fe3341f73b2640484395b23a78a2..5de86901826c8574b29afce81513fe519386b23a 100644 --- a/llama_index/llm_predictor/base.py +++ b/llama_index/llm_predictor/base.py @@ -263,7 +263,7 @@ class LLMPredictor(BaseLLMPredictor): self, formatted_prompt: str, ) -> str: - """Add system and query wrapper prompts to base prompt""" + """Add system and query wrapper prompts to base prompt.""" extended_prompt = formatted_prompt if self.system_prompt: extended_prompt = self.system_prompt + "\n\n" + extended_prompt @@ -276,7 +276,7 @@ class LLMPredictor(BaseLLMPredictor): return extended_prompt def _extend_messages(self, messages: List[ChatMessage]) -> List[ChatMessage]: - """Add system prompt to chat message list""" + """Add system prompt to chat message list.""" if self.system_prompt: messages = [ ChatMessage(role=MessageRole.SYSTEM, content=self.system_prompt), diff --git a/llama_index/llms/azure_openai.py b/llama_index/llms/azure_openai.py index 2dd0eae9c60a98235befe0d45b31b17ead60150c..21bf029ab41e93f2674b2de9a54372f642ef9047 100644 --- a/llama_index/llms/azure_openai.py +++ b/llama_index/llms/azure_openai.py @@ -10,7 +10,7 @@ AZURE_OPENAI_API_TYPE = "azure" class AzureOpenAI(OpenAI): """ - Azure OpenAI + Azure OpenAI. To use this, you must first deploy a model on Azure OpenAI. 
Unlike OpenAI, you need to specify an `engine` parameter to identify diff --git a/llama_index/llms/portkey.py b/llama_index/llms/portkey.py index 4fcbcdf452790870240b12f47c4a5a7561187622..c9e444af99946feaa30d4dfb7d92228b7cb8b318 100644 --- a/llama_index/llms/portkey.py +++ b/llama_index/llms/portkey.py @@ -1,5 +1,5 @@ """ -Portkey integration with Llama_index for enhanced monitoring +Portkey integration with Llama_index for enhanced monitoring. """ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Union, cast @@ -38,7 +38,7 @@ if TYPE_CHECKING: class Portkey(CustomLLM): - """_summary_ + """Portkey LLM. Args: LLM (_type_): _description_ diff --git a/llama_index/llms/portkey_utils.py b/llama_index/llms/portkey_utils.py index d15cb4136704d6a9411797fb92e860a375636251..b328ea402f7505b252a7c338e030c49147c7529b 100644 --- a/llama_index/llms/portkey_utils.py +++ b/llama_index/llms/portkey_utils.py @@ -1,5 +1,5 @@ """ -Utility Tools for the Portkey Class +Utility Tools for the Portkey Class. This file module contains a collection of utility functions designed to enhance the functionality and usability of the Portkey class diff --git a/llama_index/llms/predibase.py b/llama_index/llms/predibase.py index e0a279653bde32ba86baccd79168d67ba9739786..c70dc631d1ee11cd13ad066463f755ceedcbd024 100644 --- a/llama_index/llms/predibase.py +++ b/llama_index/llms/predibase.py @@ -14,7 +14,7 @@ from llama_index.llms.custom import CustomLLM class PredibaseLLM(CustomLLM): - """Predibase LLM""" + """Predibase LLM.""" model_name: str = Field(description="The Predibase model to use.") predibase_api_key: str = Field(description="The Predibase API key to use.") diff --git a/llama_index/node_parser/extractors/metadata_extractors.py b/llama_index/node_parser/extractors/metadata_extractors.py index 88846e61d67c14b55fbef4a37d244a6a0885834b..166e5b559af0e7a605840efb7af14e5e77bc9955 100644 --- a/llama_index/node_parser/extractors/metadata_extractors.py +++ b/llama_index/node_parser/extractors/metadata_extractors.py @@ -403,7 +403,8 @@ class SummaryExtractor(MetadataFeatureExtractor): """ Summary extractor. Node-level extractor with adjacent sharing. Extracts `section_summary`, `prev_section_summary`, `next_section_summary` - metadata fields + metadata fields. 
+ Args: llm_predictor (Optional[BaseLLMPredictor]): LLM predictor summaries (List[str]): list of summaries to extract: 'self', 'prev', 'next' diff --git a/llama_index/node_parser/file/html.py b/llama_index/node_parser/file/html.py index 60b1e387f87830fb74d421299b4cd352636be513..c45498a1a99cbc6b1db5fb50d750b8c9d09a7f1a 100644 --- a/llama_index/node_parser/file/html.py +++ b/llama_index/node_parser/file/html.py @@ -88,7 +88,7 @@ class HTMLNodeParser(NodeParser): return all_nodes def get_nodes_from_node(self, node: BaseNode) -> List[TextNode]: - """Get nodes from document""" + """Get nodes from document.""" try: from bs4 import BeautifulSoup except ImportError: @@ -144,7 +144,7 @@ class HTMLNodeParser(NodeParser): node: BaseNode, metadata: dict, ) -> TextNode: - """Build node from single text split""" + """Build node from single text split.""" node = build_nodes_from_splits( [text_split], node, self.include_metadata, self.include_prev_next_rel )[0] diff --git a/llama_index/node_parser/file/json.py b/llama_index/node_parser/file/json.py index 207143e55c253125a67fddf55fbc193527f0bc1a..aa8e79a4b6a1bfd9c3f4835ae5562ca1d125818b 100644 --- a/llama_index/node_parser/file/json.py +++ b/llama_index/node_parser/file/json.py @@ -80,7 +80,7 @@ class JSONNodeParser(NodeParser): return all_nodes def get_nodes_from_node(self, node: BaseNode) -> List[TextNode]: - """Get nodes from document""" + """Get nodes from document.""" text = node.get_content(metadata_mode=MetadataMode.NONE) try: data = json.loads(text) @@ -132,7 +132,7 @@ class JSONNodeParser(NodeParser): node: BaseNode, metadata: dict, ) -> TextNode: - """Build node from single text split""" + """Build node from single text split.""" node = build_nodes_from_splits( [text_split], node, self.include_metadata, self.include_prev_next_rel )[0] diff --git a/llama_index/node_parser/file/markdown.py b/llama_index/node_parser/file/markdown.py index c5f9920509b8ee57513909afe8cec31f1c074179..13ccf80f8e9eadf7542d9b134a426938c1c64c2f 100644 --- a/llama_index/node_parser/file/markdown.py +++ b/llama_index/node_parser/file/markdown.py @@ -80,7 +80,7 @@ class MarkdownNodeParser(NodeParser): return all_nodes def get_nodes_from_node(self, node: BaseNode) -> List[TextNode]: - """Get nodes from document""" + """Get nodes from document.""" text = node.get_content(metadata_mode=MetadataMode.NONE) markdown_nodes = [] lines = text.split("\n") @@ -115,7 +115,7 @@ class MarkdownNodeParser(NodeParser): def _update_metadata( self, headers_metadata: dict, new_header: str, new_header_level: int ) -> dict: - """Update the markdown headers for metadata + """Update the markdown headers for metadata. 
Removes all headers that are equal or less than the level of the newly found header @@ -136,7 +136,7 @@ class MarkdownNodeParser(NodeParser): node: BaseNode, metadata: dict, ) -> TextNode: - """Build node from single text split""" + """Build node from single text split.""" node = build_nodes_from_splits( [text_split], node, self.include_metadata, self.include_prev_next_rel )[0] diff --git a/llama_index/objects/tool_node_mapping.py b/llama_index/objects/tool_node_mapping.py index e17d9cfb581bb014cd91cd325f75a08c6c3fbb91..18ee0c9ae5cafcb639631e17a3836221634747b3 100644 --- a/llama_index/objects/tool_node_mapping.py +++ b/llama_index/objects/tool_node_mapping.py @@ -1,4 +1,4 @@ -"""Tool mapping""" +"""Tool mapping.""" from typing import Any, Optional, Sequence diff --git a/llama_index/query_engine/citation_query_engine.py b/llama_index/query_engine/citation_query_engine.py index 7f01c049bd18003e172402be4ba57a945bcd1ad2..52bc021a53013a826e70eef6123f91b801022381 100644 --- a/llama_index/query_engine/citation_query_engine.py +++ b/llama_index/query_engine/citation_query_engine.py @@ -136,7 +136,7 @@ class CitationQueryEngine(BaseQueryEngine): # class-specific args **kwargs: Any, ) -> "CitationQueryEngine": - """Initialize a CitationQueryEngine object." + """Initialize a CitationQueryEngine object. Args: index: (BaseGPTIndex): index to use for querying diff --git a/llama_index/query_engine/knowledge_graph_query_engine.py b/llama_index/query_engine/knowledge_graph_query_engine.py index 53b8b9430fe34469343c54d12ddc0ecb5391eea5..8d00b2a44806ff355e2bce8e825ac60f7b8b135d 100644 --- a/llama_index/query_engine/knowledge_graph_query_engine.py +++ b/llama_index/query_engine/knowledge_graph_query_engine.py @@ -1,4 +1,4 @@ -""" Knowledge Graph Query Engine""" +"""Knowledge Graph Query Engine.""" import logging from typing import Any, List, Optional, Sequence diff --git a/llama_index/query_engine/retriever_query_engine.py b/llama_index/query_engine/retriever_query_engine.py index c6bfd389c2a6c9e960ced5a08219bf311d49f832..ad9915390e2e8527ebc8cfaacd8e18092a9ce4da 100644 --- a/llama_index/query_engine/retriever_query_engine.py +++ b/llama_index/query_engine/retriever_query_engine.py @@ -67,7 +67,7 @@ class RetrieverQueryEngine(BaseQueryEngine): # class-specific args **kwargs: Any, ) -> "RetrieverQueryEngine": - """Initialize a RetrieverQueryEngine object." + """Initialize a RetrieverQueryEngine object. Args: retriever (BaseRetriever): A retriever object. 
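The `list(map(lambda ...))` rewrite in the ColbertIndex hunk above recurs in the Discord and Google Sheets readers below. It matches flake8-comprehensions C417 ("unnecessary map usage"); notably, the pyproject.toml change later in this patch collapses the hand-picked C400-C419 list (which skipped C417) into the whole "C4" prefix, which appears to be what newly enforces it. A small equivalence sketch over hypothetical data:

    docs_pos_to_node_id = {0: "node-a", 1: "node-b"}  # hypothetical mapping
    doc_ids = [1, 0]

    # before: map over a lambda, then materialize a list
    before = list(map(lambda i: docs_pos_to_node_id[i], doc_ids))
    # after: a list comprehension, as in the hunk above
    after = [docs_pos_to_node_id[i] for i in doc_ids]
    assert before == after == ["node-b", "node-a"]

The Bagel reader hunk that follows is a related simplification: PEP 586 defines `Literal["$and", "$or"]` as equivalent to `Union[Literal["$and"], Literal["$or"]]`, so the collapsed spelling changes nothing for type checkers.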
diff --git a/llama_index/readers/bagel.py b/llama_index/readers/bagel.py index aecb94ca65f0d154c321ce2087923b9847355446..cdf647f8c47caaef9749900e30748863eeb6bb15 100644 --- a/llama_index/readers/bagel.py +++ b/llama_index/readers/bagel.py @@ -18,15 +18,8 @@ Metadatas = List[Metadata] # Metadata Query Grammar LiteralValue = Union[str, int, float] -LogicalOperator = Union[Literal["$and"], Literal["$or"]] -WhereOperator = Union[ - Literal["$gt"], - Literal["$gte"], - Literal["$lt"], - Literal["$lte"], - Literal["$ne"], - Literal["$eq"], -] +LogicalOperator = Literal["$and", "$or"] +WhereOperator = Literal["$gt", "$gte", "$lt", "$lte", "$ne", "$eq"] OperatorExpression = Dict[Union[WhereOperator, LogicalOperator], LiteralValue] Where = Dict[ @@ -47,14 +40,7 @@ OneOrMany = Union[T, List[T]] # This should just be List[Literal["documents", "embeddings", "metadatas", "distances"]] # However, this provokes an incompatibility with the Overrides library and Python 3.7 -Include = List[ - Union[ - Literal["documents"], - Literal["embeddings"], - Literal["metadatas"], - Literal["distances"], - ] -] +Include = List[Literal["documents", "embeddings", "metadatas", "distances"]] LiteralValue = LiteralValue LogicalOperator = LogicalOperator diff --git a/llama_index/readers/deeplake.py b/llama_index/readers/deeplake.py index fa9fb3471c1f6e83115190af27b3f18c1a13844d..00c00f43a66e41c483d45359da3a97cbc2747931 100644 --- a/llama_index/readers/deeplake.py +++ b/llama_index/readers/deeplake.py @@ -30,7 +30,7 @@ def vector_search( distance_metric: distance function 'L2' for Euclidean, 'L1' for Nuclear, 'Max' l-infinity distance, 'cos' for cosine similarity, 'dot' for dot product returns: - nearest_indices: List, indices of nearest neighbors + nearest_indices: List, indices of nearest neighbors. """ # Calculate the distance between the query_vector and all data_vectors if isinstance(query_vector, list): @@ -62,7 +62,7 @@ class DeepLakeReader(BaseReader): self, token: Optional[str] = None, ): - """Initializing the deepLake reader""" + """Initialize the DeepLake reader.""" import_err_msg = ( "`deeplake` package not found, please run `pip install deeplake`" ) diff --git a/llama_index/readers/discord_reader.py b/llama_index/readers/discord_reader.py index 8ea42a0f2e11615a2a7b5d0367b4a425d9a86cbb..0b400d2b38924b76306d5e514265a31814a2d583 100644 --- a/llama_index/readers/discord_reader.py +++ b/llama_index/readers/discord_reader.py @@ -70,20 +70,18 @@ async def read_channel( ### Wraps each message in a Document containing the text \ # as well as some useful metadata properties. 
- return list( - map( - lambda msg: Document( - text=msg.content, - metadata={ - "message_id": msg.id, - "username": msg.author.name, - "created_at": msg.created_at, - "edited_at": msg.edited_at, - }, - ), - messages, + return [ + Document( + text=msg.content, + metadata={ + "message_id": msg.id, + "username": msg.author.name, + "created_at": msg.created_at, + "edited_at": msg.edited_at, + }, ) - ) + for msg in messages + ] class DiscordReader(BasePydanticReader): diff --git a/llama_index/readers/file/docs_reader.py b/llama_index/readers/file/docs_reader.py index 4817086932ce35abaa45884520425c043bde321b..4203e37c54389b77afc96e318c9ca5d7b0584268 100644 --- a/llama_index/readers/file/docs_reader.py +++ b/llama_index/readers/file/docs_reader.py @@ -71,7 +71,7 @@ class DocxReader(BaseReader): class HWPReader(BaseReader): - """Hwp Parser""" + """HWP Parser.""" def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) diff --git a/llama_index/readers/file/flat_reader.py b/llama_index/readers/file/flat_reader.py index 4ba16679cf052078663701d86acb53a3daa705ba..05130cfb3873cd56ccebebc77901c9846a5d89e3 100644 --- a/llama_index/readers/file/flat_reader.py +++ b/llama_index/readers/file/flat_reader.py @@ -7,7 +7,7 @@ from llama_index.schema import Document class FlatReader(BaseReader): - """Flat reader + """Flat reader. Extract raw text from a file and save the file type in the metadata """ diff --git a/llama_index/readers/github_readers/github_repository_reader.py b/llama_index/readers/github_readers/github_repository_reader.py index ef3265e7b0a1819752a01ccaa7f6eb67112b204f..bceee82511f6f3c5d9384a487f151f5752e01f53 100644 --- a/llama_index/readers/github_readers/github_repository_reader.py +++ b/llama_index/readers/github_readers/github_repository_reader.py @@ -348,44 +348,42 @@ class GithubRepositoryReader(BaseReader): + f"as {file_extension} with " + f"{reader.__class__.__name__}", ) - with tempfile.TemporaryDirectory() as tmpdirname: - with tempfile.NamedTemporaryFile( - dir=tmpdirname, - suffix=f".{file_extension}", - mode="w+b", - delete=False, - ) as tmpfile: - print_if_verbose( - self._verbose, - "created a temporary file" - + f"{tmpfile.name} for parsing {file_path}", - ) - tmpfile.write(file_content) - tmpfile.flush() - tmpfile.close() - try: - docs = reader.load_data(pathlib.Path(tmpfile.name)) - parsed_file = "\n\n".join([doc.get_content() for doc in docs]) - except Exception as e: - print_if_verbose(self._verbose, f"error while parsing {file_path}") - logger.error( - "Error while parsing " - + f"{file_path} with " - + f"{reader.__class__.__name__}:\n{e}" - ) - parsed_file = None - finally: - os.remove(tmpfile.name) - if parsed_file is None: - return None - return Document( - text=parsed_file, - id_=tree_sha, - metadata={ - "file_path": file_path, - "file_name": tree_path, - }, + with tempfile.TemporaryDirectory() as tmpdirname, tempfile.NamedTemporaryFile( + dir=tmpdirname, + suffix=f".{file_extension}", + mode="w+b", + delete=False, + ) as tmpfile: + print_if_verbose( + self._verbose, + "created a temporary file " + f"{tmpfile.name} for parsing {file_path}", + ) + tmpfile.write(file_content) + tmpfile.flush() + tmpfile.close() + try: + docs = reader.load_data(pathlib.Path(tmpfile.name)) + parsed_file = "\n\n".join([doc.get_content() for doc in docs]) + except Exception as e: + print_if_verbose(self._verbose, f"error while parsing {file_path}") + logger.error( + "Error while parsing " + + f"{file_path} with " + + f"{reader.__class__.__name__}:\n{e}" ) + 
parsed_file = None + finally: + os.remove(tmpfile.name) + if parsed_file is None: + return None + return Document( + text=parsed_file, + id_=tree_sha, + metadata={ + "file_path": file_path, + "file_name": tree_path, + }, + ) if __name__ == "__main__": diff --git a/llama_index/readers/google_readers/gsheets.py b/llama_index/readers/google_readers/gsheets.py index 03a2a642a5a1f32e2c843bbe08611be0ccaf2e56..74a3f0d4cd17030a621b51a31779801f79807312 100644 --- a/llama_index/readers/google_readers/gsheets.py +++ b/llama_index/readers/google_readers/gsheets.py @@ -104,8 +104,7 @@ class GoogleSheetsReader(BasePydanticReader): .execute() ) sheet_text += ( - "\n".join(map(lambda row: "\t".join(row), response.get("values", []))) - + "\n" + "\n".join("\t".join(row) for row in response.get("values", [])) + "\n" ) return sheet_text diff --git a/llama_index/readers/myscale.py b/llama_index/readers/myscale.py index aa166307d95f160100dcc5867d09de167ecb3658..fbb31db01e3568c07e51f5e19849e631002bdfce 100644 --- a/llama_index/readers/myscale.py +++ b/llama_index/readers/myscale.py @@ -21,7 +21,7 @@ def format_list_to_string(lst: List) -> str: class MyScaleSettings: - """MyScale Client Configuration + """MyScale Client Configuration. Attribute: table (str) : Table name to operate on. diff --git a/llama_index/readers/psychic.py b/llama_index/readers/psychic.py index 8c0edf44af86fd98187938493049d6a72807df6f..28485c75c8ab02aa97d7af5c5436a547cbb22911 100644 --- a/llama_index/readers/psychic.py +++ b/llama_index/readers/psychic.py @@ -47,7 +47,7 @@ class PsychicReader(BaseReader): def load_data( self, connector_id: Optional[str] = None, account_id: Optional[str] = None ) -> List[Document]: - """Load data from a Psychic connection + """Load data from a Psychic connection. Args: connector_id (str): The connector ID to connect to diff --git a/llama_index/readers/redis/utils.py b/llama_index/readers/redis/utils.py index 007aa6ee90482e18464e23c81463bd45627a689d..da7e1e3e126705c6ca75023b015300c341fe0d5b 100644 --- a/llama_index/readers/redis/utils.py +++ b/llama_index/readers/redis/utils.py @@ -70,7 +70,7 @@ def get_redis_query( sort: bool = True, filters: str = "*", ) -> "Query": - """Create a vector query for use with a SearchIndex + """Create a vector query for use with a SearchIndex. 
Args: return_fields (t.List[str]): A list of fields to return in the query results diff --git a/llama_index/response_synthesizers/accumulate.py b/llama_index/response_synthesizers/accumulate.py index b13b5485d183eefbd4271c925c1f2505e19d45cb..a95a63b2284631fa2ac6c407770a6362c9883370 100644 --- a/llama_index/response_synthesizers/accumulate.py +++ b/llama_index/response_synthesizers/accumulate.py @@ -47,7 +47,7 @@ class Accumulate(BaseSynthesizer): separator: str = "\n---------------------\n", **response_kwargs: Any, ) -> RESPONSE_TEXT_TYPE: - """Apply the same prompt to text chunks and return async responses""" + """Apply the same prompt to text chunks and return async responses.""" if self._streaming: raise ValueError("Unable to stream in Accumulate response mode") @@ -68,7 +68,7 @@ class Accumulate(BaseSynthesizer): separator: str = "\n---------------------\n", **response_kwargs: Any, ) -> RESPONSE_TEXT_TYPE: - """Apply the same prompt to text chunks and return responses""" + """Apply the same prompt to text chunks and return responses.""" if self._streaming: raise ValueError("Unable to stream in Accumulate response mode") diff --git a/llama_index/selectors/llm_selectors.py b/llama_index/selectors/llm_selectors.py index f596cda90874896b7c9966e5c005060604c361d6..1ed035b198d7212101c8e1034cd9ebcb70f32158 100644 --- a/llama_index/selectors/llm_selectors.py +++ b/llama_index/selectors/llm_selectors.py @@ -41,7 +41,7 @@ def _structured_output_to_selector_result(output: Any) -> SelectorResult: class LLMSingleSelector(BaseSelector): - """LLM single selector + """LLM single selector. LLM-based selector that chooses one out of many options. @@ -124,7 +124,7 @@ class LLMSingleSelector(BaseSelector): class LLMMultiSelector(BaseSelector): - """LLM multi selector + """LLM multi selector. LLM-based selector that chooses multiple out of many options. diff --git a/llama_index/storage/docstore/firestore_docstore.py b/llama_index/storage/docstore/firestore_docstore.py index 8f335700f9bc9f7ae26d073ea920ce3eb33c16f4..26f71dac4b0f5dfa514522d25c4c05e5c79cb4fe 100644 --- a/llama_index/storage/docstore/firestore_docstore.py +++ b/llama_index/storage/docstore/firestore_docstore.py @@ -34,7 +34,7 @@ class FirestoreDocumentStore(KVDocumentStore): Args: project (str): The project which the client acts on behalf of. database (str): The database name that the client targets. - namespace (str): namespace for the docstore + namespace (str): namespace for the docstore. """ firestore_kvstore = FirestoreKVStore(project=project, database=database) return cls(firestore_kvstore, namespace) diff --git a/llama_index/storage/index_store/firestore_indexstore.py b/llama_index/storage/index_store/firestore_indexstore.py index 1cea883bf25971c662f134c8d2291d2cda8d813e..8f777f849a4a65780458dc97d72e9c6111e142f5 100644 --- a/llama_index/storage/index_store/firestore_indexstore.py +++ b/llama_index/storage/index_store/firestore_indexstore.py @@ -32,7 +32,7 @@ class FirestoreIndexStore(KVIndexStore): Args: project (str): The project which the client acts on behalf of. database (str): The database name that the client targets. - namespace (str): namespace for the docstore + namespace (str): namespace for the docstore. 
""" firestore_kvstore = FirestoreKVStore(project=project, database=database) return cls(firestore_kvstore, namespace) diff --git a/llama_index/tools/tool_spec/load_and_search/base.py b/llama_index/tools/tool_spec/load_and_search/base.py index 0ee04b533d3887d0464c0729d30cbb8d81d3e298..bde2bc6a8b1ddc809cf6006055d51328ac01b636 100644 --- a/llama_index/tools/tool_spec/load_and_search/base.py +++ b/llama_index/tools/tool_spec/load_and_search/base.py @@ -17,7 +17,7 @@ from llama_index.tools.utils import create_schema_from_function class LoadAndSearchToolSpec(BaseToolSpec): - """Load and Search Tool + """Load and Search Tool. This tool can be used with other tools that load large amounts of information. Compared to OndemandLoaderTool this returns two tools, diff --git a/llama_index/tts/elevenlabs.py b/llama_index/tts/elevenlabs.py index 5b8f2309a10de6d7208af4e3294f89570b115c4a..64aa65868bc7e02badc92ed51566c0688ee44f29 100644 --- a/llama_index/tts/elevenlabs.py +++ b/llama_index/tts/elevenlabs.py @@ -14,8 +14,6 @@ class ElevenLabsTTS(BaseTTS): """ def __init__(self, api_key: Optional[str] = None) -> None: - """ """ - super().__init__() self.api_key = api_key diff --git a/llama_index/utils.py b/llama_index/utils.py index 59241bb6c4e2ff38621add993a926dc38f92d5d0..c2be210b13a6bf0341cf8237fc56799130fade7c 100644 --- a/llama_index/utils.py +++ b/llama_index/utils.py @@ -241,7 +241,7 @@ def get_transformer_tokenizer_fn(model_name: str) -> Callable[[str], List[str]]: """ Args: model_name(str): the model name of the tokenizer. - For instance, fxmarty/tiny-llama-fast-tokenizer + For instance, fxmarty/tiny-llama-fast-tokenizer. """ try: from transformers import AutoTokenizer @@ -255,7 +255,7 @@ def get_transformer_tokenizer_fn(model_name: str) -> Callable[[str], List[str]]: def get_cache_dir() -> str: """Locate a platform-appropriate cache directory for llama_index, - and create it if it doesn't yet exist + and create it if it doesn't yet exist. """ # User override if "LLAMA_INDEX_CACHE_DIR" in os.environ: diff --git a/llama_index/vector_stores/cassandra.py b/llama_index/vector_stores/cassandra.py index 2722259289816483c345e2b32d52011cb8a9a294..6afdc7aab52b0b515069a9f6fdeb2392bd0a3964 100644 --- a/llama_index/vector_stores/cassandra.py +++ b/llama_index/vector_stores/cassandra.py @@ -171,7 +171,7 @@ class CassandraVectorStore(VectorStore): @property def client(self) -> Any: - """Return the underlying cassIO vector table object""" + """Return the underlying cassIO vector table object.""" return self.vector_table @staticmethod diff --git a/llama_index/vector_stores/cogsearch.py b/llama_index/vector_stores/cogsearch.py index ac194b76c9dbb25f57dcde76a0bbb074a48e7979..51e99fb327354bb6aff92883677f2d3aed1e5a46 100644 --- a/llama_index/vector_stores/cogsearch.py +++ b/llama_index/vector_stores/cogsearch.py @@ -27,7 +27,7 @@ class MetadataIndexFieldType(int, enum.Enum): """ Enumeration representing the supported types for metadata fields in an Azure Cognitive Search Index, corresponds with types supported in a flat - metadata dictionary + metadata dictionary. 
""" STRING = auto() # "Edm.String" @@ -38,7 +38,7 @@ class MetadataIndexFieldType(int, enum.Enum): class IndexManagement(int, enum.Enum): - """Enumeration representing the supported index management operations""" + """Enumeration representing the supported index management operations.""" NO_VALIDATION = auto() VALIDATE_INDEX = auto() @@ -85,7 +85,7 @@ class CognitiveSearchVectorStore(VectorStore): self._create_index(index_name) def _create_metadata_index_fields(self) -> List[Any]: - """Create a list of index fields for storing metadata values""" + """Create a list of index fields for storing metadata values.""" from azure.search.documents.indexes.models import SimpleField index_fields = [] @@ -113,7 +113,7 @@ class CognitiveSearchVectorStore(VectorStore): def _create_index(self, index_name: Optional[str]) -> None: """ Creates a default index based on the supplied index name, key field names and - metadata filtering keys + metadata filtering keys. """ from azure.search.documents.indexes.models import ( HnswParameters, @@ -438,7 +438,7 @@ class CognitiveSearchVectorStore(VectorStore): return ids def _create_index_document(self, node: BaseNode) -> Dict[str, Any]: - """Create Cognitive Search index document from embedding result""" + """Create Cognitive Search index document from embedding result.""" doc: Dict[str, Any] = {} doc["id"] = node.node_id doc["chunk"] = node.get_content(metadata_mode=MetadataMode.NONE) or "" @@ -478,7 +478,7 @@ class CognitiveSearchVectorStore(VectorStore): self._search_client.delete_documents(docs_to_delete) def _create_odata_filter(self, metadata_filters: MetadataFilters) -> str: - """Generate an OData filter string using supplied metadata filters""" + """Generate an OData filter string using supplied metadata filters.""" odata_filter: List[str] = [] for f in metadata_filters.filters: if not isinstance(f, ExactMatchFilter): diff --git a/llama_index/vector_stores/dynamodb.py b/llama_index/vector_stores/dynamodb.py index 8e4c8e6dfb520caf0cd1d8494ff692efc91af3bb..bd89f72447f4bc70f1ab3d0773600618ca2aa6ff 100644 --- a/llama_index/vector_stores/dynamodb.py +++ b/llama_index/vector_stores/dynamodb.py @@ -109,7 +109,7 @@ class DynamoDBVectorStore(VectorStore): ) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: - """Get nodes for response""" + """Get nodes for response.""" if query.filters is not None: raise ValueError( "Metadata filters not implemented for SimpleVectorStore yet." 
diff --git a/llama_index/vector_stores/elasticsearch.py b/llama_index/vector_stores/elasticsearch.py index dab60f7eb9bf48921d6904364242a190feda9d08..cba936152be64f52852cd784037ecf28100bd22d 100644 --- a/llama_index/vector_stores/elasticsearch.py +++ b/llama_index/vector_stores/elasticsearch.py @@ -199,12 +199,12 @@ class ElasticsearchStore(VectorStore): @property def client(self) -> Any: - """Get async elasticsearch client""" + """Get async elasticsearch client.""" return self._client @staticmethod def get_user_agent() -> str: - """Get user agent for elasticsearch client""" + """Get user agent for elasticsearch client.""" import llama_index return f"llama_index-py-vs/{llama_index.__version__}" diff --git a/llama_index/vector_stores/myscale.py b/llama_index/vector_stores/myscale.py index 05e4f25c314d2c9eead7043dbe4b28eb620acc16..94022cd956c4e4af54fc68f68d70cc0b1321bd43 100644 --- a/llama_index/vector_stores/myscale.py +++ b/llama_index/vector_stores/myscale.py @@ -241,7 +241,7 @@ class MyScaleVectorStore(VectorStore): raise NotImplementedError("Delete not yet implemented for MyScale index.") def drop(self) -> None: - """Drop MyScale Index and table""" + """Drop MyScale Index and table.""" self._client.command( f"DROP TABLE IF EXISTS {self.config.database}.{self.config.table}" ) diff --git a/llama_index/vector_stores/neo4jvector.py b/llama_index/vector_stores/neo4jvector.py index c25a0c5fa39ad21a9fe83aa317c2984433fcf8c2..be3afe357d863998aca6314fb37f5880b91a04d4 100644 --- a/llama_index/vector_stores/neo4jvector.py +++ b/llama_index/vector_stores/neo4jvector.py @@ -10,7 +10,7 @@ from llama_index.vector_stores.utils import metadata_dict_to_node, node_to_metad def check_if_not_null(props: List[str], values: List[Any]) -> None: - """Check if variable is not null and raise error accordingly""" + """Check if variable is not null and raise error accordingly.""" for prop, value in zip(props, values): if not value: raise ValueError(f"Parameter `{prop}` must not be None or empty string") @@ -19,12 +19,12 @@ def check_if_not_null(props: List[str], values: List[Any]) -> None: def sort_by_index_name( lst: List[Dict[str, Any]], index_name: str ) -> List[Dict[str, Any]]: - """Sort first element to match the index_name if exists""" + """Sort the first element to match index_name, if it exists.""" return sorted(lst, key=lambda x: x.get("index_name") != index_name) def clean_params(params: List[BaseNode]) -> List[Dict[str, Any]]: - """Convert BaseNode object to a dictionary to be imported into Neo4j""" + """Convert BaseNode object to a dictionary to be imported into Neo4j.""" clean_params = [] for record in params: text = record.get_content(metadata_mode=MetadataMode.NONE) diff --git a/llama_index/vector_stores/postgres.py b/llama_index/vector_stores/postgres.py index faa32c99d970b24e748f1581137a6d730d215899..a603af5c132e74ddfddc7c26fd34c4a4cc1ccab1 100644 --- a/llama_index/vector_stores/postgres.py +++ b/llama_index/vector_stores/postgres.py @@ -1,6 +1,5 @@ import logging -from collections import namedtuple -from typing import Any, List, Optional, Type +from typing import Any, List, NamedTuple, Optional, Type from llama_index.bridge.pydantic import PrivateAttr from llama_index.schema import BaseNode, MetadataMode, TextNode @@ -13,9 +12,12 @@ from llama_index.vector_stores.types import ( ) from llama_index.vector_stores.utils import metadata_dict_to_node, node_to_metadata_dict -DBEmbeddingRow = namedtuple( - "DBEmbeddingRow", ["node_id", "text", "metadata", "similarity"] -) + +class DBEmbeddingRow(NamedTuple): + 
node_id: str + text: str + metadata: dict + similarity: float _logger = logging.getLogger(__name__) @@ -30,7 +32,7 @@ def get_data_model( embed_dim: int = 1536, ) -> Any: """ - This part create a dynamic sqlalchemy model with a new table + This part creates a dynamic SQLAlchemy model with a new table. """ from pgvector.sqlalchemy import Vector from sqlalchemy import Column, Computed @@ -229,18 +231,16 @@ class PGVectorStore(BasePydanticVectorStore): self._async_session = async_sessionmaker(self._async_engine) def _create_tables_if_not_exists(self) -> None: - with self._session() as session: - with session.begin(): - self._base.metadata.create_all(session.connection()) + with self._session() as session, session.begin(): + self._base.metadata.create_all(session.connection()) def _create_extension(self) -> None: import sqlalchemy - with self._session() as session: - with session.begin(): - statement = sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector") - session.execute(statement) - session.commit() + with self._session() as session, session.begin(): + statement = sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector") + session.execute(statement) + session.commit() def _initialize(self) -> None: if not self._is_initialized: @@ -264,25 +264,23 @@ class PGVectorStore(BasePydanticVectorStore): def add(self, nodes: List[BaseNode]) -> List[str]: self._initialize() ids = [] - with self._session() as session: - with session.begin(): - for node in nodes: - ids.append(node.node_id) - item = self._node_to_table_row(node) - session.add(item) - session.commit() + with self._session() as session, session.begin(): + for node in nodes: + ids.append(node.node_id) + item = self._node_to_table_row(node) + session.add(item) + session.commit() return ids async def async_add(self, nodes: List[BaseNode]) -> List[str]: self._initialize() ids = [] - async with self._async_session() as session: - async with session.begin(): - for node in nodes: - ids.append(node.node_id) - item = self._node_to_table_row(node) - session.add(item) - await session.commit() + async with self._async_session() as session, session.begin(): + for node in nodes: + ids.append(node.node_id) + item = self._node_to_table_row(node) + session.add(item) + await session.commit() return ids def _apply_filters_and_limit( @@ -325,20 +323,19 @@ class PGVectorStore(BasePydanticVectorStore): metadata_filters: Optional[MetadataFilters] = None, ) -> List[DBEmbeddingRow]: stmt = self._build_query(embedding, limit, metadata_filters) - with self._session() as session: - with session.begin(): - res = session.execute( - stmt, + with self._session() as session, session.begin(): + res = session.execute( + stmt, + ) + return [ + DBEmbeddingRow( + node_id=item.node_id, + text=item.text, + metadata=item.metadata_, + similarity=(1 - distance) if distance is not None else 0, ) - return [ - DBEmbeddingRow( - node_id=item.node_id, - text=item.text, - metadata=item.metadata_, - similarity=(1 - distance) if distance is not None else 0, - ) - for item, distance in res.all() - ] + for item, distance in res.all() + ] async def _aquery_with_score( self, @@ -347,18 +344,17 @@ class PGVectorStore(BasePydanticVectorStore): metadata_filters: Optional[MetadataFilters] = None, ) -> List[DBEmbeddingRow]: stmt = self._build_query(embedding, limit, metadata_filters) - async with self._async_session() as async_session: - async with async_session.begin(): - res = await async_session.execute(stmt) - return [ - DBEmbeddingRow( - node_id=item.node_id, - 
text=item.text, - metadata=item.metadata_, - similarity=(1 - distance) if distance is not None else 0, - ) - for item, distance in res.all() - ] + async with self._async_session() as async_session, async_session.begin(): + res = await async_session.execute(stmt) + return [ + DBEmbeddingRow( + node_id=item.node_id, + text=item.text, + metadata=item.metadata_, + similarity=(1 - distance) if distance is not None else 0, + ) + for item, distance in res.all() + ] def _build_sparse_query( self, @@ -392,18 +388,17 @@ class PGVectorStore(BasePydanticVectorStore): metadata_filters: Optional[MetadataFilters] = None, ) -> List[DBEmbeddingRow]: stmt = self._build_sparse_query(query_str, limit, metadata_filters) - async with self._async_session() as async_session: - async with async_session.begin(): - res = await async_session.execute(stmt) - return [ - DBEmbeddingRow( - node_id=item.node_id, - text=item.text, - metadata=item.metadata_, - similarity=rank, - ) - for item, rank in res.all() - ] + async with self._async_session() as async_session, async_session.begin(): + res = await async_session.execute(stmt) + return [ + DBEmbeddingRow( + node_id=item.node_id, + text=item.text, + metadata=item.metadata_, + similarity=rank, + ) + for item, rank in res.all() + ] def _sparse_query_with_rank( self, @@ -412,18 +407,17 @@ class PGVectorStore(BasePydanticVectorStore): metadata_filters: Optional[MetadataFilters] = None, ) -> List[DBEmbeddingRow]: stmt = self._build_sparse_query(query_str, limit, metadata_filters) - with self._session() as session: - with session.begin(): - res = session.execute(stmt) - return [ - DBEmbeddingRow( - node_id=item.node_id, - text=item.text, - metadata=item.metadata_, - similarity=rank, - ) - for item, rank in res.all() - ] + with self._session() as session, session.begin(): + res = session.execute(stmt) + return [ + DBEmbeddingRow( + node_id=item.node_id, + text=item.text, + metadata=item.metadata_, + similarity=rank, + ) + for item, rank in res.all() + ] async def _async_hybrid_query( self, query: VectorStoreQuery @@ -540,15 +534,14 @@ class PGVectorStore(BasePydanticVectorStore): import sqlalchemy self._initialize() - with self._session() as session: - with session.begin(): - stmt = sqlalchemy.text( - f"DELETE FROM public.data_{self.table_name} where " - f"(metadata_->>'doc_id')::text = '{ref_doc_id}' " - ) + with self._session() as session, session.begin(): + stmt = sqlalchemy.text( + f"DELETE FROM public.data_{self.table_name} where " + f"(metadata_->>'doc_id')::text = '{ref_doc_id}' " + ) - session.execute(stmt) - session.commit() + session.execute(stmt) + session.commit() def _dedup_results(results: List[DBEmbeddingRow]) -> List[DBEmbeddingRow]: diff --git a/llama_index/vector_stores/redis.py b/llama_index/vector_stores/redis.py index 08335461407d5c7b92001b08e288a6acdc48c326..ff1c9774deb390b789ce88834aa3faada65bc290 100644 --- a/llama_index/vector_stores/redis.py +++ b/llama_index/vector_stores/redis.py @@ -124,7 +124,7 @@ class RedisVectorStore(VectorStore): @property def client(self) -> "RedisType": - """Return the redis client instance""" + """Return the redis client instance.""" return self._redis_client def add(self, nodes: List[BaseNode]) -> List[str]: diff --git a/llama_index/vector_stores/rocksetdb.py b/llama_index/vector_stores/rocksetdb.py index 5b715ba773e99411a25f544b02e192e28c2ba1f6..0c3c1e596e634ac5cd5f69962dcebb8c72f249fe 100644 --- a/llama_index/vector_stores/rocksetdb.py +++ b/llama_index/vector_stores/rocksetdb.py @@ -24,7 +24,7 @@ T = TypeVar("T", 
bound="RocksetVectorStore") def _get_rockset() -> ModuleType: """Gets the rockset module and raises an ImportError if - the rockset package hasn't been installed + the rockset package hasn't been installed. Returns: rockset module (ModuleType) @@ -124,7 +124,7 @@ class RocksetVectorStore(VectorStore): return self.rs def add(self, nodes: List[BaseNode]) -> List[str]: - """Stores vectors in the collection + """Stores vectors in the collection. Args: nodes (List[BaseNode]): List of nodes with embeddings @@ -151,7 +151,7 @@ class RocksetVectorStore(VectorStore): ] def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: - """Deletes nodes stored in the collection by their ref_doc_id + """Deletes nodes stored in the collection by their ref_doc_id. Args: ref_doc_id (str): The ref_doc_id of the document @@ -177,7 +177,7 @@ class RocksetVectorStore(VectorStore): ) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: - """Gets nodes relevant to a query + """Gets nodes relevant to a query. Args: query (llama_index.vector_stores.types.VectorStoreQuery): The query diff --git a/llama_index/vector_stores/tair.py b/llama_index/vector_stores/tair.py index e611be1cac3ba2340c337cf21ec2970b66a1a70e..7eb1501253361f0a37db1794ac556717c1dd7479 100644 --- a/llama_index/vector_stores/tair.py +++ b/llama_index/vector_stores/tair.py @@ -122,7 +122,7 @@ class TairVectorStore(VectorStore): @property def client(self) -> "Tair": - """Return the Tair client instance""" + """Return the Tair client instance.""" return self._tair_client def add(self, nodes: List[BaseNode]) -> List[str]: diff --git a/llama_index/vector_stores/timescalevector.py b/llama_index/vector_stores/timescalevector.py index bb67bda14148ea6fb8cd800f8e5b9024248a2c07..ec90fdfc5fc05e1b137b1d2e10347e1bf894ee40 100644 --- a/llama_index/vector_stores/timescalevector.py +++ b/llama_index/vector_stores/timescalevector.py @@ -15,7 +15,7 @@ from llama_index.vector_stores.utils import metadata_dict_to_node, node_to_metad class IndexType(enum.Enum): - """Enumerator for the supported Index types""" + """Enumerator for the supported Index types.""" TIMESCALE_VECTOR = 1 PGVECTOR_IVFFLAT = 2 diff --git a/pyproject.toml b/pyproject.toml index a02d1e4f79c8c0c906a4ee83dd980409f7b7f269..95eb022344dac8a1b7bff12edaa4548568bb5c38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,9 @@ exclude = [ "notebooks", ] ignore = [ - "COM812", + "COM812", # Too aggressive + "D212", # Using D213 + "D417", # Too aggressive "F541", # Messes with prompts.py "TCH002", "UP006", # Messes with pydantic @@ -31,22 +33,7 @@ select = [ "B011", "B013", "B014", - "C400", - "C401", - "C402", - "C403", - "C404", - "C405", - "C406", - "C408", - "C409", - "C410", - "C411", - "C413", - "C414", - "C416", - "C418", - "C419", + "C4", "COM812", "COM819", "D201", @@ -60,23 +47,9 @@ select = [ "D213", "D214", "D215", - "D400", - "D403", - "D405", - "D406", - "D407", - "D408", - "D409", - "D410", - "D411", - "D412", - "D413", - "D416", - "E703", - "E711", - "E712", - "E713", - "E714", + "D3", + "D4", + "E7", "EXE004", "F504", "F541", @@ -102,18 +75,7 @@ select = [ "PT006", "PT02", "PTH201", - "PYI009", - "PYI010", - "PYI011", - "PYI012", - "PYI014", - "PYI015", - "PYI020", - "PYI026", - "PYI029", - "PYI032", - "PYI053", - "PYI054", + "PYI", "Q", "RET501", "RET502", @@ -128,15 +90,7 @@ select = [ "SIM103", "SIM109", "SIM118", - "SIM201", - "SIM202", - "SIM208", - "SIM211", - "SIM212", - "SIM220", - "SIM221", - "SIM222", - "SIM223", + "SIM2", "SIM300", "SIM9", 
"TCH005", diff --git a/tests/conftest.py b/tests/conftest.py index 54f5d47b95da331e2ea0e49e1cdfbc62fb53c57b..cc6dac336c5b9cc265550570762a851cce00e1e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,7 +122,7 @@ class CachedOpenAIApiKeys: openai.api_key = "sk-" + "a" * 48 # No matter what, set the environment variable back to what it was - def __exit__(self, *exc: Any) -> None: + def __exit__(self, *exc: object) -> None: os.environ["OPENAI_API_KEY"] = str(self.api_env_variable_was) os.environ["OPENAI_API_TYPE"] = str(self.api_env_type_was) openai.api_key = self.openai_api_key_was diff --git a/tests/indices/postprocessor/test_base.py b/tests/indices/postprocessor/test_base.py index 2d64821c0017344c90ea7d26ad6d95ced3987197..955002fb51274937efad6ca7a8394a891b29ec01 100644 --- a/tests/indices/postprocessor/test_base.py +++ b/tests/indices/postprocessor/test_base.py @@ -25,7 +25,7 @@ from llama_index.schema import ( ) from llama_index.storage.docstore.simple_docstore import SimpleDocumentStore -spacy_installed = True if find_spec("spacy") else False +spacy_installed = bool(find_spec("spacy")) def test_forward_back_processor(tmp_path: Path) -> None: diff --git a/tests/indices/query/test_embedding_utils.py b/tests/indices/query/test_embedding_utils.py index 2dbb83e8ded85d166e6ea54030e918837aca4d55..646650d6f6ea68df76fe37a70b4cff0b2575d6a0 100644 --- a/tests/indices/query/test_embedding_utils.py +++ b/tests/indices/query/test_embedding_utils.py @@ -1,4 +1,4 @@ -""" Test embedding utility functions""" +""" Test embedding utility functions.""" import numpy as np from llama_index.indices.query.embedding_utils import ( @@ -8,7 +8,7 @@ from llama_index.indices.query.embedding_utils import ( def test_get_top_k_mmr_embeddings() -> None: - """Test Maximum Marginal Relevance""" + """Test Maximum Marginal Relevance.""" # Results score should follow from the mmr algorithm query_embedding = [5.0, 0.0, 0.0] embeddings = [[4.0, 3.0, 0.0], [3.0, 4.0, 0.0], [-4.0, 3.0, 0.0]] diff --git a/tests/indices/tree/test_index.py b/tests/indices/tree/test_index.py index 9cb651ba2dbdc1c50fc094075c79fa2ebbb31b1e..629ca8d8d159e52473f2e1029990cbc2c547dc73 100644 --- a/tests/indices/tree/test_index.py +++ b/tests/indices/tree/test_index.py @@ -197,7 +197,7 @@ def test_insert( def test_twice_insert_empty( mock_service_context: ServiceContext, ) -> None: - """# test twice insert from empty (with_id)""" + """# test twice insert from empty (with_id).""" tree = TreeIndex.from_documents([], service_context=mock_service_context) # test first insert diff --git a/tests/llm_predictor/vellum/test_predictor.py b/tests/llm_predictor/vellum/test_predictor.py index cdf5f16ecd05d9ab21f00a112da04c50342ba35f..938111fa1b3f2ae6e072965efb8e11b37a645831 100644 --- a/tests/llm_predictor/vellum/test_predictor.py +++ b/tests/llm_predictor/vellum/test_predictor.py @@ -11,7 +11,7 @@ def test_predict__basic( vellum_predictor_factory: Callable[..., VellumPredictor], dummy_prompt: BasePromptTemplate, ) -> None: - """When the Vellum API returns expected values, so should our predictor""" + """When the Vellum API returns expected values, so should our predictor.""" vellum_client = mock_vellum_client_factory( compiled_prompt_text="What's you're favorite greeting?", completion_text="Hello, world!", @@ -29,7 +29,7 @@ def test_stream__basic( vellum_predictor_factory: Callable[..., VellumPredictor], dummy_prompt: BasePromptTemplate, ) -> None: - """When the Vellum API streams expected values, so should our predictor""" + """When the Vellum API streams 
diff --git a/tests/llm_predictor/vellum/test_predictor.py b/tests/llm_predictor/vellum/test_predictor.py
index cdf5f16ecd05d9ab21f00a112da04c50342ba35f..938111fa1b3f2ae6e072965efb8e11b37a645831 100644
--- a/tests/llm_predictor/vellum/test_predictor.py
+++ b/tests/llm_predictor/vellum/test_predictor.py
@@ -11,7 +11,7 @@ def test_predict__basic(
     vellum_predictor_factory: Callable[..., VellumPredictor],
     dummy_prompt: BasePromptTemplate,
 ) -> None:
-    """When the Vellum API returns expected values, so should our predictor"""
+    """When the Vellum API returns expected values, so should our predictor."""
     vellum_client = mock_vellum_client_factory(
         compiled_prompt_text="What's you're favorite greeting?",
         completion_text="Hello, world!",
@@ -29,7 +29,7 @@ def test_stream__basic(
     vellum_predictor_factory: Callable[..., VellumPredictor],
     dummy_prompt: BasePromptTemplate,
 ) -> None:
-    """When the Vellum API streams expected values, so should our predictor"""
+    """When the Vellum API streams expected values, so should our predictor."""
     import vellum

     vellum_client = mock_vellum_client_factory(
diff --git a/tests/llm_predictor/vellum/test_prompt_registry.py b/tests/llm_predictor/vellum/test_prompt_registry.py
index 0e845bb4c2ade8c723817b3102405606cd5d5c57..0e3146425f126f022a5f84a9f11bef5477cf330d 100644
--- a/tests/llm_predictor/vellum/test_prompt_registry.py
+++ b/tests/llm_predictor/vellum/test_prompt_registry.py
@@ -13,7 +13,7 @@ def test_from_prompt__new(
     mock_vellum_client_factory: Callable[..., mock.MagicMock],
     vellum_prompt_registry_factory: Callable[..., VellumPromptRegistry],
 ) -> None:
-    """We should register a new prompt if no deployment exists"""
+    """We should register a new prompt if no deployment exists."""
     from vellum.core import ApiError

     dummy_prompt = PromptTemplate(template="What's your favorite {thing}?")
@@ -32,7 +32,7 @@ def test_from_prompt__existing(
     mock_vellum_client_factory: Callable[..., mock.MagicMock],
     vellum_prompt_registry_factory: Callable[..., VellumPromptRegistry],
 ) -> None:
-    """We shouldn't register a new prompt if a deployment id or name is provided"""
+    """We shouldn't register a new prompt if a deployment id or name is provided."""
     dummy_prompt = PromptTemplate(
         template="What's your favorite {thing}?",
         metadata={"vellum_deployment_id": "abc"},
@@ -54,7 +54,7 @@ def test_get_compiled_prompt__basic(
     mock_vellum_client_factory: Callable[..., mock.MagicMock],
     vellum_prompt_registry_factory: Callable[..., VellumPromptRegistry],
 ) -> None:
-    """Verify that we can get a compiled prompt from the registry"""
+    """Verify that we can get a compiled prompt from the registry."""
     registered_prompt = VellumRegisteredPrompt(
         deployment_id="abc",
         deployment_name="my-deployment",
diff --git a/tests/llms/test_localai.py b/tests/llms/test_localai.py
index a468ad65a42981af8c9f8eeccdb0b4f44be22821..5dafe59b4951705803c626326fab78d15bfbb7b9 100644
--- a/tests/llms/test_localai.py
+++ b/tests/llms/test_localai.py
@@ -77,9 +77,8 @@ def test_forgetting_kwarg() -> None:
     with patch(
         "llama_index.llms.openai.completion_with_retry", return_value={}
-    ) as mock_completion:
-        with pytest.raises(NotImplementedError, match="/chat/completions"):
-            llm.complete("A long time ago in a galaxy far, far away")
+    ) as mock_completion, pytest.raises(NotImplementedError, match="/chat/completions"):
+        llm.complete("A long time ago in a galaxy far, far away")

     mock_completion.assert_not_called()
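The test_localai.py hunk merges two nested with-statements into one, the rewrite suggested by ruff's SIM117 rule. A small runnable sketch of the before/after shapes, using a stand-in context manager rather than the repo's actual fixtures:

# Illustrative only: "resource" is a hypothetical context manager, not
# anything from this patch.
from contextlib import contextmanager
from typing import Iterator


@contextmanager
def resource(name: str) -> Iterator[str]:
    print(f"enter {name}")
    yield name
    print(f"exit {name}")


# Before: two statements, two indent levels.
with resource("outer"):
    with resource("inner"):
        pass

# After: one statement, one indent level; enter/exit order is identical.
with resource("outer"), resource("inner"):
    pass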
""" return [ { diff --git a/tests/memory/test_chat_memory_buffer.py b/tests/memory/test_chat_memory_buffer.py index c263984268be9afd51bf56009fe5e9ca835a38b2..7b116880d5d00196af3ce7fbae26dab0ae82567b 100644 --- a/tests/memory/test_chat_memory_buffer.py +++ b/tests/memory/test_chat_memory_buffer.py @@ -69,7 +69,7 @@ def test_dict_save_load() -> None: def test_pickle() -> None: - """Unpickleable tiktoken tokenizer should be circumvented when pickling""" + """Unpickleable tiktoken tokenizer should be circumvented when pickling.""" memory = ChatMemoryBuffer.from_defaults() bytes_ = pickle.dumps(memory) assert isinstance(pickle.loads(bytes_), ChatMemoryBuffer) diff --git a/tests/readers/test_file.py b/tests/readers/test_file.py index 27b12b9afeb1e34d5d27c57d3589fcecc88a044e..4d1bee1d0174e3db6164b479d4bbad71a23eccea 100644 --- a/tests/readers/test_file.py +++ b/tests/readers/test_file.py @@ -343,6 +343,5 @@ def test_error_if_not_dir_or_file() -> None: SimpleDirectoryReader("not_a_dir") with pytest.raises(ValueError, match="File"): SimpleDirectoryReader(input_files=["not_a_file"]) - with TemporaryDirectory() as tmp_dir: - with pytest.raises(ValueError, match="No files"): - SimpleDirectoryReader(tmp_dir) + with TemporaryDirectory() as tmp_dir, pytest.raises(ValueError, match="No files"): + SimpleDirectoryReader(tmp_dir) diff --git a/tests/text_splitter/test_code_splitter.py b/tests/text_splitter/test_code_splitter.py index 075469c56ac4c6ab73031764fd43da5d51fce18b..e0580d9a39e0f6f7dfcaf7199f6a4c1a64d0c538 100644 --- a/tests/text_splitter/test_code_splitter.py +++ b/tests/text_splitter/test_code_splitter.py @@ -5,7 +5,7 @@ from llama_index.text_splitter import CodeSplitter def test_python_code_splitter() -> None: - """Test case for code splitting using python""" + """Test case for code splitting using python.""" if "CI" in os.environ: return @@ -26,7 +26,7 @@ def baz(): def test_typescript_code_splitter() -> None: - """Test case for code splitting using typescript""" + """Test case for code splitting using typescript.""" if "CI" in os.environ: return @@ -49,7 +49,7 @@ function baz() { def test_html_code_splitter() -> None: - """Test case for code splitting using typescript""" + """Test case for code splitting using typescript.""" if "CI" in os.environ: return @@ -82,7 +82,7 @@ def test_html_code_splitter() -> None: def test_tsx_code_splitter() -> None: - """Test case for code splitting using typescript""" + """Test case for code splitting using typescript.""" if "CI" in os.environ: return @@ -120,7 +120,7 @@ export default ExampleComponent;""" def test_cpp_code_splitter() -> None: - """Test case for code splitting using typescript""" + """Test case for code splitting using typescript.""" if "CI" in os.environ: return diff --git a/tests/text_splitter/test_sentence_splitter.py b/tests/text_splitter/test_sentence_splitter.py index 82b1df214db68ed4d8abb46f2ec81fac1d34e230..928dcb30ab89c107bc267d73d58d8344affaa8c6 100644 --- a/tests/text_splitter/test_sentence_splitter.py +++ b/tests/text_splitter/test_sentence_splitter.py @@ -58,7 +58,7 @@ def test_split_with_metadata(english_text: str) -> None: def test_edge_case() -> None: - """Test case from: https://github.com/jerryjliu/llama_index/issues/7287""" + """Test case from: https://github.com/jerryjliu/llama_index/issues/7287.""" text = "\n\nMarch 2020\n\nL&D Metric (Org) - 2.92%\n\n| Training Name | Category | Duration (hrs) | Invitees | Attendance | Target Training Hours | Actual Training Hours | Adoption % |\n| 
diff --git a/tests/text_splitter/test_sentence_splitter.py b/tests/text_splitter/test_sentence_splitter.py
index 82b1df214db68ed4d8abb46f2ec81fac1d34e230..928dcb30ab89c107bc267d73d58d8344affaa8c6 100644
--- a/tests/text_splitter/test_sentence_splitter.py
+++ b/tests/text_splitter/test_sentence_splitter.py
@@ -58,7 +58,7 @@ def test_split_with_metadata(english_text: str) -> None:


 def test_edge_case() -> None:
-    """Test case from: https://github.com/jerryjliu/llama_index/issues/7287"""
+    """Test case from: https://github.com/jerryjliu/llama_index/issues/7287."""
     text = "\n\nMarch 2020\n\nL&D Metric (Org) - 2.92%\n\n| Training Name | Category | Duration (hrs) | Invitees | Attendance | Target Training Hours | Actual Training Hours | Adoption % |\n| ---------------------------------------------------------------------------------------------------------------------- | --------------- | -------------- | -------- | ---------- | --------------------- | --------------------- | ---------- |\n| Overview of Data Analytics | Technical | 1 | 23 | 10 | 23 | 10 | 43.5 |\n| Sales & Learning Best Practices - Introduction to OTT Platforms | Technical | 0.5 | 16 | 12 | 8 | 6 | 75 |\n| Leading Through OKRs | Lifeskill | 1 | 1 | 1 | 1 | 1 | 100 |\n| COVID: Lockdown Awareness Session | Lifeskill | 2 | 1 | 1 | 2 | 2 | 100 |\n| Navgati Interview | Lifeskill | 2 | 6 | 6 | 12 | 12 | 100 |\n| leadership Summit | Leadership | 18 | 42 | 42 | 756 | 756 | 100 |\n| AWS - AI/ML - Online Conference | Project Related | 15 | 2 | 2 | 30 | 30 | 100 |\n"
     splitter = SentenceSplitter(tokenizer=tiktoken.get_encoding("gpt2").encode)
     chunks = splitter.split_text(text)
@@ -109,7 +109,7 @@ def test_split_texts_multiple() -> None:


 def test_split_texts_with_metadata(english_text: str) -> None:
-    """Test case for a list of texts with metadata"""
+    """Test case for a list of texts with metadata."""
     chunk_size = 100
     metadata_str = "word " * 50
     tokenizer = tiktoken.get_encoding("cl100k_base")
diff --git a/tests/tools/test_utils.py b/tests/tools/test_utils.py
index 6f5cc1ae3636c23da92a64ed137fe0770b0de70f..f727d8400b9397c586376c0a7fc738b571d723d1 100644
--- a/tests/tools/test_utils.py
+++ b/tests/tools/test_utils.py
@@ -23,7 +23,7 @@ def test_create_schema_from_function() -> None:
     assert schema["properties"]["a"]["type"] == "boolean"

     def test_fn2(x: int = 1) -> None:
-        """Optional input"""
+        """Optional input."""

     SchemaCls = create_schema_from_function("test_schema", test_fn2)
     schema = SchemaCls.schema()
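The final hunk exercises create_schema_from_function, which derives a pydantic schema from a plain function signature. A hedged approximation of what such a helper does, written against pydantic directly; this is an illustration under stated assumptions, not llama_index's actual implementation:

# schema_from_function is a hypothetical stand-in for the library helper.
import inspect
from typing import Any, Callable, Dict, Tuple, Type

from pydantic import BaseModel, create_model


def schema_from_function(name: str, fn: Callable[..., Any]) -> Type[BaseModel]:
    """Build a pydantic model whose fields mirror fn's parameters."""
    fields: Dict[str, Tuple[type, Any]] = {}
    for pname, param in inspect.signature(fn).parameters.items():
        # Fall back to str when a parameter is unannotated.
        annotation = param.annotation if param.annotation is not inspect.Parameter.empty else str
        # "..." marks a field as required; a real default makes it optional.
        default = param.default if param.default is not inspect.Parameter.empty else ...
        fields[pname] = (annotation, default)
    return create_model(name, **fields)


def test_fn2(x: int = 1) -> None:
    """Optional input."""


SchemaCls = schema_from_function("test_schema", test_fn2)
print(SchemaCls.schema())  # x comes out typed "integer" and, having a default, not required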