diff --git a/docs/00-introduction.ipynb b/docs/00-introduction.ipynb index 209aa103a441ac2eb5b16ef4e52e42811428142c..2183c213bb0cc04f8d156ad9346e94f179b98cf7 100644 --- a/docs/00-introduction.ipynb +++ b/docs/00-introduction.ipynb @@ -318,7 +318,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/examples/unstructured-element-splitter.ipynb b/docs/examples/unstructured-element-splitter.ipynb index c9d67c0da0881314194651f9f768b7cbeea1d10b..4eeb6fe93cdb7f2867b9376ae64f519f94c3a309 100644 --- a/docs/examples/unstructured-element-splitter.ipynb +++ b/docs/examples/unstructured-element-splitter.ipynb @@ -103,10 +103,10 @@ "source": [ "from unstructured.documents.elements import Element\n", "from colorama import Fore, Style\n", - "from typing import List\n", + "from typing import List, Dict\n", "\n", "\n", - "def group_elements_by_title(elements: List[Element]) -> dict:\n", + "def group_elements_by_title(elements: List[Element]) -> Dict:\n", " grouped_elements = {}\n", " current_title = \"Untitled\" # Default title for initial text without a title\n", "\n", @@ -143,10 +143,10 @@ "outputs": [], "source": [ "from semantic_router.splitters import RollingWindowSplitter\n", - "\n", + "from typing import Dict\n", "\n", "def create_title_chunks(\n", - " grouped_elements: dict, splitter: RollingWindowSplitter\n", + " grouped_elements: Dict, splitter: RollingWindowSplitter\n", ") -> list:\n", " title_with_chunks = []\n", " for title, elements in grouped_elements.items():\n", diff --git a/replace.py b/replace.py index 2e8c8c5960189d02f0ab35eec90789c6f519aa45..25592ff112b26eb586a3857884b419d10c3fa858 100644 --- a/replace.py +++ b/replace.py @@ -9,9 +9,9 @@ def replace_type_hints(file_path): # Decode the file data with error handling file_data = file_data.decode("utf-8", errors="ignore") - # Regular expression pattern to find 'dict[Type1, Type2] | None' and replace with 'Optional[dict[Type1, Type2]]' + # Regular expression pattern to find 'Dict[Type1, Type2] | None' and replace with 'Optional[Dict[Type1, Type2]]'. file_data = re.sub( - r"dict\[(\w+), (\w+)\]\s*\|\s*None", r"Optional[dict[\1, \2]]", file_data + r"Dict\[(\w+), (\w+)\]\s*\|\s*None", r"Optional[Dict[\1, \2]]", file_data ) with open(file_path, "w") as file: diff --git a/semantic_router/encoders/clip.py b/semantic_router/encoders/clip.py index 89a85cf3ff7cce83c11456ecee5345d532da6c37..6495c870fa25eb5e55f4ddf93081009dc752597c 100644 --- a/semantic_router/encoders/clip.py +++ b/semantic_router/encoders/clip.py @@ -2,7 +2,7 @@ from typing import Any, List, Optional import numpy as np from pydantic.v1 import PrivateAttr - +from typing import Dict from semantic_router.encoders import BaseEncoder @@ -10,9 +10,9 @@ class CLIPEncoder(BaseEncoder): name: str = "openai/clip-vit-base-patch16" type: str = "huggingface" score_threshold: float = 0.2 - tokenizer_kwargs: dict = {} - processor_kwargs: dict = {} - model_kwargs: dict = {} + tokenizer_kwargs: Dict = {} + processor_kwargs: Dict = {} + model_kwargs: Dict = {} device: Optional[str] = None _tokenizer: Any = PrivateAttr() _processor: Any = PrivateAttr() diff --git a/semantic_router/encoders/huggingface.py b/semantic_router/encoders/huggingface.py index 24bd5cd044c09bd8c2de2c5836ad8d79fc6204f8..7ca7580d972238824c5b42c8a2e8e89547cf9522 100644 --- a/semantic_router/encoders/huggingface.py +++ b/semantic_router/encoders/huggingface.py @@ -23,7 +23,7 @@ Classes: import requests import time import os -from typing import Any, List, Optional +from typing import Any, List, Optional, Dict from pydantic.v1 import PrivateAttr @@ -35,8 +35,8 @@ class HuggingFaceEncoder(BaseEncoder): name: str = "sentence-transformers/all-MiniLM-L6-v2" type: str = "huggingface" score_threshold: float = 0.5 - tokenizer_kwargs: dict = {} - model_kwargs: dict = {} + tokenizer_kwargs: Dict = {} + model_kwargs: Dict = {} device: Optional[str] = None _tokenizer: Any = PrivateAttr() _model: Any = PrivateAttr() diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index dbbe56f69110c4c9a162eddc711adbfc6bd8cfd7..17cc569ae8c9045ed40f3d47b0cb3912df46e4cd 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -39,7 +39,7 @@ class TfidfEncoder(BaseEncoder): self.word_index = self._build_word_index(docs) self.idf = self._compute_idf(docs) - def _build_word_index(self, docs: List[str]) -> dict: + def _build_word_index(self, docs: List[str]) -> Dict: words = set() for doc in docs: for word in doc.split(): diff --git a/semantic_router/encoders/vit.py b/semantic_router/encoders/vit.py index 65de365ac6abca6a941e5e51e6e538f1a8449a34..9002ec23dcf6f5c3dac0c5f295b51c8304ba5b32 100644 --- a/semantic_router/encoders/vit.py +++ b/semantic_router/encoders/vit.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional +from typing import Any, List, Optional, Dict from pydantic.v1 import PrivateAttr @@ -9,8 +9,8 @@ class VitEncoder(BaseEncoder): name: str = "google/vit-base-patch16-224" type: str = "huggingface" score_threshold: float = 0.5 - processor_kwargs: dict = {} - model_kwargs: dict = {} + processor_kwargs: Dict = {} + model_kwargs: Dict = {} device: Optional[str] = None _processor: Any = PrivateAttr() _model: Any = PrivateAttr() diff --git a/semantic_router/index/base.py b/semantic_router/index/base.py index d95a62a8316944a56c80d8b136e8a97d668f3725..bc5adcca26efe543728f48f8a4e61deb9911870b 100644 --- a/semantic_router/index/base.py +++ b/semantic_router/index/base.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union, Dict import numpy as np from pydantic.v1 import BaseModel @@ -35,7 +35,7 @@ class BaseIndex(BaseModel): """ raise NotImplementedError("This method should be implemented by subclasses.") - def describe(self) -> dict: + def describe(self) -> Dict: """ Returns a dictionary with index details such as type, dimensions, and total vector count. diff --git a/semantic_router/index/local.py b/semantic_router/index/local.py index 6d322f07c456a41afd54f0fa4e8571d9c30623ec..df9e02c18b4dfe10f8dd13274243a79690444184 100644 --- a/semantic_router/index/local.py +++ b/semantic_router/index/local.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Dict import numpy as np @@ -49,7 +49,7 @@ class LocalIndex(BaseIndex): raise ValueError("No routes have been added to the index.") return list(zip(self.routes, self.utterances)) - def describe(self) -> dict: + def describe(self) -> Dict: return { "type": self.type, "dimensions": self.index.shape[1] if self.index is not None else 0, diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index 31fbfac8476920f4576423c920f548de5df09da5..b70c5d47efa92063b61fa1ae7910314caddd9c9e 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -138,7 +138,7 @@ class PineconeIndex(BaseIndex): self.host = self.client.describe_index(self.index_name)["host"] return index - def _batch_upsert(self, batch: List[dict]): + def _batch_upsert(self, batch: List[Dict]): """Helper method for upserting a single batch of records.""" if self.index is not None: self.index.upsert(vectors=batch, namespace=self.namespace) @@ -241,7 +241,7 @@ class PineconeIndex(BaseIndex): def delete_all(self): self.index.delete(delete_all=True, namespace=self.namespace) - def describe(self) -> dict: + def describe(self) -> Dict: if self.index is not None: stats = self.index.describe_index_stats() return { diff --git a/semantic_router/index/qdrant.py b/semantic_router/index/qdrant.py index a2292da0eae2a9245a0782a4a2b40c34e2696b18..bb49f1fd41b5bc94368efffe6ade5e728a298bef 100644 --- a/semantic_router/index/qdrant.py +++ b/semantic_router/index/qdrant.py @@ -208,7 +208,7 @@ class QdrantIndex(BaseIndex): ), ) - def describe(self) -> dict: + def describe(self) -> Dict: collection_info = self.client.get_collection(self.index_name) return { diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 90cb7bfa6e9c4637bae3b5f485b297922cc85804..3639b7a8984bca9ab7669a3da61efdc5928345b5 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -428,7 +428,7 @@ class RouteLayer: def _retrieve( self, xq: Any, top_k: int = 5, route_filter: Optional[List[str]] = None - ) -> List[dict]: + ) -> List[Dict]: """Given a query vector, retrieve the top_k most similar records.""" # get scores and routes scores, routes = self.index.query( @@ -448,7 +448,7 @@ class RouteLayer: f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." ) - def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float]]: + def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float]]: scores_by_class = self.group_scores_by_class(query_results) # Calculate total score for each class @@ -473,7 +473,7 @@ class RouteLayer: return None def _semantic_classify_multiple_routes( - self, query_results: List[dict] + self, query_results: List[Dict] ) -> List[Tuple[str, float]]: scores_by_class = self.group_scores_by_class(query_results) @@ -496,7 +496,7 @@ class RouteLayer: return classes_above_threshold def group_scores_by_class( - self, query_results: List[dict] + self, query_results: List[Dict] ) -> Dict[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} for result in query_results: diff --git a/semantic_router/llms/base.py b/semantic_router/llms/base.py index 44af45e51b902670d035783d566ebec10dd2d20d..b89cfdc2cdb4ae777905421d2d6106bba30cc691 100644 --- a/semantic_router/llms/base.py +++ b/semantic_router/llms/base.py @@ -1,5 +1,5 @@ import json -from typing import Any, List, Optional +from typing import Any, List, Optional, Dict from pydantic.v1 import BaseModel @@ -20,7 +20,7 @@ class BaseLLM(BaseModel): raise NotImplementedError("Subclasses must implement this method") def _is_valid_inputs( - self, inputs: List[dict[str, Any]], function_schemas: List[dict[str, Any]] + self, inputs: List[Dict[str, Any]], function_schemas: List[Dict[str, Any]] ) -> bool: """Determine if the functions chosen by the LLM exist within the function_schemas, and if the input arguments are valid for those functions.""" @@ -49,7 +49,7 @@ class BaseLLM(BaseModel): logger.error(f"Input validation error: {str(e)}") return False - def _validate_single_function_inputs(self, inputs: dict[str, Any], function_schema: dict[str, Any]) -> bool: + def _validate_single_function_inputs(self, inputs: Dict[str, Any], function_schema: Dict[str, Any]) -> bool: """Validate the extracted inputs against the function schema""" try: # Extract parameter names and types from the signature string @@ -78,8 +78,8 @@ class BaseLLM(BaseModel): return param_names, param_types def extract_function_inputs( - self, query: str, function_schemas: List[dict[str, Any]] - ) -> dict: + self, query: str, function_schemas: List[Dict[str, Any]] + ) -> Dict: logger.info("Extracting function input...") prompt = f""" diff --git a/semantic_router/llms/llamacpp.py b/semantic_router/llms/llamacpp.py index 112cea9211d9dc70fdc5a08fdcc0da8d2ad56969..26d05129952d20675669045e0b9b5561e4bf0d36 100644 --- a/semantic_router/llms/llamacpp.py +++ b/semantic_router/llms/llamacpp.py @@ -1,6 +1,6 @@ from contextlib import contextmanager from pathlib import Path -from typing import Any, Optional, List +from typing import Any, Optional, List, Dict from pydantic.v1 import PrivateAttr @@ -79,8 +79,8 @@ class LlamaCppLLM(BaseLLM): self.grammar = None def extract_function_inputs( - self, query: str, function_schema: dict[str, Any] - ) -> dict: + self, query: str, function_schema: Dict[str, Any] + ) -> Dict: with self._grammar(): return super().extract_function_inputs( query=query, function_schema=function_schema diff --git a/semantic_router/llms/openai.py b/semantic_router/llms/openai.py index c316fdc1df914ac13b2db97afe1f26ee0dfbd52c..1e121a3e9952b71bb1f4ca812266cd9db24d584a 100644 --- a/semantic_router/llms/openai.py +++ b/semantic_router/llms/openai.py @@ -1,5 +1,5 @@ import os -from typing import List, Optional, Any +from typing import List, Optional, Any, Callable, Dict import openai from openai._types import NotGiven @@ -11,7 +11,6 @@ from semantic_router.utils.logger import logger import json from semantic_router.utils.function_call import get_schema, convert_python_type_to_json_type import inspect -from typing import Callable, Dict import re class OpenAILLM(BaseLLM): @@ -41,7 +40,7 @@ class OpenAILLM(BaseLLM): self.temperature = temperature self.max_tokens = max_tokens - def _extract_tool_calls_info(self, tool_calls: List[dict[str, Any]]) -> List[dict[str, Any]]: + def _extract_tool_calls_info(self, tool_calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]: tool_calls_info = [] for tool_call in tool_calls: if tool_call.function.arguments is None: @@ -57,7 +56,7 @@ class OpenAILLM(BaseLLM): def __call__( self, messages: List[Message], - function_schemas: Optional[List[dict[str, Any]]] = None, + function_schemas: Optional[List[Dict[str, Any]]] = None, ) -> str: if self.client is None: raise ValueError("OpenAI client is not initialized.") @@ -99,8 +98,8 @@ class OpenAILLM(BaseLLM): raise Exception(f"LLM error: {e}") from e def extract_function_inputs( - self, query: str, function_schemas: List[dict[str, Any]] - ) -> dict: + self, query: str, function_schemas: List[Dict[str, Any]] + ) -> Dict: messages = [] system_prompt = "You are an intelligent AI. Given a command or request from the user, call the function to complete the request." messages.append(Message(role="system", content=system_prompt)) diff --git a/semantic_router/schema.py b/semantic_router/schema.py index b18d9691d7650811ee73694a9515bf662d18d49b..63f1e440a1a816a79a3b37b974d191eac48a24af 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import List, Optional, Union, Any +from typing import List, Optional, Union, Any, Dict from pydantic.v1 import BaseModel @@ -24,7 +24,7 @@ class EncoderInfo(BaseModel): class RouteChoice(BaseModel): name: Optional[str] = None - function_call: Optional[List[dict]] = None + function_call: Optional[List[Dict]] = None similarity_score: Optional[float] = None @@ -55,7 +55,7 @@ class DocumentSplit(BaseModel): is_triggered: bool = False triggered_score: Optional[float] = None token_count: Optional[int] = None - metadata: Optional[dict] = None + metadata: Optional[Dict] = None @property def content(self) -> str: