diff --git a/docs/examples/unstructured-element-splitter.ipynb b/docs/examples/unstructured-element-splitter.ipynb index c9561b7538e986fd5ce6811fcf3639d9b919e9c0..c9d67c0da0881314194651f9f768b7cbeea1d10b 100644 --- a/docs/examples/unstructured-element-splitter.ipynb +++ b/docs/examples/unstructured-element-splitter.ipynb @@ -103,9 +103,10 @@ "source": [ "from unstructured.documents.elements import Element\n", "from colorama import Fore, Style\n", + "from typing import List\n", "\n", "\n", - "def group_elements_by_title(elements: list[Element]) -> dict:\n", + "def group_elements_by_title(elements: List[Element]) -> dict:\n", " grouped_elements = {}\n", " current_title = \"Untitled\" # Default title for initial text without a title\n", "\n", diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index 77069da2b89dafbbd05a9a95bc4d10bbee15a449..dbbe56f69110c4c9a162eddc711adbfc6bd8cfd7 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -1,6 +1,6 @@ import string from collections import Counter -from typing import Dict +from typing import Dict, List import numpy as np from numpy import ndarray @@ -20,7 +20,7 @@ class TfidfEncoder(BaseEncoder): self.word_index = {} self.idf = np.array([]) - def __call__(self, docs: list[str]) -> list[list[float]]: + def __call__(self, docs: List[str]) -> List[List[float]]: if len(self.word_index) == 0 or self.idf.size == 0: raise ValueError("Vectorizer is not initialized.") if len(docs) == 0: @@ -31,7 +31,7 @@ class TfidfEncoder(BaseEncoder): tfidf = tf * self.idf return tfidf.tolist() - def fit(self, routes: list[Route]): + def fit(self, routes: List[Route]): docs = [] for route in routes: for doc in route.utterances: @@ -39,7 +39,7 @@ class TfidfEncoder(BaseEncoder): self.word_index = self._build_word_index(docs) self.idf = self._compute_idf(docs) - def _build_word_index(self, docs: list[str]) -> dict: + def _build_word_index(self, docs: List[str]) -> dict: words = set() for doc in docs: for word in doc.split(): @@ -47,7 +47,7 @@ class TfidfEncoder(BaseEncoder): word_index = {word: i for i, word in enumerate(words)} return word_index - def _compute_tf(self, docs: list[str]) -> np.ndarray: + def _compute_tf(self, docs: List[str]) -> np.ndarray: if len(self.word_index) == 0: raise ValueError("Word index is not initialized.") tf = np.zeros((len(docs), len(self.word_index))) @@ -60,7 +60,7 @@ class TfidfEncoder(BaseEncoder): tf = tf / norm(tf, axis=1, keepdims=True) return tf - def _compute_idf(self, docs: list[str]) -> np.ndarray: + def _compute_idf(self, docs: List[str]) -> np.ndarray: if len(self.word_index) == 0: raise ValueError("Word index is not initialized.") idf = np.zeros(len(self.word_index)) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index b7905cca30d69b57638baa9ecb93ec79504a43ca..90cb7bfa6e9c4637bae3b5f485b297922cc85804 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -196,7 +196,7 @@ class RouteLayer: else: self.encoder = encoder self.llm = llm - self.routes: list[Route] = routes if routes is not None else [] + self.routes: List[Route] = routes if routes is not None else [] self.score_threshold = self.encoder.score_threshold self.top_k = top_k if self.top_k < 1: diff --git a/semantic_router/llms/base.py b/semantic_router/llms/base.py index d3f207a187f8d9690c422f19d6b0f1a057b979c0..44af45e51b902670d035783d566ebec10dd2d20d 100644 --- a/semantic_router/llms/base.py +++ b/semantic_router/llms/base.py @@ -20,7 +20,7 @@ class BaseLLM(BaseModel): raise NotImplementedError("Subclasses must implement this method") def _is_valid_inputs( - self, inputs: list[dict[str, Any]], function_schemas: list[dict[str, Any]] + self, inputs: List[dict[str, Any]], function_schemas: List[dict[str, Any]] ) -> bool: """Determine if the functions chosen by the LLM exist within the function_schemas, and if the input arguments are valid for those functions.""" @@ -68,7 +68,7 @@ class BaseLLM(BaseModel): logger.error(f"Single input validation error: {str(e)}") return False - def _extract_parameter_info(self, signature: str) -> tuple[list[str], list[str]]: + def _extract_parameter_info(self, signature: str) -> tuple[List[str], List[str]]: """Extract parameter names and types from the function signature.""" param_info = [param.strip() for param in signature[1:-1].split(",")] param_names = [info.split(":")[0].strip() for info in param_info] @@ -78,7 +78,7 @@ class BaseLLM(BaseModel): return param_names, param_types def extract_function_inputs( - self, query: str, function_schemas: list[dict[str, Any]] + self, query: str, function_schemas: List[dict[str, Any]] ) -> dict: logger.info("Extracting function input...") diff --git a/semantic_router/llms/llamacpp.py b/semantic_router/llms/llamacpp.py index 5a737b869b229fd36b8bf623e0ef6920423dbbd3..112cea9211d9dc70fdc5a08fdcc0da8d2ad56969 100644 --- a/semantic_router/llms/llamacpp.py +++ b/semantic_router/llms/llamacpp.py @@ -1,6 +1,6 @@ from contextlib import contextmanager from pathlib import Path -from typing import Any, Optional +from typing import Any, Optional, List from pydantic.v1 import PrivateAttr @@ -48,7 +48,7 @@ class LlamaCppLLM(BaseLLM): def __call__( self, - messages: list[Message], + messages: List[Message], ) -> str: try: completion = self.llm.create_chat_completion( diff --git a/semantic_router/llms/openai.py b/semantic_router/llms/openai.py index 7a887b8eaf80d345d270506463b32b992c11c886..c316fdc1df914ac13b2db97afe1f26ee0dfbd52c 100644 --- a/semantic_router/llms/openai.py +++ b/semantic_router/llms/openai.py @@ -41,7 +41,7 @@ class OpenAILLM(BaseLLM): self.temperature = temperature self.max_tokens = max_tokens - def _extract_tool_calls_info(self, tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]: + def _extract_tool_calls_info(self, tool_calls: List[dict[str, Any]]) -> List[dict[str, Any]]: tool_calls_info = [] for tool_call in tool_calls: if tool_call.function.arguments is None: @@ -57,7 +57,7 @@ class OpenAILLM(BaseLLM): def __call__( self, messages: List[Message], - function_schemas: Optional[list[dict[str, Any]]] = None, + function_schemas: Optional[List[dict[str, Any]]] = None, ) -> str: if self.client is None: raise ValueError("OpenAI client is not initialized.") @@ -99,7 +99,7 @@ class OpenAILLM(BaseLLM): raise Exception(f"LLM error: {e}") from e def extract_function_inputs( - self, query: str, function_schemas: list[dict[str, Any]] + self, query: str, function_schemas: List[dict[str, Any]] ) -> dict: messages = [] system_prompt = "You are an intelligent AI. Given a command or request from the user, call the function to complete the request." diff --git a/semantic_router/route.py b/semantic_router/route.py index 74209058f9a79e363e963054e4b4e85d0b854708..e9fe47bbf536ff8df3d4148b96cbb76b81c0dafe 100644 --- a/semantic_router/route.py +++ b/semantic_router/route.py @@ -47,7 +47,7 @@ class Route(BaseModel): name: str utterances: Union[List[str], List[Union[Any, "Image"]]] description: Optional[str] = None - function_schemas: Optional[list[Dict[str, Any]]] = None + function_schemas: Optional[List[Dict[str, Any]]] = None llm: Optional[BaseLLM] = None score_threshold: Optional[float] = None diff --git a/semantic_router/schema.py b/semantic_router/schema.py index f2c5baeb8cae606cd7027684c96bf17d9b15c89b..b18d9691d7650811ee73694a9515bf662d18d49b 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -24,7 +24,7 @@ class EncoderInfo(BaseModel): class RouteChoice(BaseModel): name: Optional[str] = None - function_call: Optional[list[dict]] = None + function_call: Optional[List[dict]] = None similarity_score: Optional[float] = None diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py index 89336a2b50f6b5f2db4d5ce514a1175ce9b69660..fb4ce20adae1e0c8dfa58e5a1ca901d5f6d96961 100644 --- a/semantic_router/splitters/rolling_window.py +++ b/semantic_router/splitters/rolling_window.py @@ -340,7 +340,7 @@ class RollingWindowSplitter(BaseSplitter): self, similarities: List[float], split_indices: List[int], - splits: list[DocumentSplit], + splits: List[DocumentSplit], ): try: from matplotlib import pyplot as plt diff --git a/semantic_router/splitters/utils.py b/semantic_router/splitters/utils.py index 349c3eaac76017c53dd5425abfe65e7e2952a679..6f71f979d1e1c668c5bded72722377e8232471ad 100644 --- a/semantic_router/splitters/utils.py +++ b/semantic_router/splitters/utils.py @@ -1,8 +1,8 @@ import regex import tiktoken +from typing import List - -def split_to_sentences(text: str) -> list[str]: +def split_to_sentences(text: str) -> List[str]: """ Enhanced regex pattern to split a given text into sentences more accurately. diff --git a/semantic_router/text.py b/semantic_router/text.py index 6038888d1926c59f256dd8b5df8c15851f84e784..0706556e32cc409a93a43b4d784e040bd77ce276 100644 --- a/semantic_router/text.py +++ b/semantic_router/text.py @@ -157,7 +157,7 @@ class Conversation(BaseModel): :raises ValueError: If the splitter is not configured before calling this method. :return: A tuple containing the updated list of topics and the list of new topics generated in this call. - :rtype: tuple[list[tuple[int, str]], list[DocumentSplit]] + :rtype: tuple[List[tuple[int, str]], List[DocumentSplit]] """ if self.splitter is None: