diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..990c18de229088f55c6c514fd0f2d49981d1b0e7 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..0896ca890d8bffd60a44fa824f8d57fecd73ee53 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b9eab05aa370629a4a3de75df3ff64cd53887b68 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a73f90859f2b27bb96a8fe7ff0849ef7c55bb404
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/README.md
@@ -0,0 +1,54 @@
+# LlamaIndex Embeddings Integration: Deepinfra
+
+With this integration, you can use the Deepinfra embeddings model to get embeddings for your text data.
+Here is the link to the [embeddings models](https://deepinfra.com/models/embeddings).
+
+First, you need to sign up on the [Deepinfra website](https://deepinfra.com/) and get the API token.
+You can copy model_ids over the model cards and start using them in your code.
+
+## Installation
+
+```bash
+pip install llama-index llama-index-embeddings-deepinfra
+```
+
+## Usage
+
+```python
+from dotenv import load_dotenv, find_dotenv
+from llama_index.embeddings.deepinfra import DeepInfraEmbeddingModel
+
+# Load environment variables
+_ = load_dotenv(find_dotenv())
+
+# Initialize model with optional configuration
+model = DeepInfraEmbeddingModel(
+    model_id="BAAI/bge-large-en-v1.5",  # Use custom model ID
+    api_token="YOUR_API_TOKEN",  # Optionally provide token here
+    normalize=True,  # Optional normalization
+    text_prefix="text: ",  # Optional text prefix
+    query_prefix="query: ",  # Optional query prefix
+)
+
+# Example usage
+response = model.get_text_embedding("hello world")
+
+# Batch requests
+texts = ["hello world", "goodbye world"]
+response = model.get_text_embedding_batch(texts)
+
+# Query requests
+response = model.get_query_embedding("hello world")
+
+
+# Asynchronous requests
+async def main():
+    text = "hello world"
+    response = await model.aget_text_embedding(text)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    
asyncio.run(main()) +``` diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..db46e8d6c978c67e301dd6c47bee08c1b3fd141c --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..06355de29b46c58c3e179c4fffe73a8cceeed8db --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/__init__.py @@ -0,0 +1,7 @@ +from llama_index.embeddings.deepinfra.base import ( + DeepInfraEmbeddingModel, +) + +__all__ = [ + "DeepInfraEmbeddingModel", +] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/base.py new file mode 100644 index 0000000000000000000000000000000000000000..efbc4e0a13155ebccae16220092f30eba09b1be8 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/llama_index/embeddings/deepinfra/base.py @@ -0,0 +1,214 @@ +import logging +import os + +import aiohttp +import requests +from typing import List, Optional + +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.core.bridge.pydantic import PrivateAttr +from llama_index.core.callbacks.base import CallbackManager + +logger = logging.getLogger(__name__) + +"""DeepInfra 
Inference API URL."""
+INFERENCE_URL = "https://api.deepinfra.com/v1/inference"
+"""Environment variable name of DeepInfra API token."""
+ENV_VARIABLE = "DEEPINFRA_API_TOKEN"
+"""Default model ID for DeepInfra embeddings."""
+DEFAULT_MODEL_ID = "sentence-transformers/clip-ViT-B-32"
+"""Maximum batch size for embedding requests."""
+MAX_BATCH_SIZE = 1024
+
+
+class DeepInfraEmbeddingModel(BaseEmbedding):
+    """
+    A wrapper class for accessing embedding models available via the DeepInfra API. This class allows for easy integration
+    of DeepInfra embeddings into your projects, supporting both synchronous and asynchronous retrieval of text embeddings.
+
+    Args:
+        model_id (str): Identifier for the model to be used for embeddings. Defaults to 'sentence-transformers/clip-ViT-B-32'.
+        normalize (bool): Flag to normalize embeddings post retrieval. Defaults to False.
+        api_token (Optional[str]): DeepInfra API token. If not provided,
+        the token is fetched from the environment variable 'DEEPINFRA_API_TOKEN'.
+
+    Examples:
+        >>> from llama_index.embeddings.deepinfra import DeepInfraEmbeddingModel
+        >>> model = DeepInfraEmbeddingModel()
+        >>> print(model.get_text_embedding("Hello, world!"))
+        [0.1, 0.2, 0.3, ...]
+    """
+
+    """model_id can be obtained from the DeepInfra website."""
+    _model_id: str = PrivateAttr()
+    """normalize flag to normalize embeddings post retrieval."""
+    _normalize: bool = PrivateAttr()
+    """api_token should be obtained from the DeepInfra website; may be None if unset."""
+    _api_token: Optional[str] = PrivateAttr()
+    """query_prefix is used to add a prefix to queries."""
+    _query_prefix: str = PrivateAttr()
+    """text_prefix is used to add a prefix to texts."""
+    _text_prefix: str = PrivateAttr()
+
+    def __init__(
+        self,
+        model_id: str = DEFAULT_MODEL_ID,
+        normalize: bool = False,
+        api_token: Optional[str] = None,
+        callback_manager: Optional[CallbackManager] = None,
+        query_prefix: str = "",
+        text_prefix: str = "",
+        embed_batch_size: int = MAX_BATCH_SIZE,
+    ) -> None:
+        """
+        Init params.
+        """
+        super().__init__(
+            callback_manager=callback_manager, embed_batch_size=embed_batch_size
+        )
+
+        self._model_id = model_id
+        self._normalize = normalize
+        self._api_token = api_token or os.getenv(ENV_VARIABLE, None)
+        self._query_prefix = query_prefix
+        self._text_prefix = text_prefix
+
+    def _post(self, data: List[str]) -> List[List[float]]:
+        """
+        Sends a POST request to the DeepInfra Inference API with the given data and returns the API response.
+        Input data is chunked into batches to avoid exceeding the maximum batch size (1024).
+
+        Args:
+            data (List[str]): A list of strings to be embedded.
+
+        Returns:
+            List[List[float]]: A list of embedding vectors from the API.
+        """
+        url = self.get_url()
+        chunked_data = _chunk(data, self.embed_batch_size)
+        embeddings = []
+        for chunk in chunked_data:
+            response = requests.post(
+                url,
+                json={
+                    "inputs": chunk,
+                },
+                headers={
+                    "Authorization": f"Bearer {self._api_token}",
+                    "Content-Type": "application/json",
+                },
+            )
+            response.raise_for_status()
+            embeddings.extend(response.json()["embeddings"])
+        return embeddings
+
+    def get_url(self):
+        """
+        Get DeepInfra API URL.
+        """
+        return f"{INFERENCE_URL}/{self._model_id}"
+
+    async def _apost(self, data: List[str]) -> List[List[float]]:
+        """
+        Sends a POST request to the DeepInfra Inference API with the given data and returns the API response.
+        Input data is chunked into batches to avoid exceeding the maximum batch size (1024).
+
+        Args:
+            data (List[str]): A list of strings to be embedded.
+        Output:
+            List[float]: A list of embeddings from the API.
+
+        """
+        url = self.get_url()
+        chunked_data = _chunk(data, self.embed_batch_size)
+        embeddings = []
+        # Reuse one ClientSession (and its connection pool) across all batches
+        async with aiohttp.ClientSession() as session:
+            for chunk in chunked_data:
+                async with session.post(
+                    url,
+                    json={
+                        "inputs": chunk,
+                    },
+                    headers={
+                        "Authorization": f"Bearer {self._api_token}",
+                        "Content-Type": "application/json",
+                    },
+                ) as resp:
+                    response = await resp.json()
+                    embeddings.extend(response["embeddings"])
+        return embeddings
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        """
+        Get query embedding.
+        """
+        return self._post(self._add_query_prefix([query]))[0]
+
+    async def _aget_query_embedding(self, query: str) -> List[float]:
+        """
+        Async get query embedding.
+        """
+        response = await self._apost(self._add_query_prefix([query]))
+        return response[0]
+
+    def _get_query_embeddings(self, queries: List[str]) -> List[List[float]]:
+        """
+        Get query embeddings.
+        """
+        return self._post(self._add_query_prefix(queries))
+
+    async def _aget_query_embeddings(self, queries: List[str]) -> List[List[float]]:
+        """
+        Async get query embeddings.
+        """
+        return await self._apost(self._add_query_prefix(queries))
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        """
+        Get text embedding.
+        """
+        return self._post(self._add_text_prefix([text]))[0]
+
+    async def _aget_text_embedding(self, text: str) -> List[float]:
+        """
+        Async get text embedding.
+        """
+        response = await self._apost(self._add_text_prefix([text]))
+        return response[0]
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Get text embedding.
+        """
+        return self._post(self._add_text_prefix(texts))
+
+    async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Async get text embeddings.
+        """
+        return await self._apost(self._add_text_prefix(texts))
+
+    def _add_query_prefix(self, queries: List[str]) -> List[str]:
+        """
+        Add query prefix to queries.
+ """ + return ( + [self._query_prefix + query for query in queries] + if self._query_prefix + else queries + ) + + def _add_text_prefix(self, texts: List[str]) -> List[str]: + """ + Add text prefix to texts. + """ + return ( + [self._text_prefix + text for text in texts] if self._text_prefix else texts + ) + + +def _chunk(items: List[str], batch_size: int = MAX_BATCH_SIZE) -> List[List[str]]: + """ + Chunk items into batches of size batch_size. + """ + return [items[i : i + batch_size] for i in range(0, len(items), batch_size)] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..7283fb08d03d11b117f2ff5ba51ac7d91571173f --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/pyproject.toml @@ -0,0 +1,47 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Your Name <you@example.com>"] +description = "llama-index embeddings deepinfra integration" +license = "MIT" +name = "llama-index-embeddings-deepinfra" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.0" +aiohttp = "^3.8.1" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = 
"0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..dabf212d7e7162849c24a733909ac4f645d75a31 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/test_embeddings_deepinfra.py b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/test_embeddings_deepinfra.py new file mode 100644 index 0000000000000000000000000000000000000000..823a01fff8939a22e5a23b7d120946419f7ecd5a --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-deepinfra/tests/test_embeddings_deepinfra.py @@ -0,0 +1,31 @@ +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.embeddings.deepinfra import DeepInfraEmbeddingModel + + +def test_deepinfra_embedding_class(): + model = DeepInfraEmbeddingModel() + assert isinstance(model, BaseEmbedding) + + +def test_deepinfra_query_prefix(): + model = DeepInfraEmbeddingModel(query_prefix="query") + result = model._add_query_prefix(["test"]) + assert result == ["querytest"] + + +def test_deepinfra_text_prefix(): + model = DeepInfraEmbeddingModel(text_prefix="text") + result = model._add_text_prefix(["test"]) + assert result == 
["texttest"] + + +def test_deepinfra_default_query_prefix(): + model = DeepInfraEmbeddingModel() + result = model._add_query_prefix(["test"]) + assert result == ["test"] + + +def test_deepinfra_default_text_prefix(): + model = DeepInfraEmbeddingModel() + result = model._add_text_prefix(["test"]) + assert result == ["test"]