From bd8cc5e4dd521156258e2fd6ff0a818d5ee82e6e Mon Sep 17 00:00:00 2001 From: ryanrib14 <147437431+ryanrib14@users.noreply.github.com> Date: Wed, 13 Mar 2024 18:54:22 -0300 Subject: [PATCH] keyval_docstore and mongodb docstore updated to accept custom collections suffix (#11873) --- .../core/storage/docstore/keyval_docstore.py | 23 +++++++++++-- .../storage/docstore/mongodb/base.py | 34 +++++++++++++++++-- .../pyproject.toml | 2 +- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py b/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py index 2230f3052d..8d58e8c864 100644 --- a/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py +++ b/llama-index-core/llama_index/core/storage/docstore/keyval_docstore.py @@ -11,6 +11,9 @@ from llama_index.core.storage.docstore.utils import doc_to_json, json_to_doc from llama_index.core.storage.kvstore.types import DEFAULT_BATCH_SIZE, BaseKVStore DEFAULT_NAMESPACE = "docstore" +DEFAULT_COLLECTION_DATA_SUFFIX = "/data" +DEFAULT_REF_DOC_COLLECTION_SUFFIX = "/ref_doc_info" +DEFAULT_METADATA_COLLECTION_SUFFIX = "/metadata" class KVDocumentStore(BaseDocumentStore): @@ -46,13 +49,27 @@ class KVDocumentStore(BaseDocumentStore): kvstore: BaseKVStore, namespace: Optional[str] = None, batch_size: int = DEFAULT_BATCH_SIZE, + node_collection_suffix: Optional[str] = None, + ref_doc_collection_suffix: Optional[str] = None, + metadata_collection_suffix: Optional[str] = None, ) -> None: """Init a KVDocumentStore.""" self._kvstore = kvstore self._namespace = namespace or DEFAULT_NAMESPACE - self._node_collection = f"{self._namespace}/data" - self._ref_doc_collection = f"{self._namespace}/ref_doc_info" - self._metadata_collection = f"{self._namespace}/metadata" + self._node_collection_suffix = ( + node_collection_suffix or DEFAULT_COLLECTION_DATA_SUFFIX + ) + self._ref_doc_collection_suffix = ( + ref_doc_collection_suffix or DEFAULT_REF_DOC_COLLECTION_SUFFIX + ) + self._metadata_collection_suffix = ( + metadata_collection_suffix or DEFAULT_METADATA_COLLECTION_SUFFIX + ) + self._node_collection = f"{self._namespace}{self._node_collection_suffix}" + self._ref_doc_collection = f"{self._namespace}{self._ref_doc_collection_suffix}" + self._metadata_collection = ( + f"{self._namespace}{self._metadata_collection_suffix}" + ) self._batch_size = batch_size @property diff --git a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/llama_index/storage/docstore/mongodb/base.py b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/llama_index/storage/docstore/mongodb/base.py index 9ca2a321cb..a9849d8af2 100644 --- a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/llama_index/storage/docstore/mongodb/base.py +++ b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/llama_index/storage/docstore/mongodb/base.py @@ -20,10 +20,20 @@ class MongoDocumentStore(KVDocumentStore): self, mongo_kvstore: MongoDBKVStore, namespace: Optional[str] = None, + node_collection_suffix: Optional[str] = None, + ref_doc_collection_suffix: Optional[str] = None, + metadata_collection_suffix: Optional[str] = None, batch_size: int = DEFAULT_BATCH_SIZE, ) -> None: """Init a MongoDocumentStore.""" - super().__init__(mongo_kvstore, namespace=namespace, batch_size=batch_size) + super().__init__( + mongo_kvstore, + namespace=namespace, + batch_size=batch_size, + node_collection_suffix=node_collection_suffix, + ref_doc_collection_suffix=ref_doc_collection_suffix, + metadata_collection_suffix=metadata_collection_suffix, + ) @classmethod def from_uri( @@ -31,10 +41,19 @@ class MongoDocumentStore(KVDocumentStore): uri: str, db_name: Optional[str] = None, namespace: Optional[str] = None, + node_collection_suffix: Optional[str] = None, + ref_doc_collection_suffix: Optional[str] = None, + metadata_collection_suffix: Optional[str] = None, ) -> "MongoDocumentStore": """Load a MongoDocumentStore from a MongoDB URI.""" mongo_kvstore = MongoDBKVStore.from_uri(uri, db_name) - return cls(mongo_kvstore, namespace) + return cls( + mongo_kvstore, + namespace, + node_collection_suffix, + ref_doc_collection_suffix, + metadata_collection_suffix, + ) @classmethod def from_host_and_port( @@ -43,7 +62,16 @@ class MongoDocumentStore(KVDocumentStore): port: int, db_name: Optional[str] = None, namespace: Optional[str] = None, + node_collection_suffix: Optional[str] = None, + ref_doc_collection_suffix: Optional[str] = None, + metadata_collection_suffix: Optional[str] = None, ) -> "MongoDocumentStore": """Load a MongoDocumentStore from a MongoDB host and port.""" mongo_kvstore = MongoDBKVStore.from_host_and_port(host, port, db_name) - return cls(mongo_kvstore, namespace) + return cls( + mongo_kvstore, + namespace, + node_collection_suffix, + ref_doc_collection_suffix, + metadata_collection_suffix, + ) diff --git a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/pyproject.toml b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/pyproject.toml index 20c2621a5f..884c985b3b 100644 --- a/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/pyproject.toml +++ b/llama-index-integrations/storage/docstore/llama-index-storage-docstore-mongodb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-storage-docstore-mongodb" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -- GitLab