From dc5f07d5e831dc88bcc4f7fc7d991b93a0bacf10 Mon Sep 17 00:00:00 2001
From: Jing <lscmirror@gmail.com>
Date: Sun, 20 Aug 2023 16:01:41 -0700
Subject: [PATCH] Support Firestore as LlamaIndex Storage (#7305)

---
 .vscode/settings.json                         |   2 +-
 CHANGELOG.md                                  |   3 +
 .../data_modules/storage/docstores.md         |  33 ++
 docs/examples/docstore/FirestoreDemo.ipynb    | 327 ++++++++++++++++++
 llama_index/storage/docstore/__init__.py      |   2 +
 .../storage/docstore/firestore_docstore.py    |  39 +++
 llama_index/storage/index_store/__init__.py   |   9 +-
 .../index_store/firestore_indexstore.py       |  37 ++
 llama_index/storage/kvstore/__init__.py       |   3 +-
 .../storage/kvstore/firestore_kvstore.py      | 109 ++++++
 tests/storage/conftest.py                     |   6 +
 .../docstore/test_firestore_docstore.py       |  72 ++++
 .../index_store/test_firestore_indexstore.py  |  24 ++
 .../storage/kvstore/test_firestore_kvstore.py |  27 ++
 14 files changed, 690 insertions(+), 3 deletions(-)
 create mode 100644 docs/examples/docstore/FirestoreDemo.ipynb
 create mode 100644 llama_index/storage/docstore/firestore_docstore.py
 create mode 100644 llama_index/storage/index_store/firestore_indexstore.py
 create mode 100644 llama_index/storage/kvstore/firestore_kvstore.py
 create mode 100644 tests/storage/docstore/test_firestore_docstore.py
 create mode 100644 tests/storage/index_store/test_firestore_indexstore.py
 create mode 100644 tests/storage/kvstore/test_firestore_kvstore.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index fe6911f4bd..781eafd2ec 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -4,4 +4,4 @@
   "editor.codeActionsOnSave": {
       "source.organizeImports": true,
   },
-}
\ No newline at end of file
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72f387e510..96fe72be16 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## Unreleased
 
+### New Features
+- Added Firestore docstore/index store support (#7305)
+
 ### Bug Fixes / Nits
 - Fix Azure pydantic error (#7329)
 
diff --git a/docs/core_modules/data_modules/storage/docstores.md b/docs/core_modules/data_modules/storage/docstores.md
index 2a2b2c54c3..266151435f 100644
--- a/docs/core_modules/data_modules/storage/docstores.md
+++ b/docs/core_modules/data_modules/storage/docstores.md
@@ -74,3 +74,36 @@ Under the hood, `RedisDocumentStore` connects to a redis database and adds your
 You can easily reconnect to your Redis client and reload the index by re-initializing a `RedisDocumentStore` with an existing `host`, `port`, and `namespace`.
 
 A more complete example can be found [here](../../examples/docstore/RedisDocstoreIndexStoreDemo.ipynb)
+
+### Firestore Document Store
+
+We support Firestore as an alternative document store backend that persists data as `Node` objects are ingested.
+
+```python
+from llama_index.storage.docstore import FirestoreDocumentStore
+from llama_index.node_parser import SimpleNodeParser
+
+# create parser and parse document into nodes
+parser = SimpleNodeParser()
+nodes = parser.get_nodes_from_documents(documents)
+
+# create (or load) docstore and add nodes
+docstore = FirestoreDocumentStore.from_database(
+  project="project-id",
+  database="(default)",
+)
+docstore.add_documents(nodes)
+
+# create storage context
+storage_context = StorageContext.from_defaults(docstore=docstore)
+
+# build index
+index = VectorStoreIndex(nodes, storage_context=storage_context)
+```
+
+Under the hood, `FirestoreDocumentStore` connects to a Firestore database in Google Cloud and adds your nodes to a namespace stored under `{namespace}/docs`.
+> Note: You can configure the `namespace` when instantiating `FirestoreDocumentStore`, otherwise it defaults to `namespace="docstore"`.
+
+You can easily reconnect to your Firestore database and reload the index by re-initializing a `FirestoreDocumentStore` with an existing `project`, `database`, and `namespace`.
+
+A more complete example can be found [here](../../examples/docstore/FirestoreDemo.ipynb)
diff --git a/docs/examples/docstore/FirestoreDemo.ipynb b/docs/examples/docstore/FirestoreDemo.ipynb
new file mode 100644
index 0000000000..098b9855c6
--- /dev/null
+++ b/docs/examples/docstore/FirestoreDemo.ipynb
@@ -0,0 +1,327 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nest_asyncio\n",
+    "\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "import sys\n",
+    "\n",
+    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
+    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index import (\n",
+    "    SimpleDirectoryReader,\n",
+    "    ServiceContext,\n",
+    "    LLMPredictor,\n",
+    "    StorageContext,\n",
+    ")\n",
+    "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
+    "from llama_index.composability import ComposableGraph\n",
+    "from llama_index.llms import OpenAI\n",
+    "from llama_index.response.notebook_utils import display_response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load Documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reader = SimpleDirectoryReader(\"../paul_graham_essay/data\")\n",
+    "documents = reader.load_data()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Parse into Nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.node_parser import SimpleNodeParser\n",
+    "\n",
+    "nodes = SimpleNodeParser().get_nodes_from_documents(documents)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Add to Docstore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore\n",
+    "from llama_index.storage.docstore.firestore_docstore import FirestoreDocumentStore\n",
+    "from llama_index.storage.index_store.firestore_indexstore import FirestoreIndexStore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kvstore = FirestoreKVStore()\n",
+    "\n",
+    "storage_context = StorageContext.from_defaults(\n",
+    "    docstore=FirestoreDocumentStore(kvstore),\n",
+    "    index_store=FirestoreIndexStore(kvstore),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "storage_context.docstore.add_documents(nodes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Define Multiple Indexes\n",
+    "\n",
+    "Each index uses the same underlying Node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list_index = ListIndex(nodes, storage_context=storage_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vector_index = VectorStoreIndex(nodes, storage_context=storage_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keyword_table_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: the docstore still has the same nodes\n",
+    "len(storage_context.docstore.docs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Test out saving and loading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: docstore and index_store are persisted in Firestore by default\n",
+    "# NOTE: here only need to persist simple vector store to disk\n",
+    "storage_context.persist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# note down index IDs\n",
+    "list_id = list_index.index_id\n",
+    "vector_id = vector_index.index_id\n",
+    "keyword_id = keyword_table_index.index_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.indices.loading import load_index_from_storage\n",
+    "\n",
+    "kvstore = FirestoreKVStore()\n",
+    "\n",
+    "# re-create storage context\n",
+    "storage_context = StorageContext.from_defaults(\n",
+    "    docstore=FirestoreDocumentStore(kvstore),\n",
+    "    index_store=FirestoreIndexStore(kvstore),\n",
+    ")\n",
+    "\n",
+    "# load indices\n",
+    "list_index = load_index_from_storage(storage_context=storage_context, index_id=list_id)\n",
+    "vector_index = load_index_from_storage(\n",
+    "    storage_context=storage_context, index_id=vector_id\n",
+    ")\n",
+    "keyword_table_index = load_index_from_storage(\n",
+    "    storage_context=storage_context, index_id=keyword_id\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Test out some Queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_engine = list_index.as_query_engine()\n",
+    "list_response = query_engine.query(\"What is a summary of this document?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display_response(list_response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_engine = vector_index.as_query_engine()\n",
+    "vector_response = query_engine.query(\"What did the author do growing up?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display_response(vector_response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_engine = keyword_table_index.as_query_engine()\n",
+    "keyword_response = query_engine.query(\"What did the author do after his time at YC?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display_response(keyword_response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llama_index/storage/docstore/__init__.py b/llama_index/storage/docstore/__init__.py
index ca95785279..8aae54c4cc 100644
--- a/llama_index/storage/docstore/__init__.py
+++ b/llama_index/storage/docstore/__init__.py
@@ -1,4 +1,5 @@
 from llama_index.storage.docstore.types import BaseDocumentStore
+from llama_index.storage.docstore.firestore_docstore import FirestoreDocumentStore
 from llama_index.storage.docstore.simple_docstore import SimpleDocumentStore
 from llama_index.storage.docstore.mongo_docstore import MongoDocumentStore
 from llama_index.storage.docstore.keyval_docstore import KVDocumentStore
@@ -11,6 +12,7 @@ from llama_index.storage.docstore.simple_docstore import DocumentStore
 __all__ = [
     "BaseDocumentStore",
     "DocumentStore",
+    "FirestoreDocumentStore",
     "SimpleDocumentStore",
     "MongoDocumentStore",
     "KVDocumentStore",
diff --git a/llama_index/storage/docstore/firestore_docstore.py b/llama_index/storage/docstore/firestore_docstore.py
new file mode 100644
index 0000000000..6792d2d98f
--- /dev/null
+++ b/llama_index/storage/docstore/firestore_docstore.py
@@ -0,0 +1,39 @@
+from typing import Optional
+from llama_index.storage.docstore.keyval_docstore import KVDocumentStore
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
+
+
+class FirestoreDocumentStore(KVDocumentStore):
+    """Firestore Document (Node) store.
+
+    A Firestore store for Document and Node objects.
+
+    Args:
+        firestore_kvstore (FirestoreKVStore): Firestore key-value store
+        namespace (Optional[str]): namespace for the docstore
+
+    """
+
+    def __init__(
+        self,
+        firestore_kvstore: FirestoreKVStore,
+        namespace: Optional[str] = None,
+    ) -> None:
+        """Init a FirestoreDocumentStore; both arguments go to KVDocumentStore."""
+        super().__init__(firestore_kvstore, namespace)
+
+    @classmethod
+    def from_database(
+        cls,
+        project: str,
+        database: str,
+        namespace: Optional[str] = None,
+    ) -> "FirestoreDocumentStore":
+        """Create a docstore on top of a newly constructed FirestoreKVStore.
+        Args:
+            project (str): The project which the client acts on behalf of.
+            database (str): The database name that the client targets.
+            namespace (Optional[str]): namespace for the docstore
+        """
+        firestore_kvstore = FirestoreKVStore(project=project, database=database)
+        return cls(firestore_kvstore, namespace)
diff --git a/llama_index/storage/index_store/__init__.py b/llama_index/storage/index_store/__init__.py
index 70f1990e84..236c9ddb91 100644
--- a/llama_index/storage/index_store/__init__.py
+++ b/llama_index/storage/index_store/__init__.py
@@ -1,6 +1,13 @@
+from llama_index.storage.index_store.firestore_indexstore import FirestoreIndexStore
 from llama_index.storage.index_store.keyval_index_store import KVIndexStore
 from llama_index.storage.index_store.simple_index_store import SimpleIndexStore
 from llama_index.storage.index_store.mongo_index_store import MongoIndexStore
 from llama_index.storage.index_store.redis_index_store import RedisIndexStore
 
-__all__ = ["KVIndexStore", "SimpleIndexStore", "MongoIndexStore", "RedisIndexStore"]
+__all__ = [
+    "FirestoreIndexStore",
+    "KVIndexStore",
+    "SimpleIndexStore",
+    "MongoIndexStore",
+    "RedisIndexStore",
+]
diff --git a/llama_index/storage/index_store/firestore_indexstore.py b/llama_index/storage/index_store/firestore_indexstore.py
new file mode 100644
index 0000000000..850f842d47
--- /dev/null
+++ b/llama_index/storage/index_store/firestore_indexstore.py
@@ -0,0 +1,37 @@
+from typing import Optional
+from llama_index.storage.index_store.keyval_index_store import KVIndexStore
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
+
+
+class FirestoreIndexStore(KVIndexStore):
+    """Firestore Index store.
+
+    Args:
+        firestore_kvstore (FirestoreKVStore): Firestore key-value store
+        namespace (Optional[str]): namespace for the index store
+
+    """
+
+    def __init__(
+        self,
+        firestore_kvstore: FirestoreKVStore,
+        namespace: Optional[str] = None,
+    ) -> None:
+        """Init a FirestoreIndexStore; both arguments go to KVIndexStore."""
+        super().__init__(firestore_kvstore, namespace=namespace)
+
+    @classmethod
+    def from_database(
+        cls,
+        project: str,
+        database: str,
+        namespace: Optional[str] = None,
+    ) -> "FirestoreIndexStore":
+        """Create an index store on top of a newly constructed FirestoreKVStore.
+        Args:
+            project (str): The project which the client acts on behalf of.
+            database (str): The database name that the client targets.
+            namespace (Optional[str]): namespace for the index store
+        """
+        firestore_kvstore = FirestoreKVStore(project=project, database=database)
+        return cls(firestore_kvstore, namespace)
diff --git a/llama_index/storage/kvstore/__init__.py b/llama_index/storage/kvstore/__init__.py
index 47410f4214..869f2e13ec 100644
--- a/llama_index/storage/kvstore/__init__.py
+++ b/llama_index/storage/kvstore/__init__.py
@@ -1,5 +1,6 @@
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
 from llama_index.storage.kvstore.simple_kvstore import SimpleKVStore
 from llama_index.storage.kvstore.mongodb_kvstore import MongoDBKVStore
 from llama_index.storage.kvstore.redis_kvstore import RedisKVStore
 
-__all__ = ["SimpleKVStore", "MongoDBKVStore", "RedisKVStore"]
+__all__ = ["FirestoreKVStore", "SimpleKVStore", "MongoDBKVStore", "RedisKVStore"]
diff --git a/llama_index/storage/kvstore/firestore_kvstore.py b/llama_index/storage/kvstore/firestore_kvstore.py
new file mode 100644
index 0000000000..d02991ba5c
--- /dev/null
+++ b/llama_index/storage/kvstore/firestore_kvstore.py
@@ -0,0 +1,109 @@
+from typing import Any, Dict, Optional
+from llama_index.storage.kvstore.types import DEFAULT_COLLECTION, BaseKVStore
+
+# "__*__" field names are reserved in Firestore but used in llama_index/constants.py.
+FIELD_NAME_REPLACE_SET = {"__data__": "data", "__type__": "type"}
+FIELD_NAME_REPLACE_GET = {"data": "__data__", "type": "__type__"}
+
+# "/" is not supported in Firestore Collection ID.
+SLASH_REPLACEMENT = "_"
+IMPORT_ERROR_MSG = (
+    "`firestore` package not found, please run `pip3 install google-cloud-firestore`"
+)
+CLIENT_INFO = "LlamaIndex"
+DEFAULT_FIRESTORE_DATABASE = "(default)"
+
+
+class FirestoreKVStore(BaseKVStore):
+    """Firestore Key-Value store.
+
+    Args:
+        project (Optional[str]): The project which the client acts on behalf of.
+        database (str): The database name that the client targets.
+    """
+
+    def __init__(
+        self, project: Optional[str] = None, database: str = DEFAULT_FIRESTORE_DATABASE
+    ) -> None:
+        try:
+            from google.cloud import firestore_v1 as firestore
+        except ImportError:
+            raise ImportError(IMPORT_ERROR_MSG)
+
+        self._db = firestore.client.Client(
+            project=project, database=database, client_info=CLIENT_INFO
+        )
+
+    def firestore_collection(self, collection: str) -> str:
+        return collection.replace("/", SLASH_REPLACEMENT)  # "/" is invalid in an ID
+
+    def replace_field_name_set(self, val: Dict[str, Any]) -> Dict[str, Any]:
+        """Copy val, renaming "__data__"/"__type__" to Firestore-safe keys."""
+        val = val.copy()
+        for k, v in FIELD_NAME_REPLACE_SET.items():
+            if k in val:
+                val[v] = val.pop(k)
+        return val
+
+    def replace_field_name_get(self, val: Dict[str, Any]) -> Dict[str, Any]:
+        """Copy val, renaming stored "data"/"type" back to "__data__"/"__type__"."""
+        val = val.copy()
+        for k, v in FIELD_NAME_REPLACE_GET.items():
+            if k in val:
+                val[v] = val.pop(k)
+        return val
+
+    def put(
+        self,
+        key: str,
+        val: dict,
+        collection: str = DEFAULT_COLLECTION,
+    ) -> None:
+        """Put a key-value pair into the Firestore collection.
+        Args:
+            key (str): key, used as the document ID
+            val (dict): value, merged into any existing document (merge=True)
+            collection (str): collection name
+        """
+        collection_id = self.firestore_collection(collection)
+        val = self.replace_field_name_set(val)
+        doc = self._db.collection(collection_id).document(key)
+        doc.set(val, merge=True)
+
+    def get(self, key: str, collection: str = DEFAULT_COLLECTION) -> Optional[dict]:
+        """Get the value stored under key, or None if it is missing or empty.
+        Args:
+            key (str): key
+            collection (str): collection name
+        """
+        collection_id = self.firestore_collection(collection)
+        result = self._db.collection(collection_id).document(key).get().to_dict()
+        if not result:
+            return None
+
+        return self.replace_field_name_get(result)
+
+    def get_all(self, collection: str = DEFAULT_COLLECTION) -> Dict[str, dict]:
+        """Get all key-value pairs of a Firestore collection, keyed by document id.
+        Args:
+            collection (str): collection name
+        """
+        collection_id = self.firestore_collection(collection)
+        docs = self._db.collection(collection_id).list_documents()
+        output = {}
+        for doc in docs:
+            data = doc.get().to_dict()  # None if the listed document is missing
+            if data is not None:
+                output[doc.id] = self.replace_field_name_get(data)
+        return output
+
+    def delete(self, key: str, collection: str = DEFAULT_COLLECTION) -> bool:
+        """Delete a value from the Firestore; always returns True.
+        Args:
+            key (str): key
+            collection (str): collection name
+        """
+        collection_id = self.firestore_collection(collection)
+        doc = self._db.collection(collection_id).document(key)
+        doc.delete()
+        return True
diff --git a/tests/storage/conftest.py b/tests/storage/conftest.py
index f3d6e9d166..b244f49802 100644
--- a/tests/storage/conftest.py
+++ b/tests/storage/conftest.py
@@ -1,4 +1,5 @@
 import pytest
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
 from llama_index.storage.kvstore.mongodb_kvstore import MongoDBKVStore
 from llama_index.storage.kvstore.redis_kvstore import RedisKVStore
 from llama_index.storage.kvstore.simple_kvstore import SimpleKVStore
@@ -15,6 +16,11 @@ def mongo_kvstore(mongo_client: MockMongoClient) -> MongoDBKVStore:
     return MongoDBKVStore(mongo_client=mongo_client)  # type: ignore
 
 
+@pytest.fixture()
+def firestore_kvstore() -> FirestoreKVStore:
+    return FirestoreKVStore()
+
+
 @pytest.fixture()
 def simple_kvstore() -> SimpleKVStore:
     return SimpleKVStore()
diff --git a/tests/storage/docstore/test_firestore_docstore.py b/tests/storage/docstore/test_firestore_docstore.py
new file mode 100644
index 0000000000..aa6ae27220
--- /dev/null
+++ b/tests/storage/docstore/test_firestore_docstore.py
@@ -0,0 +1,72 @@
+from typing import List
+
+import pytest
+
+from llama_index.storage.docstore.firestore_docstore import FirestoreDocumentStore
+from llama_index.schema import Document
+from llama_index.schema import BaseNode
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
+
+try:
+    from google.cloud import firestore_v1 as firestore
+except ImportError:
+    firestore = None  # type: ignore
+
+
+@pytest.fixture
+def documents() -> List[Document]:
+    return [
+        Document(text="doc_1"),
+        Document(text="doc_2"),
+    ]
+
+
+@pytest.fixture()
+def firestore_docstore(firestore_kvstore: FirestoreKVStore) -> FirestoreDocumentStore:
+    return FirestoreDocumentStore(firestore_kvstore=firestore_kvstore)
+
+
+@pytest.mark.skipif(firestore is None, reason="firestore not installed")
+def test_firestore_docstore(
+    firestore_docstore: FirestoreDocumentStore, documents: List[Document]
+) -> None:
+    ds = firestore_docstore
+    assert len(ds.docs) == 0
+
+    # test adding documents
+    ds.add_documents(documents)
+    assert len(ds.docs) == 2
+    assert all(isinstance(doc, BaseNode) for doc in ds.docs.values())
+
+    # test updating documents
+    ds.add_documents(documents)
+    print(ds.docs)
+    assert len(ds.docs) == 2
+
+    # test getting documents
+    doc0 = ds.get_document(documents[0].get_doc_id())
+    assert doc0 is not None
+    assert documents[0].get_content() == doc0.get_content()
+
+    # test deleting documents
+    ds.delete_document(documents[0].get_doc_id())
+    assert len(ds.docs) == 1
+
+
+@pytest.mark.skipif(firestore is None, reason="firestore not installed")
+def test_firestore_docstore_hash(firestore_docstore: FirestoreDocumentStore) -> None:
+    ds = firestore_docstore
+
+    # Test setting hash
+    ds.set_document_hash("test_doc_id", "test_doc_hash")
+    doc_hash = ds.get_document_hash("test_doc_id")
+    assert doc_hash == "test_doc_hash"
+
+    # Test updating hash
+    ds.set_document_hash("test_doc_id", "test_doc_hash_new")
+    doc_hash = ds.get_document_hash("test_doc_id")
+    assert doc_hash == "test_doc_hash_new"
+
+    # Test getting non-existent
+    doc_hash = ds.get_document_hash("test_not_exist")
+    assert doc_hash is None
diff --git a/tests/storage/index_store/test_firestore_indexstore.py b/tests/storage/index_store/test_firestore_indexstore.py
new file mode 100644
index 0000000000..2b615932b7
--- /dev/null
+++ b/tests/storage/index_store/test_firestore_indexstore.py
@@ -0,0 +1,24 @@
+import pytest
+
+from llama_index.storage.index_store.firestore_indexstore import FirestoreIndexStore
+from llama_index.data_structs.data_structs import IndexGraph
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
+
+try:
+    from google.cloud import firestore_v1 as firestore
+except ImportError:
+    firestore = None  # type: ignore
+
+
+@pytest.fixture()
+def firestore_indexstore(firestore_kvstore: FirestoreKVStore) -> FirestoreIndexStore:
+    return FirestoreIndexStore(firestore_kvstore=firestore_kvstore)
+
+
+@pytest.mark.skipif(firestore is None, reason="firestore not installed")
+def test_firestore_docstore(firestore_indexstore: FirestoreIndexStore) -> None:
+    index_struct = IndexGraph()
+    index_store = firestore_indexstore
+
+    index_store.add_index_struct(index_struct)
+    assert index_store.get_index_struct(struct_id=index_struct.index_id) == index_struct
diff --git a/tests/storage/kvstore/test_firestore_kvstore.py b/tests/storage/kvstore/test_firestore_kvstore.py
new file mode 100644
index 0000000000..5ad1b63615
--- /dev/null
+++ b/tests/storage/kvstore/test_firestore_kvstore.py
@@ -0,0 +1,27 @@
+import pytest
+from llama_index.storage.kvstore.firestore_kvstore import FirestoreKVStore
+
+try:
+    from google.cloud import firestore_v1 as firestore
+except ImportError:
+    firestore = None  # type: ignore
+
+
+@pytest.fixture()
+def kvstore_with_data(firestore_kvstore: FirestoreKVStore) -> FirestoreKVStore:
+    test_key = "test_key"
+    test_doc = {"test_obj_key": "test_obj_val"}
+    firestore_kvstore.put(test_key, test_doc)
+    return firestore_kvstore
+
+
+@pytest.mark.skipif(firestore is None, reason="firestore not installed")
+def test_kvstore_basic(firestore_kvstore: FirestoreKVStore) -> None:
+    test_key = "test_key"
+    test_doc = {"test_obj_key": "test_obj_val"}
+    firestore_kvstore.put(test_key, test_doc)
+    doc = firestore_kvstore.get(test_key)
+    assert doc == test_doc
+
+    doc = firestore_kvstore.get(test_key, collection="non_existent")
+    assert doc is None
-- 
GitLab