From a29d8351c8be3e9290d7d36eb9d309defd9c7fd1 Mon Sep 17 00:00:00 2001
From: Philipp Serrer <72248290+pserrer1@users.noreply.github.com>
Date: Mon, 3 Jun 2024 18:05:23 +0200
Subject: [PATCH] fix: setDocumentHash should be async (#868)

---
 .changeset/twelve-bottles-wait.md                           | 6 ++++++
 .../current/api/classes/BaseDocumentStore.md                | 2 +-
 .../current/api/classes/BaseDocumentStore.md                | 2 +-
 .../current/api/classes/BaseDocumentStore.md                | 2 +-
 packages/core/src/indices/BaseIndex.ts                      | 2 +-
 packages/core/src/indices/keyword/index.ts                  | 2 +-
 packages/core/src/indices/summary/index.ts                  | 2 +-
 .../core/src/ingestion/strategies/DuplicatesStrategy.ts     | 2 +-
 packages/core/src/storage/docStore/types.ts                 | 2 +-
 9 files changed, 14 insertions(+), 8 deletions(-)
 create mode 100644 .changeset/twelve-bottles-wait.md

diff --git a/.changeset/twelve-bottles-wait.md b/.changeset/twelve-bottles-wait.md
new file mode 100644
index 000000000..fa5b176d4
--- /dev/null
+++ b/.changeset/twelve-bottles-wait.md
@@ -0,0 +1,6 @@
+---
+"llamaindex": minor
+"docs": minor
+---
+
+setDocumentHash should be async
diff --git a/apps/docs/i18n/fr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md b/apps/docs/i18n/fr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
index 0743da72b..690ae542f 100644
--- a/apps/docs/i18n/fr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
+++ b/apps/docs/i18n/fr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
@@ -271,7 +271,7 @@ custom_edit_url: null
 
 ### setDocumentHash
 
-▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `void`
+▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `Promise`<`void`\>
 
 #### Parameters
 
diff --git a/apps/docs/i18n/hr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md b/apps/docs/i18n/hr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
index f90b88e5f..b7ae4720b 100644
--- a/apps/docs/i18n/hr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
+++ b/apps/docs/i18n/hr/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
@@ -271,7 +271,7 @@ custom_edit_url: null
 
 ### setDocumentHash
 
-▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `void`
+▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `Promise`<`void`\>
 
 #### Parameters
 
diff --git a/apps/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md b/apps/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
index 4c3f59628..03bcc954e 100644
--- a/apps/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
+++ b/apps/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/api/classes/BaseDocumentStore.md
@@ -271,7 +271,7 @@ custom_edit_url: null
 
 ### setDocumentHash
 
-▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `void`
+▸ `Abstract` **setDocumentHash**(`docId`, `docHash`): `Promise`<`void`\>
 
 #### Parameters
 
diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts
index cd199a7d7..80a1a9d23 100644
--- a/packages/core/src/indices/BaseIndex.ts
+++ b/packages/core/src/indices/BaseIndex.ts
@@ -95,7 +95,7 @@ export abstract class BaseIndex<T> {
       [nodeParserFromSettingsOrContext(this.serviceContext)],
     );
     await this.insertNodes(nodes);
-    this.docStore.setDocumentHash(document.id_, document.hash);
+    await this.docStore.setDocumentHash(document.id_, document.hash);
   }
 
   abstract insertNodes(nodes: BaseNode[]): Promise<void>;
diff --git a/packages/core/src/indices/keyword/index.ts b/packages/core/src/indices/keyword/index.ts
index 7ff2c8255..48dc4c2a4 100644
--- a/packages/core/src/indices/keyword/index.ts
+++ b/packages/core/src/indices/keyword/index.ts
@@ -281,7 +281,7 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
 
     await docStore.addDocuments(documents, true);
     for (const doc of documents) {
-      docStore.setDocumentHash(doc.id_, doc.hash);
+      await docStore.setDocumentHash(doc.id_, doc.hash);
     }
 
     const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
diff --git a/packages/core/src/indices/summary/index.ts b/packages/core/src/indices/summary/index.ts
index 317ee00c0..affbf7178 100644
--- a/packages/core/src/indices/summary/index.ts
+++ b/packages/core/src/indices/summary/index.ts
@@ -138,7 +138,7 @@ export class SummaryIndex extends BaseIndex<IndexList> {
 
     await docStore.addDocuments(documents, true);
     for (const doc of documents) {
-      docStore.setDocumentHash(doc.id_, doc.hash);
+      await docStore.setDocumentHash(doc.id_, doc.hash);
     }
 
     const nodes =
diff --git a/packages/core/src/ingestion/strategies/DuplicatesStrategy.ts b/packages/core/src/ingestion/strategies/DuplicatesStrategy.ts
index 3aee977bc..f47f82af2 100644
--- a/packages/core/src/ingestion/strategies/DuplicatesStrategy.ts
+++ b/packages/core/src/ingestion/strategies/DuplicatesStrategy.ts
@@ -19,7 +19,7 @@ export class DuplicatesStrategy implements TransformComponent {
 
     for (const node of nodes) {
       if (!(node.hash in hashes) && !currentHashes.has(node.hash)) {
-        this.docStore.setDocumentHash(node.id_, node.hash);
+        await this.docStore.setDocumentHash(node.id_, node.hash);
         nodesToRun.push(node);
         currentHashes.add(node.hash);
       }
diff --git a/packages/core/src/storage/docStore/types.ts b/packages/core/src/storage/docStore/types.ts
index f5206b9e4..006721935 100644
--- a/packages/core/src/storage/docStore/types.ts
+++ b/packages/core/src/storage/docStore/types.ts
@@ -32,7 +32,7 @@ export abstract class BaseDocumentStore {
   abstract documentExists(docId: string): Promise<boolean>;
 
   // Hash
-  abstract setDocumentHash(docId: string, docHash: string): void;
+  abstract setDocumentHash(docId: string, docHash: string): Promise<void>;
 
   abstract getDocumentHash(docId: string): Promise<string | undefined>;
 
-- 
GitLab