From ab9d941d155ccf7127ccf374e6e50aa873253a07 Mon Sep 17 00:00:00 2001
From: Emanuel Ferreira <contatoferreirads@gmail.com>
Date: Thu, 1 Feb 2024 07:46:33 -0300
Subject: [PATCH] fix(cyclic): remove cyclic structures from transform hash (#500)

---
 .changeset/three-stingrays-build.md           |  5 +++++
 packages/core/src/ingestion/IngestionCache.ts | 22 ++++++++++++++++++-
 .../tests/ingestion/IngestionCache.test.ts    |  7 ++++++
 3 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/three-stingrays-build.md

diff --git a/.changeset/three-stingrays-build.md b/.changeset/three-stingrays-build.md
new file mode 100644
index 000000000..516d535b7
--- /dev/null
+++ b/.changeset/three-stingrays-build.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+fix(cyclic): remove cyclic structures from transform hash
diff --git a/packages/core/src/ingestion/IngestionCache.ts b/packages/core/src/ingestion/IngestionCache.ts
index 929f3a5d4..a98459af8 100644
--- a/packages/core/src/ingestion/IngestionCache.ts
+++ b/packages/core/src/ingestion/IngestionCache.ts
@@ -5,6 +5,25 @@ import { SimpleKVStore } from "../storage/kvStore/SimpleKVStore";
 import { BaseKVStore } from "../storage/kvStore/types";
 import { TransformComponent } from "./types";
 
+const transformToJSON = (obj: TransformComponent) => {
+  let seen: any[] = [];
+
+  const replacer = (key: string, value: any) => {
+    if (value != null && typeof value == "object") {
+      if (seen.indexOf(value) >= 0) {
+        return;
+      }
+      seen.push(value);
+    }
+    return value;
+  };
+
+  // this is a custom replacer function that will allow us to handle circular references
+  const jsonStr = JSON.stringify(obj, replacer);
+
+  return jsonStr;
+};
+
 export function getTransformationHash(
   nodes: BaseNode[],
   transform: TransformComponent,
@@ -13,7 +32,8 @@ export function getTransformationHash(
     .map((node) => node.getContent(MetadataMode.ALL))
     .join("");
 
-  const transformString: string = JSON.stringify(transform);
+  const transformString: string = transformToJSON(transform);
+
   const hash = createSHA256();
   hash.update(nodesStr + transformString);
   return hash.digest();
diff --git a/packages/core/src/tests/ingestion/IngestionCache.test.ts b/packages/core/src/tests/ingestion/IngestionCache.test.ts
index 52a27801e..bc4ee1733 100644
--- a/packages/core/src/tests/ingestion/IngestionCache.test.ts
+++ b/packages/core/src/tests/ingestion/IngestionCache.test.ts
@@ -71,4 +71,11 @@ describe("getTransformationHash", () => {
     );
     expect(result1).not.toBe(result2);
   });
+
+  test("should not break with circular references", () => {
+    const obj: any = { a: 1, b: 2 };
+    obj["circular"] = obj;
+    const result = getTransformationHash(nodes, obj);
+    expect(typeof result).toBe("string");
+  });
 });
-- 
GitLab