Skip to content
Snippets Groups Projects
Unverified Commit 95602c79 authored by Thuc Pham's avatar Thuc Pham Committed by GitHub
Browse files

feat: override generate hash function for image document (#751)

parent 20bc466c
No related branches found
No related tags found
No related merge requests found
...@@ -4,7 +4,6 @@ import { ...@@ -4,7 +4,6 @@ import {
VectorStoreIndex, VectorStoreIndex,
storageContextFromDefaults, storageContextFromDefaults,
} from "llamaindex"; } from "llamaindex";
import { DocStoreStrategy } from "llamaindex/ingestion/strategies/index";
import * as path from "path"; import * as path from "path";
...@@ -32,7 +31,6 @@ async function generateDatasource() { ...@@ -32,7 +31,6 @@ async function generateDatasource() {
}); });
await VectorStoreIndex.fromDocuments(documents, { await VectorStoreIndex.fromDocuments(documents, {
storageContext, storageContext,
docStoreStrategy: DocStoreStrategy.NONE,
}); });
}); });
console.log(`Storage successfully generated in ${ms / 1000}s.`); console.log(`Storage successfully generated in ${ms / 1000}s.`);
......
...@@ -326,6 +326,37 @@ export class ImageNode<T extends Metadata = Metadata> extends TextNode<T> { ...@@ -326,6 +326,37 @@ export class ImageNode<T extends Metadata = Metadata> extends TextNode<T> {
const absPath = path.resolve(this.id_); const absPath = path.resolve(this.id_);
return new URL(`file://${absPath}`); return new URL(`file://${absPath}`);
} }
/**
 * Produces the image-specific component of this node's hash.
 *
 * The value fed to SHA-256 depends on how the image is represented:
 * - Blob: the node's ID (stand-in, see TODO below)
 * - URL: the URL rendered as a string
 * - string: the string itself
 *
 * Throws an Error for any other image representation.
 */
private generateImageHash() {
  const sha256 = createSHA256();
  const img = this.image;
  let payload: string;
  if (img instanceof Blob) {
    // TODO: hash the blob's bytes instead, e.g.
    //   sha256.update(new Uint8Array(await img.arrayBuffer()));
    // Reading the bytes is async, so the node's ID is used for now.
    payload = this.id_;
  } else if (img instanceof URL) {
    payload = img.toString();
  } else if (typeof img === "string") {
    payload = img;
  } else {
    throw new Error(
      `Unknown image type: ${typeof img}. Can't calculate hash`,
    );
  }
  sha256.update(payload);
  return sha256.digest();
}
/**
 * Computes the node hash from both of its components: the hash produced by
 * the parent class (text part) and the image hash. The two digests are fed,
 * in that order, into a fresh SHA-256 whose digest is returned.
 */
generateHash() {
  const textDigest = super.generateHash();
  const imageDigest = this.generateImageHash();
  const combined = createSHA256();
  combined.update(textDigest);
  combined.update(imageDigest);
  return combined.digest();
}
} }
export class ImageDocument<T extends Metadata = Metadata> extends ImageNode<T> { export class ImageDocument<T extends Metadata = Metadata> extends ImageNode<T> {
......
import { Document } from "llamaindex/Node"; import { Document, ImageDocument } from "llamaindex/Node";
import { describe, expect, test } from "vitest"; import { describe, expect, test } from "vitest";
describe("Document", () => { describe("Document", () => {
...@@ -6,4 +6,16 @@ describe("Document", () => { ...@@ -6,4 +6,16 @@ describe("Document", () => {
const doc = new Document({ text: "text", id_: "docId" }); const doc = new Document({ text: "text", id_: "docId" });
expect(doc).toBeDefined(); expect(doc).toBeDefined();
}); });
// Two image documents that share an id but carry different image payloads
// are expected to report different hashes.
test("should generate different hash for different image contents", () => {
  const sharedId = "image";
  const [firstHash, secondHash] = [
    "data:image/png;base64,sample_image_content1",
    "data:image/png;base64,sample_image_content2",
  ].map((image) => new ImageDocument({ id_: sharedId, image }).hash);
  expect(firstHash).not.toBe(secondHash);
});
}); });
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment