-
Goran authored
Co-authored-by:
Alex Yang <himself65@outlook.com>
Goran authoredCo-authored-by:
Alex Yang <himself65@outlook.com>
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
IngestionCache.ts 2.00 KiB
import type { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import { MetadataMode } from "@llamaindex/core/schema";
import { createSHA256 } from "@llamaindex/env";
import {
docToJson,
jsonSerializer,
jsonToDoc,
} from "../storage/docStore/utils.js";
import { SimpleKVStore } from "../storage/kvStore/SimpleKVStore.js";
import type { BaseKVStore } from "../storage/kvStore/types.js";
/**
 * Serializes a transformation to a JSON string.
 *
 * A custom replacer is passed to `JSON.stringify` so that circular references
 * do not make serialization throw: any object value that has already been
 * visited during this call is replaced with `undefined`. As a consequence, a
 * repeated (non-circular) reference to the same object is also omitted; only
 * its first occurrence is serialized.
 *
 * @param obj - The transformation to serialize.
 * @returns The result of `JSON.stringify(obj, replacer)`.
 */
const transformToJSON = (obj: TransformComponent) => {
  // A WeakSet gives constant-time membership checks, whereas scanning an
  // array with indexOf made serialization quadratic in the number of objects.
  const seen = new WeakSet<object>();

  const replacer = (_key: string, value: unknown): unknown => {
    if (typeof value === "object" && value !== null) {
      if (seen.has(value)) {
        // Already visited: drop it instead of recursing into it again.
        return undefined;
      }
      seen.add(value);
    }
    return value;
  };

  return JSON.stringify(obj, replacer);
};
/**
 * Computes a digest identifying the combination of a set of nodes and a
 * transformation.
 *
 * The hashed input is, in order: every node's content (obtained with
 * `MetadataMode.ALL`) concatenated without a separator, the JSON form of the
 * transformation, and the transformation's `id`.
 *
 * @param nodes - Nodes whose content contributes to the hash.
 * @param transform - Transformation whose serialized form and `id` contribute
 *   to the hash.
 * @returns Whatever `digest()` returns on the hasher created by
 *   `createSHA256()`.
 */
export function getTransformationHash(
  nodes: BaseNode[],
  transform: TransformComponent,
) {
  const contents = nodes.map((item) => item.getContent(MetadataMode.ALL));
  const joinedContent: string = contents.join("");
  const serializedTransform: string = transformToJSON(transform);

  const hasher = createSHA256();
  const payload = joinedContent + serializedTransform + transform.id;
  hasher.update(payload);
  return hasher.digest();
}
/**
 * Cache of node lists keyed by a hash string, backed by a key-value store.
 *
 * Nodes are converted to JSON with `docToJson` before being stored and turned
 * back into nodes with `jsonToDoc` when read.
 */
export class IngestionCache {
  /** Collection name passed to the underlying store on every read and write. */
  collection: string = "llama_cache";
  /** Underlying key-value store; a `SimpleKVStore` is created by the constructor. */
  cache: BaseKVStore;
  /** Property name under which the serialized node list is kept in a stored value. */
  nodesKey = "nodes";

  /**
   * @param collection - Optional collection name; when omitted (or empty) the
   *   default `"llama_cache"` is kept.
   */
  constructor(collection?: string) {
    this.cache = new SimpleKVStore();
    if (!collection) {
      return;
    }
    this.collection = collection;
  }

  /**
   * Stores the given nodes under `hash`.
   *
   * @param hash - Key to store the nodes under.
   * @param nodes - Nodes to serialize and store.
   */
  async put(hash: string, nodes: BaseNode[]) {
    const serialized = nodes.map((item) => docToJson(item, jsonSerializer));
    const payload = { [this.nodesKey]: serialized };
    await this.cache.put(hash, payload, this.collection);
  }

  /**
   * Reads the nodes stored under `hash`.
   *
   * @param hash - Key to look up.
   * @returns The deserialized nodes, or `undefined` when nothing is stored
   *   under `hash` or the stored value has no array under `nodesKey`.
   */
  async get(hash: string): Promise<BaseNode[] | undefined> {
    const record = await this.cache.get(hash, this.collection);
    const stored = record?.[this.nodesKey];
    // A missing record, a missing entry and a non-array entry are all misses.
    if (!Array.isArray(stored)) {
      return undefined;
    }
    return stored.map((entry) => jsonToDoc(entry, jsonSerializer));
  }
}