From 66d59c862e3b82f0e6153ec12ab848bfb9e4102f Mon Sep 17 00:00:00 2001 From: Sourabh Desai <sourabhdesai@gmail.com> Date: Thu, 22 Jun 2023 07:22:36 +0000 Subject: [PATCH] update node types and doc serialization/deserialization --- packages/core/src/Document.ts | 18 ++++++++++++++++-- packages/core/src/Node.ts | 12 +++++------- packages/core/src/storage/constants.ts | 4 +--- packages/core/src/storage/docStore/utils.ts | 12 ++++++++---- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/packages/core/src/Document.ts b/packages/core/src/Document.ts index 0c1618cbe..2b2fc3a87 100644 --- a/packages/core/src/Document.ts +++ b/packages/core/src/Document.ts @@ -1,4 +1,12 @@ import { v4 as uuidv4 } from "uuid"; + +export enum NodeType { + DOCUMENT, + TEXT, + IMAGE, + INDEX, +} + export abstract class BaseDocument { text: string; docId?: string; @@ -45,14 +53,20 @@ export abstract class BaseDocument { getDocHash() { return this.docHash; } + + abstract getType(): NodeType; } export class Document extends BaseDocument { - static getType() { - return "Document"; + getType() { + return NodeType.DOCUMENT; } } export class ImageDocument extends Document { image?: string; + + getType() { + return NodeType.IMAGE; + } } \ No newline at end of file diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index bc1872e80..8247e104b 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -1,4 +1,4 @@ -import { BaseDocument } from "./Document"; +import { BaseDocument, NodeType } from "./Document"; export enum DocumentRelationship { SOURCE = "source", @@ -8,12 +8,6 @@ export enum DocumentRelationship { CHILD = "child", } -export enum NodeType { - TEXT, - IMAGE, - INDEX, -} - export class Node extends BaseDocument { relationships: { [key in DocumentRelationship]: string | string[] | null }; @@ -61,6 +55,10 @@ export class Node extends BaseDocument { childNodeIds(): string[] { return []; } + + getType() { + return NodeType.TEXT; + } } export 
interface NodeWithEmbedding { diff --git a/packages/core/src/storage/constants.ts b/packages/core/src/storage/constants.ts index e00fa7bc1..a0ded7c10 100644 --- a/packages/core/src/storage/constants.ts +++ b/packages/core/src/storage/constants.ts @@ -5,6 +5,4 @@ export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "docstore.json"; export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json"; export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json"; export const DEFAULT_NAMESPACE = "docstore"; -export const TYPE_KEY = "__type__"; -export const DATA_KEY = "__data__"; -export { DEFAULT_FS } from "./FileSystem"; \ No newline at end of file +export { DEFAULT_FS } from "./FileSystem"; diff --git a/packages/core/src/storage/docStore/utils.ts b/packages/core/src/storage/docStore/utils.ts index e7667ed5b..059174429 100644 --- a/packages/core/src/storage/docStore/utils.ts +++ b/packages/core/src/storage/docStore/utils.ts @@ -1,6 +1,8 @@ import { Node } from "../../Node"; -import { BaseDocument, NodeType, Document } from '../../Document'; -import { DATA_KEY, TYPE_KEY } from '../constants'; +import { BaseDocument, Document, NodeType } from '../../Document'; + +const TYPE_KEY = "__type__"; +const DATA_KEY = "__data__"; export function docToJson(doc: BaseDocument): Record<string, any> { @@ -16,9 +18,11 @@ export function jsonToDoc(docDict: Record<string, any>): BaseDocument { let doc: BaseDocument; if (docType === NodeType.DOCUMENT) { - doc = new Document(dataDict.docId, dataDict.text); + doc = new Document(dataDict.text, dataDict.docId, dataDict.embedding, dataDict.docHash); } else if (docType === NodeType.TEXT) { - doc = new Node(dataDict.relationships); + const relationships = dataDict.relationships; + doc = new Node(relationships.text, relationships.docId, + relationships.embedding, relationships.docHash); } else { throw new Error(`Unknown doc type: ${docType}`); } -- GitLab