diff --git a/README.md b/README.md
index 11b97a378370fe6452c24be8eff31bda55f02c11..9e6d712e712401354c747f1a667c09394b72b228 100644
--- a/README.md
+++ b/README.md
@@ -9,3 +9,56 @@ Right now there are two packages of importance:
 packages/core which is the main NPM library @llamaindex/core
 
 apps/simple is where the demo code lives
+
+### Turborepo docs
+
+You can check out how Turborepo works using the built-in [README-turborepo.md](README-turborepo.md)
+
+## Getting Started
+
+Install Node.js, preferably v18, using nvm or n.
+
+Inside the llamascript directory:
+
+```
+npm i -g pnpm ts-node
+pnpm install
+```
+
+Note: we use pnpm in this repo. It has much of the same functionality and CLI surface as npm, but it does some things better in a monorepo, like centralizing dependencies and caching.
+
+PNPM has documentation on its [workspace feature](https://pnpm.io/workspaces), and Turborepo has some [useful documentation also](https://turbo.build/repo/docs/core-concepts/monorepos/running-tasks).
+
+### Running Typescript
+
+When we publish to NPM we will have a tsc compiled version of the library in JS. For now, the easiest thing to do is use ts-node.
+
+### Test cases
+
+To run them, run
+
+```
+pnpm run test
+```
+
+To write new test cases write them in packages/core/src/tests
+
+We use Jest https://jestjs.io/ to write our test cases. Jest comes with a bunch of built in assertions using the expect function: https://jestjs.io/docs/expect
+
+### Demo applications
+
+You can create new demo applications in the apps folder. Just run pnpm init in the folder after you create it to create its own package.json
+
+### Installing packages
+
+To install packages for a specific package or demo application, run
+
+```
+pnpm add [NPM Package] --filter [package or application i.e. core or simple]
+```
+
+To install packages for every package or application run
+
+```
+pnpm add -w [NPM Package]
+```
diff --git a/apps/simple/index.ts b/apps/simple/index.ts
index 254c8703f8247e8728b35d962737603aa8fd30c6..d7e804240f0158e99b27333501ed615e61fa4ca3 100644
--- a/apps/simple/index.ts
+++ b/apps/simple/index.ts
@@ -2,8 +2,12 @@ import { Document } from "@llamaindex/core/src/Document";
 import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex";
 import essay from "./essay";
 
-const document = new Document("doc1", essay);
-const index = VectorStoreIndex.fromDocuments([document]);
-const queryEngine = index.asQueryEngine();
-const response = await queryEngine.aquery("foo");
-console.log(response);
+(async () => {
+  const document = new Document(essay);
+  const index = await VectorStoreIndex.fromDocuments([document]);
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.aquery(
+    "What did the author do growing up?"
+  );
+  console.log(response.toString());
+})();
diff --git a/apps/simple/simple.txt b/apps/simple/simple.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7cd89b8d0d51646af616cc9f784f61fcb1469bf1
--- /dev/null
+++ b/apps/simple/simple.txt
@@ -0,0 +1,9 @@
+Simple flow:
+
+Get document list, in this case one document.
+Split each document into nodes, in this case sentences or lines.
+Embed each of the nodes and get vectors. Store them in memory for now.
+Embed query.
+Compare query with nodes and get the top n
+Put the top n nodes into the prompt.
+Execute prompt, get result.
diff --git a/package.json b/package.json
index 5bf808928028a6b3065e1ed206c16e13b796e777..e177cc22af4e303b477911fd422ae619ee7a2135 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,9 @@
     "prettier": "^2.5.1",
     "prettier-plugin-tailwindcss": "^0.3.0",
     "ts-jest": "^29.1.0",
-    "turbo": "latest"
+    "turbo": "latest",
+    "wink-nlp": "latest",
+    "tiktoken-node": "latest"
   },
   "packageManager": "pnpm@7.15.0",
   "name": "llamascript"
diff --git a/packages/core/package.json b/packages/core/package.json
index be8e1a8d2f176a313543a23615691ab7ea6e530f..bd0debc64778c5e183a5394b3d1856115378c4fe 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -4,7 +4,8 @@
     "axios": "^0.26.1",
     "js-tiktoken": "^1.0.7",
     "lodash": "^4.17.21",
-    "openai": "^3.3.0"
+    "openai": "^3.3.0",
+    "uuid": "^9.0.0"
   },
   "main": "src/index.ts",
   "types": "src/index.ts",
@@ -13,7 +14,8 @@
   },
   "devDependencies": {
     "@types/lodash": "^4.14.195",
-    "@types/node": "^20.3.1",
+    "@types/node": "^18",
+    "@types/uuid": "^9.0.2",
     "node-stdlib-browser": "^1.2.0"
   }
 }
diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/BaseIndex.ts
index 2bc40619dc68f47c697fb88fcfb737c48dde3fe6..9934e56071b23a91b65a536dabfcbe041a5c38a5 100644
--- a/packages/core/src/BaseIndex.ts
+++ b/packages/core/src/BaseIndex.ts
@@ -1,19 +1,90 @@
 import { Document } from "./Document";
-import { Node } from "./Node";
-import { BaseQueryEngine } from "./QueryEngine";
-
+import { Node, NodeWithEmbedding } from "./Node";
+import { SimpleNodeParser } from "./NodeParser";
+import { BaseQueryEngine, RetrieverQueryEngine } from "./QueryEngine";
+import { v4 as uuidv4 } from "uuid";
+import { VectorIndexRetriever } from "./Retriever";
+import { BaseEmbedding, OpenAIEmbedding } from "./Embedding";
 export class BaseIndex {
-  constructor(nodes?: Node[]) {}
+  nodes: Node[] = [];
 
-  asQueryEngine(): BaseQueryEngine {
-    console.log("asQueryEngine");
-    return new BaseQueryEngine();
+  constructor(nodes?: Node[]) {
+    this.nodes = nodes ?? [];
+  }
+}
+
+export class IndexDict {
+  indexId: string;
+  summary?: string;
+  nodesDict: Record<string, Node> = {};
+  docStore: Record<string, Document> = {}; // FIXME: this should be implemented in storageContext
+
+  constructor(indexId = uuidv4(), summary = undefined) {
+    this.indexId = indexId;
+    this.summary = summary;
+  }
+
+  getSummary(): string {
+    if (this.summary === undefined) {
+      throw new Error("summary field of the index dict is not set");
+    }
+    return this.summary;
+  }
+
+  addNode(node: Node, textId?: string) {
+    const vectorId = textId ?? node.getDocId();
+    this.nodesDict[vectorId] = node;
   }
 }
 
 export class VectorStoreIndex extends BaseIndex {
-  static fromDocuments(documents: Document[]): VectorStoreIndex {
-    console.log("fromDocuments");
-    return new VectorStoreIndex();
+  indexStruct: IndexDict;
+  nodesWithEmbeddings: NodeWithEmbedding[] = []; // FIXME replace with storage context
+  embeddingService: BaseEmbedding; // FIXME replace with service context
+
+  constructor(nodes: Node[]) {
+    super(nodes);
+    this.indexStruct = new IndexDict();
+
+    if (nodes !== undefined) {
+      this.buildIndexFromNodes();
+    }
+
+    this.embeddingService = new OpenAIEmbedding();
+  }
+
+  async getNodeEmbeddingResults(logProgress = false) {
+    for (let i = 0; i < this.nodes.length; ++i) {
+      const node = this.nodes[i];
+      if (logProgress) {
+        console.log(`getting embedding for node ${i}/${this.nodes.length}`);
+      }
+      const embedding = await this.embeddingService.aGetTextEmbedding(
+        node.getText()
+      );
+      this.nodesWithEmbeddings.push({ node: node, embedding: embedding });
+    }
+  }
+
+  buildIndexFromNodes() {
+    for (const node of this.nodes) {
+      this.indexStruct.addNode(node);
+    }
+  }
+
+  static async fromDocuments(documents: Document[]): Promise<VectorStoreIndex> {
+    const nodeParser = new SimpleNodeParser(); // FIXME use service context
+    const nodes = nodeParser.getNodesFromDocuments(documents);
+    const index = new VectorStoreIndex(nodes);
+    await index.getNodeEmbeddingResults();
+    return index;
+  }
+
+  asRetriever(): VectorIndexRetriever {
+    return new VectorIndexRetriever(this, this.embeddingService);
+  }
+
+  asQueryEngine(): BaseQueryEngine {
+    return new RetrieverQueryEngine(this.asRetriever());
   }
 }
diff --git a/packages/core/src/Document.ts b/packages/core/src/Document.ts
index 0aa82e7e8c04deb5e8b06bebd27e3fd6a1704bc8..0c1618cbe2c11900d9a2e8529d80b225afe0cbbf 100644
--- a/packages/core/src/Document.ts
+++ b/packages/core/src/Document.ts
@@ -1,50 +1,58 @@
-export enum NodeType {
-  DOCUMENT,
-  TEXT,
-  IMAGE,
-  INDEX,
-}
-
-export interface BaseDocument {
-  getText(): string;
-  getDocId(): string;
-  getDocHash(): string;
-  getEmbedding(): number[];
-  getType(): NodeType;
-}
-
-export class Document implements BaseDocument {
-  docId: string;
+import { v4 as uuidv4 } from "uuid";
+export abstract class BaseDocument {
   text: string;
-  // embedding: number[];
-  // docHash: string;
-
-  constructor(docId: string, text: string) {
-    this.docId = docId;
+  docId?: string;
+  embedding?: number[];
+  docHash?: string;
+
+  constructor(
+    text: string,
+    docId?: string,
+    embedding?: number[],
+    docHash?: string
+  ) {
     this.text = text;
+    this.docId = docId;
+    this.embedding = embedding;
+    this.docHash = docHash;
+
+    if (!docId) {
+      this.docId = uuidv4();
+    }
   }
 
   getText() {
-    console.log("getText");
-    return "";
+    if (this.text === undefined) {
+      throw new Error("Text not set");
+    }
+    return this.text;
   }
 
   getDocId() {
-    console.log("getDocId");
-    return "";
+    if (this.docId === undefined) {
+      throw new Error("doc id not set");
+    }
+    return this.docId;
   }
 
-  getDocHash() {
-    console.log("getDocHash");
-    return "";
+  getEmbedding() {
+    if (this.embedding === undefined) {
+      throw new Error("Embedding not set");
+    }
+    return this.embedding;
   }
 
-  getEmbedding() {
-    console.log("getEmbedding");
-    return [];
+  getDocHash() {
+    return this.docHash;
   }
+}
 
-  getType() {
-    return NodeType.DOCUMENT;
+export class Document extends BaseDocument {
+  static getType() {
+    return "Document";
   }
 }
+
+export class ImageDocument extends Document {
+  image?: string;
+}
\ No newline at end of file
diff --git a/packages/core/src/Embedding.ts b/packages/core/src/Embedding.ts
index 1d036e6ef99547febd7a51253d44db8347f89217..cc9c551208a157f40393f20626f00c0b697d237b 100644
--- a/packages/core/src/Embedding.ts
+++ b/packages/core/src/Embedding.ts
@@ -1,29 +1,111 @@
+import { DEFAULT_SIMILARITY_TOP_K } from "./constants";
+import { OpenAISession, getOpenAISession } from "./openai";
+
 export enum SimilarityType {
   DEFAULT = "cosine",
   DOT_PRODUCT = "dot_product",
   EUCLIDEAN = "euclidean",
 }
 
-export class BaseEmbedding {
-  getQueryEmbedding(query: string): number[] {
-    return [];
+export function getTopKEmbeddings(
+  query_embedding: number[],
+  embeddings: number[][],
+  similarityTopK: number = DEFAULT_SIMILARITY_TOP_K,
+  embeddingIds: any[] | null = null,
+  similarityCutoff: number | null = null
+): [number[], any[]] {
+  if (embeddingIds == null) {
+    embeddingIds = Array(embeddings.length).map((_, i) => i);
+  }
+
+  if (embeddingIds.length !== embeddings.length) {
+    throw new Error(
+      "getTopKEmbeddings: embeddings and embeddingIds length mismatch"
+    );
+  }
+
+  let similarities: { similarity: number; id: number }[] = [];
+
+  for (let i = 0; i < embeddings.length; i++) {
+    let similarity = BaseEmbedding.similarity(query_embedding, embeddings[i]);
+    if (similarityCutoff == null || similarity > similarityCutoff) {
+      similarities.push({ similarity: similarity, id: embeddingIds[i] });
+    }
   }
 
-  getTextEmbedding(text: string): number[] {
-    return [];
+  similarities.sort((a, b) => b.similarity - a.similarity); // Reverse sort
+
+  let resultSimilarities: number[] = [];
+  let resultIds: any[] = [];
+
+  for (let i = 0; i < similarityTopK; i++) {
+    if (i >= similarities.length) {
+      break;
+    }
+    resultSimilarities.push(similarities[i].similarity);
+    resultIds.push(similarities[i].id);
   }
 
-  similarity(
+  return [resultSimilarities, resultIds];
+}
+
+export abstract class BaseEmbedding {
+  static similarity(
     embedding1: number[],
     embedding2: number[],
-    mode: SimilarityType
+    mode: SimilarityType = SimilarityType.DOT_PRODUCT
   ): number {
-    return 0;
+    if (embedding1.length !== embedding2.length) {
+      throw new Error("Embedding length mismatch");
+    }
+
+    if (mode === SimilarityType.DOT_PRODUCT) {
+      let result = 0;
+      for (let i = 0; i < embedding1.length; i++) {
+        result += embedding1[i] * embedding2[i];
+      }
+      return result;
+    } else {
+      throw new Error("Not implemented yet");
+    }
   }
+
+  abstract aGetTextEmbedding(text: string): Promise<number[]>;
+  abstract aGetQueryEmbedding(query: string): Promise<number[]>;
 }
 
 enum OpenAIEmbeddingModelType {
   TEXT_EMBED_ADA_002 = "text-embedding-ada-002",
 }
 
-export class OpenAIEmbedding extends BaseEmbedding {}
+export class OpenAIEmbedding extends BaseEmbedding {
+  session: OpenAISession;
+  model: OpenAIEmbeddingModelType;
+
+  constructor() {
+    super();
+
+    this.session = getOpenAISession();
+    this.model = OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002;
+  }
+
+  private async _aGetOpenAIEmbedding(input: string) {
+    input = input.replace(/\n/g, " ");
+    //^ NOTE this performance helper is in the OpenAI python library but may not be in the JS library
+
+    const { data } = await this.session.openai.createEmbedding({
+      model: this.model,
+      input,
+    });
+
+    return data.data[0].embedding;
+  }
+
+  async aGetTextEmbedding(text: string): Promise<number[]> {
+    return this._aGetOpenAIEmbedding(text);
+  }
+
+  async aGetQueryEmbedding(query: string): Promise<number[]> {
+    return this._aGetOpenAIEmbedding(query);
+  }
+}
diff --git a/packages/core/src/LLMPredictor.ts b/packages/core/src/LLMPredictor.ts
index 522965a709322a989fbc58d64251fb770de2715a..0a7618b8d845d3752d2a3679cd85fa4d15498794 100644
--- a/packages/core/src/LLMPredictor.ts
+++ b/packages/core/src/LLMPredictor.ts
@@ -1,27 +1,40 @@
+import { ChatOpenAI } from "./LanguageModel";
+
 export interface BaseLLMPredictor {
   getLlmMetadata(): Promise<any>;
-  predict(prompt: string, options: any): Promise<any>;
-  stream(prompt: string, options: any): Promise<any>;
+  predict(prompt: string, options: any): Promise<string>;
+  // stream(prompt: string, options: any): Promise<any>;
 }
 
-export class LLMPredictor implements BaseLLMPredictor {
+export class ChatOpenAILLMPredictor implements BaseLLMPredictor {
   llm: string;
   retryOnThrottling: boolean;
+  languageModel: ChatOpenAI;
 
-  constructor(llm: string, retryOnThrottling: boolean = true) {
+  constructor(
+    llm: string = "gpt-3.5-turbo",
+    retryOnThrottling: boolean = true
+  ) {
     this.llm = llm;
     this.retryOnThrottling = retryOnThrottling;
+
+    this.languageModel = new ChatOpenAI(this.llm);
   }
 
   async getLlmMetadata() {
-    console.log("getLlmMetadata");
+    throw new Error("Not implemented yet");
   }
 
   async predict(prompt: string, options: any) {
-    console.log("predict");
+    return this.languageModel.agenerate([
+      {
+        content: prompt,
+        type: "human",
+      },
+    ]);
   }
 
-  async stream(prompt: string, options: any) {
-    console.log("stream");
-  }
+  // async stream(prompt: string, options: any) {
+  //   console.log("stream");
+  // }
 }
diff --git a/packages/core/src/LanguageModel.ts b/packages/core/src/LanguageModel.ts
new file mode 100644
index 0000000000000000000000000000000000000000..3ca0af7cfbd3b31323de13b1ce33daf16dc308fb
--- /dev/null
+++ b/packages/core/src/LanguageModel.ts
@@ -0,0 +1,80 @@
+import {
+  ChatCompletionRequestMessageRoleEnum,
+  Configuration,
+  OpenAISession,
+  OpenAIWrapper,
+  getOpenAISession,
+} from "./openai";
+
+interface LLMResult {}
+
+interface BaseLanguageModel {}
+
+type MessageType = "human" | "ai" | "system" | "generic" | "function";
+
+interface BaseMessage {
+  content: string;
+  type: MessageType;
+}
+
+interface Generation {
+  text: string;
+  generationInfo?: { [key: string]: any };
+}
+
+interface LLMResult {
+  generations: Generation[][]; // Each input can have more than one generations
+}
+
+class BaseChatModel implements BaseLanguageModel {}
+
+export class ChatOpenAI extends BaseChatModel {
+  model: string;
+  temperature: number = 0.7;
+  openAIKey: string | null = null;
+  requestTimeout: number | null = null;
+  maxRetries: number = 6;
+  n: number = 1;
+  maxTokens?: number;
+
+  session: OpenAISession;
+
+  constructor(model: string = "gpt-3.5-turbo") {
+    super();
+    this.model = model;
+    this.session = getOpenAISession();
+  }
+
+  static mapMessageType(
+    type: MessageType
+  ): ChatCompletionRequestMessageRoleEnum {
+    switch (type) {
+      case "human":
+        return "user";
+      case "ai":
+        return "assistant";
+      case "system":
+        return "system";
+      case "function":
+        return "function";
+      default:
+        return "user";
+    }
+  }
+
+  async agenerate(messages: BaseMessage[]) {
+    const { data } = await this.session.openai.createChatCompletion({
+      model: this.model,
+      temperature: this.temperature,
+      max_tokens: this.maxTokens,
+      n: this.n,
+      messages: messages.map((message) => ({
+        role: ChatOpenAI.mapMessageType(message.type),
+        content: message.content,
+      })),
+    });
+
+    const content = data.choices[0].message?.content ?? "";
+    return content;
+  }
+}
diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts
index 1a27ad219f5d3d53d4a19864f2dc4958999ec373..bc1872e807673ae4fc1f5b66668db42338a054ea 100644
--- a/packages/core/src/Node.ts
+++ b/packages/core/src/Node.ts
@@ -1,4 +1,4 @@
-import { BaseDocument, NodeType } from "./Document";
+import { BaseDocument } from "./Document";
 
 export enum DocumentRelationship {
   SOURCE = "source",
@@ -8,25 +8,34 @@ export enum DocumentRelationship {
   CHILD = "child",
 }
 
+export enum NodeType {
+  TEXT,
+  IMAGE,
+  INDEX,
+}
 
-export class Node implements BaseDocument {
-  relationships: { [key in DocumentRelationship]: string | string[] };
+export class Node extends BaseDocument {
+  relationships: { [key in DocumentRelationship]: string | string[] | null };
 
-  constructor(relationships: { [key in DocumentRelationship]: string | string[] }) {
-    this.relationships = relationships;
-  }
+  constructor(
+    text: string, // Text is required
+    docId?: string,
+    embedding?: number[],
+    docHash?: string
+  ) {
+    if (text === undefined) {
+      throw new Error("Text is required");
+    }
 
-  getText(): string {
-    throw new Error("Method not implemented.");
-  }
-  getDocId(): string {
-    throw new Error("Method not implemented.");
-  }
-  getDocHash(): string {
-    throw new Error("Method not implemented.");
-  }
-  getEmbedding(): number[] {
-    throw new Error("Method not implemented.");
+    super(text, docId, embedding, docHash);
+
+    this.relationships = {
+      source: null,
+      previous: null,
+      next: null,
+      parent: null,
+      child: [],
+    };
   }
 
   getNodeInfo(): { [key: string]: any } {
@@ -52,8 +61,14 @@ export class Node implements BaseDocument {
   childNodeIds(): string[] {
     return [];
   }
+}
 
-  getType(): NodeType {
-    return NodeType.NODE;
-  }
+export interface NodeWithEmbedding {
+  node: Node;
+  embedding: number[];
 }
+
+export interface NodeWithScore {
+  node: Node;
+  score: number;
+}
\ No newline at end of file
diff --git a/packages/core/src/NodeParser.ts b/packages/core/src/NodeParser.ts
index 37b37c371c07ff9ccb1a9c4acf86256558a94b87..8a13f2f3e4f78be66022e2c2f6ca69ae11228eb3 100644
--- a/packages/core/src/NodeParser.ts
+++ b/packages/core/src/NodeParser.ts
@@ -1,13 +1,57 @@
+import { Document } from "./Document";
+import { Node } from "./Node";
+import { SentenceSplitter } from "./TextSplitter";
+
+export function getTextSplitsFromDocument(
+  document: Document,
+  textSplitter: SentenceSplitter
+) {
+  const text = document.getText();
+  const splits = textSplitter.splitText(text);
+
+  return splits;
+}
+
+export function getNodesFromDocument(
+  document: Document,
+  textSplitter: SentenceSplitter
+) {
+  let nodes: Node[] = [];
+
+  const textSplits = getTextSplitsFromDocument(document, textSplitter);
+
+  textSplits.forEach((textSplit, index) => {
+    const node = new Node(textSplit);
+    node.relationships.source = document.getDocId();
+    nodes.push(node);
+  });
+
+  return nodes;
+}
+
 interface NodeParser {}
+export class SimpleNodeParser implements NodeParser {
+  textSplitter: SentenceSplitter;
 
-class SimpleNodeParser implements NodeParser {
   constructor(
     textSplitter: any = null,
     includeExtraInfo: boolean = true,
     includePrevNextRel: boolean = true
-  ) {}
+  ) {
+    this.textSplitter = textSplitter ?? new SentenceSplitter();
+  }
 
   static fromDefaults(): SimpleNodeParser {
     return new SimpleNodeParser();
   }
+
+  /**
+   * Generate Node objects from documents
+   * @param documents
+   */
+  getNodesFromDocuments(documents: Document[]) {
+    return documents
+      .map((document) => getNodesFromDocument(document, this.textSplitter))
+      .flat();
+  }
 }
diff --git a/packages/core/src/Prompt.ts b/packages/core/src/Prompt.ts
index 94d329d34b0efa2817d76f6035edecbf5ea3448b..2d90617b3affb93a4826be6b6372abc3ec33d511 100644
--- a/packages/core/src/Prompt.ts
+++ b/packages/core/src/Prompt.ts
@@ -1,8 +1,8 @@
 /**
- * A prompt is a function that takes a dictionary of inputs and returns a string.
+ * A SimplePrompt is a function that takes a dictionary of inputs and returns a string.
  * NOTE this is a different interface compared to LlamaIndex Python
  */
-export type Prompt = (input: { [key: string]: string }) => string;
+export type SimplePrompt = (input: { [key: string]: string }) => string;
 
 /*
 DEFAULT_TEXT_QA_PROMPT_TMPL = (
@@ -15,9 +15,7 @@ DEFAULT_TEXT_QA_PROMPT_TMPL = (
 )
 */
 
-export const defaultTextQaPrompt: Prompt = (input: {
-  [key: string]: string;
-}) => {
+export const defaultTextQaPrompt: SimplePrompt = (input) => {
   const { context, query } = input;
 
   return `Context information is below.
@@ -42,9 +40,7 @@ DEFAULT_SUMMARY_PROMPT_TMPL = (
 )
 */
 
-export const defaultSummaryPrompt: Prompt = (input: {
-  [key: string]: string;
-}) => {
+export const defaultSummaryPrompt: SimplePrompt = (input) => {
   const { context } = input;
 
   return `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.
@@ -56,3 +52,30 @@ ${context}
 SUMMARY:"""
 `;
 };
+
+/*
+DEFAULT_REFINE_PROMPT_TMPL = (
+    "The original question is as follows: {query_str}\n"
+    "We have provided an existing answer: {existing_answer}\n"
+    "We have the opportunity to refine the existing answer "
+    "(only if needed) with some more context below.\n"
+    "------------\n"
+    "{context_msg}\n"
+    "------------\n"
+    "Given the new context, refine the original answer to better "
+    "answer the question. "
+    "If the context isn't useful, return the original answer."
+)
+*/
+
+export const defaultRefinePrompt: SimplePrompt = (input) => {
+  const { query, existingAnswer, context } = input;
+
+  return `The original question is as follows: ${query}
+We have provided an existing answer: ${existingAnswer}
+We have the opportunity to refine the existing answer (only if needed) with some more context below.
+------------
+${context}
+------------
+Given the new context, refine the original answer to better answer the question. If the context isn't useful, return the original answer.`;
+};
diff --git a/packages/core/src/QueryEngine.ts b/packages/core/src/QueryEngine.ts
index fb3acbf3bb65390e5911e2426fd056802ff01f26..10dace35954c1f89cb4e4662f1e607d5167d3a49 100644
--- a/packages/core/src/QueryEngine.ts
+++ b/packages/core/src/QueryEngine.ts
@@ -1,7 +1,22 @@
 import { Response } from "./Response";
+import { ResponseSynthesizer } from "./ResponseSynthesizer";
+import { BaseRetriever } from "./Retriever";
 
-export class BaseQueryEngine {
-  async aquery(q: string): Promise<Response> {
-    return new Response();
+export interface BaseQueryEngine {
+  aquery(query: string): Promise<Response>;
+}
+
+export class RetrieverQueryEngine {
+  retriever: BaseRetriever;
+  responseSynthesizer: ResponseSynthesizer;
+
+  constructor(retriever: BaseRetriever) {
+    this.retriever = retriever;
+    this.responseSynthesizer = new ResponseSynthesizer();
+  }
+
+  async aquery(query: string) {
+    const nodes = await this.retriever.aretrieve(query);
+    return this.responseSynthesizer.asynthesize(query, nodes);
   }
 }
diff --git a/packages/core/src/Reader.ts b/packages/core/src/Reader.ts
index 90a25a44eb362a3be7dcf2d5465156af5979f0e4..9ceea3c1f4b80ac84ccc147e8c85cf8b73262dd5 100644
--- a/packages/core/src/Reader.ts
+++ b/packages/core/src/Reader.ts
@@ -5,13 +5,13 @@ export interface BaseReader {
 }
 
 export class SimpleDirectoryReader implements BaseReader {
-  async loadData(options) {
+  async loadData(_options: any) {
     return new Document("1", "");
   }
 }
 
 export class PDFReader implements BaseReader {
-  async loadData(options) {
+  async loadData(_options: any) {
     return new Document("1", "");
   }
 }
diff --git a/packages/core/src/Response.ts b/packages/core/src/Response.ts
index daebe68bde27e28f4762a5cf9f4c3f579af73023..03e0bb823c156bdc67538edade17f467ed8250c6 100644
--- a/packages/core/src/Response.ts
+++ b/packages/core/src/Response.ts
@@ -10,6 +10,10 @@ export class Response {
   }
 
   getFormattedSources() {
-    console.log("getFormattedSources");
+    throw new Error("Not implemented yet");
+  }
+
+  toString() {
+    return this.response ?? "";
   }
 }
diff --git a/packages/core/src/ResponseSynthesizer.ts b/packages/core/src/ResponseSynthesizer.ts
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3b26d3a0abb1f2eef5fcf7df2746eb01a2b35d6b 100644
--- a/packages/core/src/ResponseSynthesizer.ts
+++ b/packages/core/src/ResponseSynthesizer.ts
@@ -0,0 +1,49 @@
+import { ChatOpenAILLMPredictor } from "./LLMPredictor";
+import { NodeWithScore } from "./Node";
+import { SimplePrompt, defaultTextQaPrompt } from "./Prompt";
+import { Response } from "./Response";
+
+interface BaseResponseBuilder {
+  agetResponse(query: string, textChunks: string[]): Promise<string>;
+}
+
+export class SimpleResponseBuilder {
+  llmPredictor: ChatOpenAILLMPredictor;
+  textQATemplate: SimplePrompt;
+
+  constructor() {
+    this.llmPredictor = new ChatOpenAILLMPredictor();
+    this.textQATemplate = defaultTextQaPrompt;
+  }
+
+  async agetResponse(query: string, textChunks: string[]): Promise<string> {
+    const input = {
+      query,
+      context: textChunks.join("\n\n"),
+    };
+
+    const prompt = this.textQATemplate(input);
+    return this.llmPredictor.predict(prompt, {});
+  }
+}
+
+export function getResponseBuilder(): BaseResponseBuilder {
+  return new SimpleResponseBuilder();
+}
+
+export class ResponseSynthesizer {
+  responseBuilder: BaseResponseBuilder;
+
+  constructor() {
+    this.responseBuilder = getResponseBuilder();
+  }
+
+  async asynthesize(query: string, nodes: NodeWithScore[]) {
+    let textChunks: string[] = nodes.map((node) => node.node.text);
+    const response = await this.responseBuilder.agetResponse(query, textChunks);
+    return new Response(
+      response,
+      nodes.map((node) => node.node)
+    );
+  }
+}
diff --git a/packages/core/src/Retriever.ts b/packages/core/src/Retriever.ts
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e9b03cc87dccc523d02309efbc71b355a291c5f8 100644
--- a/packages/core/src/Retriever.ts
+++ b/packages/core/src/Retriever.ts
@@ -0,0 +1,43 @@
+import { VectorStoreIndex } from "./BaseIndex";
+import { BaseEmbedding, OpenAIEmbedding, getTopKEmbeddings } from "./Embedding";
+import { NodeWithScore } from "./Node";
+import { DEFAULT_SIMILARITY_TOP_K } from "./constants";
+
+export interface BaseRetriever {
+  aretrieve(query: string): Promise<any>;
+}
+
+export class VectorIndexRetriever implements BaseRetriever {
+  index: VectorStoreIndex;
+  similarityTopK = DEFAULT_SIMILARITY_TOP_K;
+  embeddingService: BaseEmbedding;
+
+  constructor(index: VectorStoreIndex, embeddingService: BaseEmbedding) {
+    this.index = index;
+    this.embeddingService = embeddingService;
+  }
+
+  async aretrieve(query: string): Promise<NodeWithScore[]> {
+    const queryEmbedding = await this.embeddingService.aGetQueryEmbedding(
+      query
+    );
+    const [similarities, ids] = getTopKEmbeddings(
+      queryEmbedding,
+      this.index.nodesWithEmbeddings.map((node) => node.embedding),
+      undefined,
+      this.index.nodesWithEmbeddings.map((node) => node.node.docId)
+    );
+
+    let nodesWithScores: NodeWithScore[] = [];
+
+    for (let i = 0; i < ids.length; i++) {
+      const node = this.index.indexStruct.nodesDict[ids[i]];
+      nodesWithScores.push({
+        node: node,
+        score: similarities[i],
+      });
+    }
+
+    return nodesWithScores;
+  }
+}
diff --git a/packages/core/src/TextSplitter.ts b/packages/core/src/TextSplitter.ts
index ee86d1e80cb76ac693404d446eebbc2a90133c1d..ba188df5b11a353aa2d127361e0e40fe8557d7e0 100644
--- a/packages/core/src/TextSplitter.ts
+++ b/packages/core/src/TextSplitter.ts
@@ -2,145 +2,241 @@
 
 import { DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP } from "./constants";
 
-class TokenTextSplitter {
-  private _separator: string;
-  private _chunk_size: number;
-  private _chunk_overlap: number;
+class TextSplit {
+  textChunk: string;
+  numCharOverlap: number | undefined;
+
+  constructor(
+    textChunk: string,
+    numCharOverlap: number | undefined = undefined
+  ) {
+    this.textChunk = textChunk;
+    this.numCharOverlap = numCharOverlap;
+  }
+}
+
+type SplitRep = [text: string, numTokens: number];
+
+export class SentenceSplitter {
+  private chunkSize: number;
+  private chunkOverlap: number;
   private tokenizer: any;
-  private _backup_separators: string[];
-  private callback_manager: any;
+  private tokenizerDecoder: any;
+  private paragraphSeparator: string;
+  private chunkingTokenizerFn: any;
+  // private _callback_manager: any;
 
   constructor(
-    separator: string = " ",
-    chunk_size: number = DEFAULT_CHUNK_SIZE,
-    chunk_overlap: number = DEFAULT_CHUNK_OVERLAP,
+    chunkSize: number = DEFAULT_CHUNK_SIZE,
+    chunkOverlap: number = DEFAULT_CHUNK_OVERLAP,
     tokenizer: any = null,
-    backup_separators: string[] = ["\n"]
-    // callback_manager: any = null
+    tokenizerDecoder: any = null,
+    paragraphSeparator: string = "\n\n\n",
+    chunkingTokenizerFn: any = undefined
+    // callback_manager: any = undefined
   ) {
-    if (chunk_overlap > chunk_size) {
+    if (chunkOverlap > chunkSize) {
       throw new Error(
-        `Got a larger chunk overlap (${chunk_overlap}) than chunk size (${chunk_size}), should be smaller.`
+        `Got a larger chunk overlap (${chunkOverlap}) than chunk size (${chunkSize}), should be smaller.`
       );
     }
-    this._separator = separator;
-    this._chunk_size = chunk_size;
-    this._chunk_overlap = chunk_overlap;
-    this.tokenizer = tokenizer || globals_helper.tokenizer;
-    this._backup_separators = backup_separators;
-    // this.callback_manager = callback_manager || new CallbackManager([]);
+    this.chunkSize = chunkSize;
+    this.chunkOverlap = chunkOverlap;
+    // this._callback_manager = callback_manager || new CallbackManager([]);
+
+    if (chunkingTokenizerFn == undefined) {
+      // define a callable mapping a string to a list of strings
+      const defaultChunkingTokenizerFn = (text: string) => {
+        var result = text.match(/[^.?!]+[.!?]+[\])'"`’”]*|.+/g);
+        return result;
+      };
+
+      chunkingTokenizerFn = defaultChunkingTokenizerFn;
+    }
+
+    if (tokenizer == undefined || tokenizerDecoder == undefined) {
+      const tiktoken = require("tiktoken-node");
+      let enc = new tiktoken.getEncoding("gpt2");
+      const default_tokenizer = (text: string) => {
+        return enc.encode(text);
+      };
+      const defaultTokenizerDecoder = (text: string) => {
+        return enc.decode(text);
+      };
+      tokenizer = default_tokenizer;
+      tokenizerDecoder = defaultTokenizerDecoder;
+    }
+    this.tokenizer = tokenizer;
+    this.tokenizerDecoder = tokenizerDecoder;
+
+    this.paragraphSeparator = paragraphSeparator;
+    this.chunkingTokenizerFn = chunkingTokenizerFn;
   }
 
-  private _reduceChunkSize(
-    start_idx: number,
-    cur_idx: number,
-    splits: string[]
-  ): number {
-    let current_doc_total = this.tokenizer(
-      splits.slice(start_idx, cur_idx).join(this._separator)
-    ).length;
-    while (current_doc_total > this._chunk_size) {
-      const percent_to_reduce =
-        (current_doc_total - this._chunk_size) / current_doc_total;
-      const num_to_reduce =
-        parseInt(percent_to_reduce.toString()) * (cur_idx - start_idx) + 1;
-      cur_idx -= num_to_reduce;
-      current_doc_total = this.tokenizer(
-        splits.slice(start_idx, cur_idx).join(this._separator)
-      ).length;
+  private getEffectiveChunkSize(extraInfoStr?: string): number {
+    // get "effective" chunk size by removing the metadata
+    let effectiveChunkSize;
+    if (extraInfoStr != undefined) {
+      const numExtraTokens = this.tokenizer(`${extraInfoStr}\n\n`).length + 1;
+      effectiveChunkSize = this.chunkSize - numExtraTokens;
+      if (effectiveChunkSize <= 0) {
+        throw new Error(
+          "Effective chunk size is non positive after considering extra_info"
+        );
+      }
+    } else {
+      effectiveChunkSize = this.chunkSize;
     }
-    return cur_idx;
+    return effectiveChunkSize;
   }
 
-  _preprocessSplits(splits: Array<string>, chunk_size: number): Array<string> {
-    const new_splits: Array<string> = [];
-    for (const split of splits) {
-      const num_cur_tokens = tokenizer(split).length;
-      if (num_cur_tokens <= chunk_size) {
-        new_splits.push(split);
+  getParagraphSplits(text: string, effectiveChunkSize?: number): string[] {
+    // get paragraph splits
+    let paragraphSplits: string[] = text.split(this.paragraphSeparator);
+    let idx = 0;
+    if (effectiveChunkSize == undefined) {
+      return paragraphSplits;
+    }
+
+    // merge paragraphs that are too small
+    while (idx < paragraphSplits.length) {
+      if (
+        idx < paragraphSplits.length - 1 &&
+        paragraphSplits[idx].length < effectiveChunkSize
+      ) {
+        paragraphSplits[idx] = [
+          paragraphSplits[idx],
+          paragraphSplits[idx + 1],
+        ].join(this.paragraphSeparator);
+        paragraphSplits.splice(idx + 1, 1);
       } else {
-        let cur_splits: Array<string> = [split];
-        if (backup_separators) {
-          for (const sep of backup_separators) {
-            if (split.includes(sep)) {
-              cur_splits = split.split(sep);
-              break;
-            }
-          }
-        } else {
-          cur_splits = [split];
-        }
+        idx += 1;
+      }
+    }
+    return paragraphSplits;
+  }
 
-        const cur_splits2: Array<string> = [];
-        for (const cur_split of cur_splits) {
-          const num_cur_tokens = tokenizer(cur_split).length;
-          if (num_cur_tokens <= chunk_size) {
-            cur_splits2.push(cur_split);
-          } else {
-            // split cur_split according to chunk size of the token numbers
-            const cur_split_chunks: Array<string> = [];
-            let end_idx = cur_split.length;
-            while (tokenizer(cur_split.slice(0, end_idx)).length > chunk_size) {
-              for (let i = 1; i < end_idx; i++) {
-                const tmp_split = cur_split.slice(0, end_idx - i);
-                if (tokenizer(tmp_split).length <= chunk_size) {
-                  cur_split_chunks.push(tmp_split);
-                  cur_splits2.push(cur_split.slice(end_idx - i, end_idx));
-                  end_idx = cur_split.length;
-                  break;
-                }
-              }
-            }
-            cur_split_chunks.push(cur_split);
-            cur_splits2.push(...cur_split_chunks);
-          }
-        }
-        new_splits.push(...cur_splits2);
+  getSentenceSplits(text: string, effectiveChunkSize?: number): string[] {
+    let paragraphSplits = this.getParagraphSplits(text, effectiveChunkSize);
+    // Next we split the text using the chunking tokenizer fn.
+    let splits = [];
+    for (const parText of paragraphSplits) {
+      let sentenceSplits = this.chunkingTokenizerFn(parText);
+      for (const sentence_split of sentenceSplits) {
+        splits.push(sentence_split.trim());
       }
     }
-    return new_splits;
+    return splits;
   }
 
-  _postprocessSplits(docs: TextSplit[]): TextSplit[] {
-    const new_docs: TextSplit[] = [];
-    for (const doc of docs) {
-      if (doc.text_chunk.replace(" ", "") == "") {
-        continue;
+  private processSentenceSplits(
+    sentenceSplits: string[],
+    effectiveChunkSize: number
+  ): SplitRep[] {
+    // Process sentence splits.
+    // Primarily check if any sentences exceed the chunk size. If they do,
+    // force split by tokenizer
+    let newSplits: SplitRep[] = [];
+    for (const split of sentenceSplits) {
+      let splitTokens = this.tokenizer(split);
+      const split_len = splitTokens.length;
+      if (split_len <= effectiveChunkSize) {
+        newSplits.push([split, split_len]);
+      } else {
+        for (let i = 0; i < split_len; i += effectiveChunkSize) {
+          const cur_split = this.tokenizerDecoder(
+            splitTokens.slice(i, i + effectiveChunkSize)
+          );
+          newSplits.push([cur_split, effectiveChunkSize]);
+        }
       }
-      new_docs.push(doc);
     }
-    return new_docs;
+    return newSplits;
   }
 
-  splitText(text: string, extra_info_str?: string): string[] {
-    const text_splits = this.splitTextWithOverlaps(text);
-    const chunks = text_splits.map((text_split) => text_split.text_chunk);
-    return chunks;
+  combineTextSplits(
+    newSentenceSplits: SplitRep[],
+    effectiveChunkSize: number
+  ): TextSplit[] {
+    // go through sentence splits, combine them into chunks that fit within the chunk size
+
+    // docs represents final list of text chunks
+    let docs: TextSplit[] = [];
+    // curDocList represents the current list of sentence splits that
+    // will be merged into a chunk
+    let curDocList: string[] = [];
+    let bufferTokens = 0;
+    let curDocTokens = 0;
+    // curDocBuffer represents the current document buffer
+    let curDocBuffer: SplitRep[] = [];
+
+    for (let i = 0; i < newSentenceSplits.length; i++) {
+      // update buffer
+      curDocBuffer.push(newSentenceSplits[i]);
+      bufferTokens += newSentenceSplits[i][1] + 1;
+
+      while (bufferTokens > this.chunkOverlap) {
+        // remove first element from curDocBuffer
+        let first_element = curDocBuffer.shift();
+        if (first_element == undefined) {
+          throw new Error("first_element is undefined");
+        }
+        bufferTokens -= first_element[1];
+        bufferTokens -= 1;
+      }
+
+      // if adding newSentenceSplits[i] to the current doc would exceed effectiveChunkSize,
+      // then we need to flush the current doc (curDocList) into docs
+      if (curDocTokens + newSentenceSplits[i][1] > effectiveChunkSize) {
+        // push current doc list to docs
+        docs.push(new TextSplit(curDocList.join(" ").trim()));
+        // reset docs list with buffer
+        curDocTokens = 0;
+        curDocList = [];
+        for (let j = 0; j < curDocBuffer.length; j++) {
+          curDocList.push(curDocBuffer[j][0]);
+          curDocTokens += curDocBuffer[j][1] + 1;
+        }
+      }
+
+      curDocList.push(newSentenceSplits[i][0]);
+      curDocTokens += newSentenceSplits[i][1] + 1;
+    }
+    docs.push(new TextSplit(curDocList.join(" ").trim()));
+    return docs;
   }
 
-  splitTextWithOverlaps(text: string) {}
+  splitTextWithOverlaps(text: string, extraInfoStr?: string): TextSplit[] {
+    // Split incoming text and return chunks with overlap size.
+    // Has a preference for complete sentences, phrases, and minimal overlap.
 
-  truncateText(text: string, separator: string, chunk_size: number): string {
+    // (callback manager support is intentionally omitted here)
     if (text == "") {
-      return "";
-    }
-    // First we naively split the large input into a bunch of smaller ones.
-    let splits: string[] = text.split(separator);
-    splits = preprocessSplits(splits, chunk_size);
-
-    let start_idx = 0;
-    let cur_idx = 0;
-    let cur_total = 0;
-    while (cur_idx < splits.length) {
-      let cur_token = splits[cur_idx];
-      let num_cur_tokens = Math.max(tokenizer(cur_token).length, 1);
-      if (cur_total + num_cur_tokens > chunk_size) {
-        cur_idx = reduce_chunk_size(start_idx, cur_idx, splits);
-        break;
-      }
-      cur_total += num_cur_tokens;
-      cur_idx += 1;
+      return [];
     }
-    return splits.slice(start_idx, cur_idx).join(separator);
+
+    let effectiveChunkSize = this.getEffectiveChunkSize(extraInfoStr);
+    let sentenceSplits = this.getSentenceSplits(text, effectiveChunkSize);
+
+    // Check if any sentences exceed the chunk size. If they do,
+    // force split by tokenizer
+    let newSentenceSplits = this.processSentenceSplits(
+      sentenceSplits,
+      effectiveChunkSize
+    );
+
+    // combine sentence splits into chunks of text that can then be returned
+    let combinedTextSplits = this.combineTextSplits(
+      newSentenceSplits,
+      effectiveChunkSize
+    );
+
+    return combinedTextSplits;
+  }
+
+  splitText(text: string, extraInfoStr?: string): string[] {
+    const text_splits = this.splitTextWithOverlaps(text);
+    const chunks = text_splits.map((text_split) => text_split.textChunk);
+    return chunks;
   }
 }
diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts
index b300580cb16bc72251bc935931020dd14f7d1879..a97ed2b273de9aaf0d38762229c3df8cc406c89f 100644
--- a/packages/core/src/openai.ts
+++ b/packages/core/src/openai.ts
@@ -4,6 +4,13 @@ import {
   CreateCompletionResponse,
   CreateChatCompletionRequest,
   CreateChatCompletionResponse,
+  CreateEmbeddingRequest,
+  CreateEmbeddingResponse,
+  CreateModerationRequest,
+  CreateModerationResponse,
+  CreateEditRequest,
+  CreateEditResponse,
+  Configuration,
 } from "openai";
 import { AxiosRequestConfig, AxiosResponse } from "axios";
 import fetchAdapter from "./fetchAdapter";
@@ -28,6 +35,67 @@ export class OpenAIWrapper extends OpenAIApi {
       ...options,
     });
   }
+
+  createEmbedding(
+    createEmbeddingRequest: CreateEmbeddingRequest,
+    options?: AxiosRequestConfig<any> | undefined
+  ): Promise<AxiosResponse<CreateEmbeddingResponse, any>> {
+    return super.createEmbedding(createEmbeddingRequest, {
+      adapter: fetchAdapter,
+      ...options,
+    });
+  }
+
+  createModeration(
+    createModerationRequest: CreateModerationRequest,
+    options?: AxiosRequestConfig<any> | undefined
+  ): Promise<AxiosResponse<CreateModerationResponse, any>> {
+    return super.createModeration(createModerationRequest, {
+      adapter: fetchAdapter,
+      ...options,
+    });
+  }
+
+  createEdit(
+    createEditRequest: CreateEditRequest,
+    options?: AxiosRequestConfig<any> | undefined
+  ): Promise<AxiosResponse<CreateEditResponse, any>> {
+    return super.createEdit(createEditRequest, {
+      adapter: fetchAdapter,
+      ...options,
+    });
+  }
+}
+
+export class OpenAISession {
+  openAIKey: string | null = null;
+  openai: OpenAIWrapper;
+
+  constructor(openAIKey: string | null = null) {
+    if (openAIKey) {
+      this.openAIKey = openAIKey;
+    } else if (process.env.OPENAI_API_KEY) {
+      this.openAIKey = process.env.OPENAI_API_KEY;
+    } else {
+      throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable");
+    }
+
+    const configuration = new Configuration({
+      apiKey: this.openAIKey,
+    });
+
+    this.openai = new OpenAIWrapper(configuration);
+  }
+}
+
+let defaultOpenAISession: OpenAISession | null = null;
+
+export function getOpenAISession(openAIKey: string | null = null) {
+  if (!defaultOpenAISession) {
+    defaultOpenAISession = new OpenAISession(openAIKey);
+  }
+
+  return defaultOpenAISession;
 }
 
 export * from "openai";
diff --git a/packages/core/src/tests/Document.test.ts b/packages/core/src/tests/Document.test.ts
index 6ef7e02e70a5fd4c6fad122b3f712eddba58e012..de799d517ce5f34315363f0082c8c0c90ae4e0dc 100644
--- a/packages/core/src/tests/Document.test.ts
+++ b/packages/core/src/tests/Document.test.ts
@@ -2,7 +2,7 @@ import { Document } from "../Document";
 
 describe("Document", () => {
   test("initializes", () => {
-    const doc = new Document("docId", "text");
+    const doc = new Document("text", "docId");
     expect(doc).toBeDefined();
   });
 });
diff --git a/packages/core/src/tests/TextSplitter.test.ts b/packages/core/src/tests/TextSplitter.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..1cfd7abf614ffc4db55507b4052e677893dc7166
--- /dev/null
+++ b/packages/core/src/tests/TextSplitter.test.ts
@@ -0,0 +1,43 @@
+import { SentenceSplitter } from "../TextSplitter";
+
+describe("SentenceSplitter", () => {
+  test("initializes", () => {
+    const sentenceSplitter = new SentenceSplitter();
+    expect(sentenceSplitter).toBeDefined();
+  });
+
+  test("splits paragraphs w/o effective chunk size", () => {
+    const sentenceSplitter = new SentenceSplitter(
+      undefined, undefined, undefined, undefined, "\n"
+    );
+    // with no effective chunk size, paragraphs are returned unmerged
+    let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", undefined);
+    expect(splits).toEqual(["This is a paragraph.", "This is another paragraph."]);
+  });
+
+  test("splits paragraphs with effective chunk size", () => {
+    const sentenceSplitter = new SentenceSplitter(
+      undefined, undefined, undefined, undefined, "\n"
+    );
+    // with a large effective chunk size, small paragraphs are merged together
+    let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", 1000);
+    expect(splits).toEqual(["This is a paragraph.\nThis is another paragraph."]);
+  });
+  
+  test("splits sentences", () => {
+    const sentenceSplitter = new SentenceSplitter();
+    let splits = sentenceSplitter.getSentenceSplits("This is a sentence. This is another sentence.", undefined);
+    expect(splits).toEqual(["This is a sentence.", "This is another sentence."]);
+  });
+
+  test("overall split text", () => {
+    let sentenceSplitter = new SentenceSplitter(5, 0);
+    let splits = sentenceSplitter.splitText("This is a sentence. This is another sentence.");
+    expect(splits).toEqual(["This is a sentence.", "This is another sentence."]);
+
+    sentenceSplitter = new SentenceSplitter(1000);
+    splits = sentenceSplitter.splitText("This is a sentence. This is another sentence.");
+    expect(splits).toEqual(["This is a sentence. This is another sentence."]);
+  });
+
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b34e20b6b74adea7ec53001992f5c179456684fd..508749e672d5c9817cdb3b4f77dfe658c840ecad 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -29,12 +29,18 @@ importers:
       prettier-plugin-tailwindcss:
         specifier: ^0.3.0
         version: 0.3.0(prettier@2.8.8)
+      tiktoken-node:
+        specifier: latest
+        version: 0.0.6
       ts-jest:
         specifier: ^29.1.0
         version: 29.1.0(@babel/core@7.22.5)(jest@29.5.0)(typescript@4.9.5)
       turbo:
         specifier: latest
         version: 1.10.3
+      wink-nlp:
+        specifier: latest
+        version: 1.14.1
 
   apps/docs:
     dependencies:
@@ -128,13 +134,19 @@ importers:
       openai:
         specifier: ^3.3.0
         version: 3.3.0
+      uuid:
+        specifier: ^9.0.0
+        version: 9.0.0
     devDependencies:
       '@types/lodash':
         specifier: ^4.14.195
         version: 4.14.195
       '@types/node':
-        specifier: ^20.3.1
-        version: 20.3.1
+        specifier: ^18
+        version: 18.6.0
+      '@types/uuid':
+        specifier: ^9.0.2
+        version: 9.0.2
       node-stdlib-browser:
         specifier: ^1.2.0
         version: 1.2.0
@@ -1113,10 +1125,6 @@ packages:
     resolution: {integrity: sha512-WZ/6I1GL0DNAo4bb01lGGKTHH8BHJyECepf11kWONg3OJoHq2WYOm16Es1V54Er7NTUXsbDCpKRKdmBc4X2xhA==}
     dev: true
 
-  /@types/node@20.3.1:
-    resolution: {integrity: sha512-EhcH/wvidPy1WeML3TtYFGR83UzjxeWRen9V402T8aUGYsCHOmfoisV3ZSg03gAFIbLq8TnWOJ0f4cALtnSEUg==}
-    dev: true
-
   /@types/prettier@2.7.3:
     resolution: {integrity: sha512-+68kP9yzs4LMp7VNh8gdzMSPZFL44MLGqiHWvttYJe+6qnuVr4Ek9wSBQoveqY/r+LwjCcU29kNVkidwim+kYA==}
     dev: true
@@ -1153,6 +1161,10 @@ packages:
       '@types/node': 18.6.0
     dev: true
 
+  /@types/uuid@9.0.2:
+    resolution: {integrity: sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==}
+    dev: true
+
   /@types/yargs-parser@21.0.0:
     resolution: {integrity: sha512-iO9ZQHkZxHn4mSakYV0vFHAVDyEOIJQrV2uZ06HxEPcx+mt8swXoZHIbaaJ2crJYFfErySgktuTZ3BeLz+XmFA==}
     dev: true
@@ -5211,6 +5223,11 @@ packages:
     resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
     dev: true
 
+  /tiktoken-node@0.0.6:
+    resolution: {integrity: sha512-MiprfzPhoKhCflzl0Jyds0VKibAgUGHfJLvBCAXPpum6Lru6ZoKQGsl8lJP0B94LPpby2B2WveOB2tZVfEZQOQ==}
+    engines: {node: '>= 14'}
+    dev: true
+
   /timers-browserify@2.0.12:
     resolution: {integrity: sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==}
     engines: {node: '>=0.6.0'}
@@ -5518,6 +5535,11 @@ packages:
       which-typed-array: 1.1.9
     dev: true
 
+  /uuid@9.0.0:
+    resolution: {integrity: sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==}
+    hasBin: true
+    dev: false
+
   /v8-compile-cache-lib@3.0.1:
     resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==}
     dev: true
@@ -5593,6 +5615,10 @@ packages:
     dependencies:
       isexe: 2.0.0
 
+  /wink-nlp@1.14.1:
+    resolution: {integrity: sha512-RIdUZI3ei3OB6OY5f3jNo74fmsfPV7cfwiJ2fvBM1xzGnnl2CjRJmwGwsO04n0xl28vDTtxj6AlhIb74XQLoqQ==}
+    dev: true
+
   /word-wrap@1.2.3:
     resolution: {integrity: sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==}
     engines: {node: '>=0.10.0'}