diff --git a/apps/simple/gptllama.ts b/apps/simple/gptllama.ts
index 5497b5e492c3c93a37a454085fe1d211435c07d4..1ecb2a1b01d63d6d854559c95bb118fecccf3b6e 100644
--- a/apps/simple/gptllama.ts
+++ b/apps/simple/gptllama.ts
@@ -6,7 +6,7 @@ import readline from "node:readline/promises";
 import { ChatMessage, LlamaDeuce, OpenAI } from "llamaindex";
 
 (async () => {
-  const gpt4 = new OpenAI({ model: "gpt-4-vision-preview", temperature: 0.9 });
+  const gpt4 = new OpenAI({ model: "gpt-4", temperature: 0.9 });
   const l2 = new LlamaDeuce({
     model: "Llama-2-70b-chat-4bit",
     temperature: 0.9,
diff --git a/examples/directory.ts b/examples/directory.ts
new file mode 100644
index 0000000000000000000000000000000000000000..bc4dd592b88c78bf3de53f552108a8b2977aa540
--- /dev/null
+++ b/examples/directory.ts
@@ -0,0 +1,24 @@
+import { SimpleDirectoryReader } from "llamaindex";
+
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message?: string,
+): boolean {
+  console.log(category, name, status, message);
+  if (name.endsWith(".pdf")) {
+    console.log("I DON'T WANT PDF FILES!");
+    return false;
+  }
+  return true;
+}
+
+async function main() {
+  // Load page
+  const reader = new SimpleDirectoryReader(callback);
+  const params = { directoryPath: "./data" };
+  await reader.loadData(params);
+}
+
+main().catch(console.error);
diff --git a/examples/html.ts b/examples/html.ts
new file mode 100644
index 0000000000000000000000000000000000000000..76672986180590d13a4b42c9912428ffe7598bb1
--- /dev/null
+++ b/examples/html.ts
@@ -0,0 +1,21 @@
+import { HTMLReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load page
+  const reader = new HTMLReader();
+  const documents = await reader.loadData("data/18-1_Changelog.html");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(
+    "What were the notable changes in 18.1?",
+  );
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/examples/llmStream.ts b/examples/llmStream.ts
new file mode 100644
index 0000000000000000000000000000000000000000..728999681a898034d01a5111de6c8af656b43f45
--- /dev/null
+++ b/examples/llmStream.ts
@@ -0,0 +1,47 @@
+import { ChatMessage, SimpleChatEngine } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+import { Anthropic } from "../../packages/core/src/llm/LLM";
+
+async function main() {
+  const query: string = `
+Where is Istanbul?
+  `;
+
+  // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+  const llm = new Anthropic();
+  const message: ChatMessage = { content: query, role: "user" };
+
+  //TODO: Add callbacks later
+
+  //Stream Complete
+  //Note: Setting streaming flag to true or false will auto-set your return type to
+  //either an AsyncGenerator or a Response.
+  // Omitting the streaming flag automatically sets streaming to false
+
+  const chatEngine: SimpleChatEngine = new SimpleChatEngine({
+    chatHistory: undefined,
+    llm: llm,
+  });
+
+  const rl = readline.createInterface({ input, output });
+  while (true) {
+    const query = await rl.question("Query: ");
+
+    if (!query) {
+      break;
+    }
+
+    //Case 1: .chat(query, undefined, true) => Stream
+    //Case 2: .chat(query, undefined, false) => Response object
+    //Case 3: .chat(query, undefined) => Response object
+    const chatStream = await chatEngine.chat(query, undefined, true);
+    var accumulated_result = "";
+    for await (const part of chatStream) {
+      accumulated_result += part;
+      process.stdout.write(part);
+    }
+  }
+}
+
+main();
diff --git a/examples/mongo.ts b/examples/mongo.ts
new file mode 100644
index 0000000000000000000000000000000000000000..5b5f735ffb3f0ada9906d29fc429afa10deedf4c
--- /dev/null
+++ b/examples/mongo.ts
@@ -0,0 +1,68 @@
+import { MongoClient } from "mongodb";
+import { Document } from "../../packages/core/src/Node";
+import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";
+
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+
+async function main() {
+  //Dummy test code
+  const query: object = { _id: "waldo" };
+  const options: object = {};
+  const projections: object = { embedding: 0 };
+  const limit: number = Infinity;
+  const uri: string = process.env.MONGODB_URI ?? "fake_uri";
+  const client: MongoClient = new MongoClient(uri);
+
+  //Where the real code starts
+  const MR = new SimpleMongoReader(client);
+  const documents: Document[] = await MR.loadData(
+    "data",
+    "posts",
+    1,
+    {},
+    options,
+    projections,
+  );
+
+  //
+  //If you need to look at low-level details of
+  // a queryEngine (for example, needing to check each individual node)
+  //
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  // var storageContext = await storageContextFromDefaults({});
+  // var serviceContext = serviceContextFromDefaults({});
+  // const docStore = storageContext.docStore;
+
+  // for (const doc of documents) {
+  //   docStore.setDocumentHash(doc.id_, doc.hash);
+  // }
+  // const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+  // console.log(nodes);
+
+  //
+  //Making Vector Store from documents
+  //
+
+  const index = await VectorStoreIndex.fromDocuments(documents);
+  // Create query engine
+  const queryEngine = index.asQueryEngine();
+
+  const rl = readline.createInterface({ input, output });
+  while (true) {
+    const query = await rl.question("Query: ");
+
+    if (!query) {
+      break;
+    }
+
+    const response = await queryEngine.query(query);
+
+    // Output response
+    console.log(response.toString());
+  }
+}
+
+main();
diff --git a/examples/vectorIndexCustomize.ts b/examples/vectorIndexCustomize.ts
index 5ad55cff6c50defaca130c1e1cefe43857564741..b24e91416b0de23f97feb2659af0c9b55fbe57e7 100644
--- a/examples/vectorIndexCustomize.ts
+++ b/examples/vectorIndexCustomize.ts
@@ -3,6 +3,7 @@ import {
   OpenAI,
   RetrieverQueryEngine,
   serviceContextFromDefaults,
+  SimilarityPostprocessor,
   VectorStoreIndex,
 } from "llamaindex";
 import essay from "./essay";
@@ -21,8 +22,16 @@ async function main() {
   const retriever = index.asRetriever();
   retriever.similarityTopK = 5;
 
+  const nodePostprocessor = new SimilarityPostprocessor({
+    similarityCutoff: 0.7,
+  });
   // TODO: cannot pass responseSynthesizer into retriever query engine
-  const queryEngine = new RetrieverQueryEngine(retriever);
+  const queryEngine = new RetrieverQueryEngine(
+    retriever,
+    undefined,
+    undefined,
+    [nodePostprocessor],
+  );
 
   const response = await queryEngine.query(
     "What did the author do growing up?",
diff --git a/examples/vectorIndexFromVectorStore.ts b/examples/vectorIndexFromVectorStore.ts
new file mode 100644
index 0000000000000000000000000000000000000000..311bc8c72225e73b1b2cc6f116b1899de22ed796
--- /dev/null
+++ b/examples/vectorIndexFromVectorStore.ts
@@ -0,0 +1,197 @@
+import {
+  OpenAI,
+  ResponseSynthesizer,
+  RetrieverQueryEngine,
+  serviceContextFromDefaults,
+  TextNode,
+  TreeSummarize,
+  VectorIndexRetriever,
+  VectorStore,
+  VectorStoreIndex,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "llamaindex";
+
+import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
+
+/**
+ * Please do not use this class in production; it's only for demonstration purposes.
+ */
+class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
+  implements VectorStore
+{
+  storesText = true;
+  isEmbeddingQuery = false;
+
+  indexName!: string;
+  pineconeClient!: Pinecone;
+  index!: Index<T>;
+
+  constructor({ indexName, client }: { indexName: string; client: Pinecone }) {
+    this.indexName = indexName;
+    this.pineconeClient = client;
+    this.index = client.index<T>(indexName);
+  }
+
+  client() {
+    return this.pineconeClient;
+  }
+
+  async query(
+    query: VectorStoreQuery,
+    kwargs?: any,
+  ): Promise<VectorStoreQueryResult> {
+    let queryEmbedding: number[] = [];
+    if (query.queryEmbedding) {
+      if (typeof query.alpha === "number") {
+        const alpha = query.alpha;
+        queryEmbedding = query.queryEmbedding.map((v) => v * alpha);
+      } else {
+        queryEmbedding = query.queryEmbedding;
+      }
+    }
+
+    // Current LlamaIndexTS implementation only support exact match filter, so we use kwargs instead.
+    const filter = kwargs?.filter || {};
+
+    const response = await this.index.query({
+      filter,
+      vector: queryEmbedding,
+      topK: query.similarityTopK,
+      includeValues: true,
+      includeMetadata: true,
+    });
+
+    console.log(
+      `Numbers of vectors returned by Pinecone after preFilters are applied: ${
+        response?.matches?.length || 0
+      }.`,
+    );
+
+    const topKIds: string[] = [];
+    const topKNodes: TextNode[] = [];
+    const topKScores: number[] = [];
+
+    const metadataToNode = (metadata?: T): Partial<TextNode> => {
+      if (!metadata) {
+        throw new Error("metadata is undefined.");
+      }
+
+      const nodeContent = metadata["_node_content"];
+      if (!nodeContent) {
+        throw new Error("nodeContent is undefined.");
+      }
+
+      if (typeof nodeContent !== "string") {
+        throw new Error("nodeContent is not a string.");
+      }
+
+      return JSON.parse(nodeContent);
+    };
+
+    if (response.matches) {
+      for (const match of response.matches) {
+        const node = new TextNode({
+          ...metadataToNode(match.metadata),
+          embedding: match.values,
+        });
+
+        topKIds.push(match.id);
+        topKNodes.push(node);
+        topKScores.push(match.score ?? 0);
+      }
+    }
+
+    const result = {
+      ids: topKIds,
+      nodes: topKNodes,
+      similarities: topKScores,
+    };
+
+    return result;
+  }
+
+  add(): Promise<string[]> {
+    return Promise.resolve([]);
+  }
+
+  delete(): Promise<void> {
+    throw new Error("Method `delete` not implemented.");
+  }
+
+  persist(): Promise<void> {
+    throw new Error("Method `persist` not implemented.");
+  }
+}
+
+/**
+ * The goal of this example is to show how to use Pinecone as a vector store
+ * for LlamaIndexTS with(out) preFilters.
+ *
+ * It should not be used in production like that,
+ * as you might want to find a proper PineconeVectorStore implementation.
+ */
+async function main() {
+  process.env.PINECONE_API_KEY = "Your Pinecone API Key.";
+  process.env.PINECONE_ENVIRONMENT = "Your Pinecone Environment.";
+  process.env.PINECONE_PROJECT_ID = "Your Pinecone Project ID.";
+  process.env.PINECONE_INDEX_NAME = "Your Pinecone Index Name.";
+  process.env.OPENAI_API_KEY = "Your OpenAI API Key.";
+  process.env.OPENAI_API_ORGANIZATION = "Your OpenAI API Organization.";
+
+  const getPineconeVectorStore = async () => {
+    return new PineconeVectorStore({
+      indexName: process.env.PINECONE_INDEX_NAME || "index-name",
+      client: new Pinecone(),
+    });
+  };
+
+  const getServiceContext = () => {
+    const openAI = new OpenAI({
+      model: "gpt-4",
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+
+    return serviceContextFromDefaults({
+      llm: openAI,
+    });
+  };
+
+  const getQueryEngine = async (filter: unknown) => {
+    const vectorStore = await getPineconeVectorStore();
+    const serviceContext = getServiceContext();
+
+    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
+      vectorStore,
+      serviceContext,
+    );
+
+    const retriever = new VectorIndexRetriever({
+      index: vectorStoreIndex,
+      similarityTopK: 500,
+    });
+
+    const responseSynthesizer = new ResponseSynthesizer({
+      serviceContext,
+      responseBuilder: new TreeSummarize(serviceContext),
+    });
+
+    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
+      filter,
+    });
+  };
+
+  // whatever is a key from your metadata
+  const queryEngine = await getQueryEngine({
+    whatever: {
+      $gte: 1,
+      $lte: 100,
+    },
+  });
+
+  const response = await queryEngine.query("How many results do you have?");
+
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/examples/vision.ts b/examples/vision.ts
new file mode 100644
index 0000000000000000000000000000000000000000..27797d00dbc1e40ebc9e696747ab5d92fa58e31d
--- /dev/null
+++ b/examples/vision.ts
@@ -0,0 +1,15 @@
+import { OpenAI } from "llamaindex";
+
+(async () => {
+  const llm = new OpenAI({ model: "gpt-4-vision-preview", temperature: 0.1 });
+
+  // complete api
+  const response1 = await llm.complete("How are you?");
+  console.log(response1.message.content);
+
+  // chat api
+  const response2 = await llm.chat([
+    { content: "Tell me a joke!", role: "user" },
+  ]);
+  console.log(response2.message.content);
+})();