From 4ae690612a51e346f1555fc4a07411453fa5b850 Mon Sep 17 00:00:00 2001 From: Logan Markewich <logan.markewich@live.com> Date: Tue, 18 Jul 2023 13:09:07 -0600 Subject: [PATCH] flesh out more --- apps/docs/docs/advanced.md | 6 ++++- apps/docs/docs/api/_category_.yml | 2 +- apps/docs/docs/end_to_end.md | 4 ++++ apps/docs/docs/modules/_category_.yml | 2 +- .../docs/modules/high_level/chat_engine.md | 14 ++++++++++-- .../docs/modules/high_level/data_index.md | 22 +++++++++++++++++-- .../docs/modules/high_level/data_loader.md | 13 +++++++++++ .../modules/high_level/documents_and_nodes.md | 6 ++--- .../docs/modules/high_level/query_engine.md | 11 ++++++++-- apps/docs/docs/modules/index.md | 4 ++-- .../docs/modules/low_level/data_loader.md | 5 ----- apps/docs/docs/modules/low_level/embedding.md | 14 +++++++++++- apps/docs/docs/modules/low_level/llm.md | 14 +++++++++++- apps/docs/docs/modules/low_level/retriever.md | 12 +++++++++- .../docs/modules/low_level/simple_prompt.md | 2 ++ apps/docs/docs/modules/low_level/storage.md | 4 +++- .../docs/modules/low_level/text_splitter.md | 4 +++- 17 files changed, 115 insertions(+), 24 deletions(-) create mode 100644 apps/docs/docs/modules/high_level/data_loader.md delete mode 100644 apps/docs/docs/modules/low_level/data_loader.md diff --git a/apps/docs/docs/advanced.md b/apps/docs/docs/advanced.md index dd35a00cb..805d03624 100644 --- a/apps/docs/docs/advanced.md +++ b/apps/docs/docs/advanced.md @@ -1,5 +1,5 @@ --- -sidebar_position: 4 +sidebar_position: 5 --- # Advanced Features @@ -21,3 +21,7 @@ npx ts-node subquestion.ts SubQuestionQueryEngine is implemented with Tools. The basic idea of Tools is that they are executable options for the large language model. In this case, our SubQuestionQueryEngine relies on QueryEngineTool, which as you guessed it is a tool to run queries on a QueryEngine. This allows us to give the model an option to query different documents for different questions for example. 
You could also imagine that the SubQuestionQueryEngine could use a Tool that searches for something on the web or gets an answer using Wolfram Alpha. You can learn more about Tools by taking a look at the LlamaIndex Python documentation https://gpt-index.readthedocs.io/en/latest/core_modules/agent_modules/tools/root.html + +## ListIndexLLMRetriever + +Todo: Insert text. diff --git a/apps/docs/docs/api/_category_.yml b/apps/docs/docs/api/_category_.yml index 01a501229..79eb4b8a2 100644 --- a/apps/docs/docs/api/_category_.yml +++ b/apps/docs/docs/api/_category_.yml @@ -1,2 +1,2 @@ label: "API" -position: 4 \ No newline at end of file +position: 6 \ No newline at end of file diff --git a/apps/docs/docs/end_to_end.md b/apps/docs/docs/end_to_end.md index eb47c4727..e4cc66696 100644 --- a/apps/docs/docs/end_to_end.md +++ b/apps/docs/docs/end_to_end.md @@ -1,3 +1,7 @@ +--- +sidebar_position: 3 +--- + # End to End Examples We include several end-to-end examples using LlamaIndex.TS in the repository diff --git a/apps/docs/docs/modules/_category_.yml b/apps/docs/docs/modules/_category_.yml index 83298fde6..7af5ad40f 100644 --- a/apps/docs/docs/modules/_category_.yml +++ b/apps/docs/docs/modules/_category_.yml @@ -1,3 +1,3 @@ label: "Modules" collapsed: false -position: 3 \ No newline at end of file +position: 4 \ No newline at end of file diff --git a/apps/docs/docs/modules/high_level/chat_engine.md b/apps/docs/docs/modules/high_level/chat_engine.md index c928feddb..5b00b89d7 100644 --- a/apps/docs/docs/modules/high_level/chat_engine.md +++ b/apps/docs/docs/modules/high_level/chat_engine.md @@ -1,5 +1,15 @@ --- -sidebar_position: 3 +sidebar_position: 4 --- -# ChatEngine \ No newline at end of file +# ChatEngine + +The chat engine is a quick and simple way to chat with the data in your index. 
+ +```typescript +const retriever = index.asRetriever(); +const chatEngine = new ContextChatEngine({ retriever }); + +// start chatting +const response = await chatEngine.chat(query); +``` diff --git a/apps/docs/docs/modules/high_level/data_index.md b/apps/docs/docs/modules/high_level/data_index.md index c28e771e1..3645d34df 100644 --- a/apps/docs/docs/modules/high_level/data_index.md +++ b/apps/docs/docs/modules/high_level/data_index.md @@ -1,5 +1,23 @@ --- -sidebar_position: 1 +sidebar_position: 2 --- -# Index \ No newline at end of file +# Index + +An index is the basic container and organization for your data. LlamaIndex.TS supports two indexes: + +- `ListIndex` - will send every `Node` in the index to the LLM in order to generate a response +- `VectorStoreIndex` - will send the top-k `Node`s to the LLM when generating a response. The default top-k is 2. + +```typescript +import { + Document, + VectorStoreIndex, +} from "llamaindex"; + +const document = new Document({ text: "test" }); + +const index = await VectorStoreIndex.fromDocuments( + [document] +); +``` \ No newline at end of file diff --git a/apps/docs/docs/modules/high_level/data_loader.md b/apps/docs/docs/modules/high_level/data_loader.md new file mode 100644 index 000000000..8943d37e2 --- /dev/null +++ b/apps/docs/docs/modules/high_level/data_loader.md @@ -0,0 +1,13 @@ +--- +sidebar_position: 1 +--- + +# Reader / Loader + +LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class. Currently, `.txt` and `.pdf` files are supported, with more planned in the future! 
+ +```typescript +import { SimpleDirectoryReader } from "llamaindex"; + +documents = new SimpleDirectoryReader().loadData("./data"); +``` diff --git a/apps/docs/docs/modules/high_level/documents_and_nodes.md b/apps/docs/docs/modules/high_level/documents_and_nodes.md index 20489b842..08ba6b73a 100644 --- a/apps/docs/docs/modules/high_level/documents_and_nodes.md +++ b/apps/docs/docs/modules/high_level/documents_and_nodes.md @@ -6,8 +6,8 @@ sidebar_position: 0 `Document`s and `Node`s are the basic building blocks of any index. While the API for these objects is similar, `Document` objects represent entire files, while `Node`s are smaller pieces of that original document, that are suitable for an LLM and Q&A. -## Usage Pattern - ```typescript +import { Document } from "llamaindex"; -``` \ No newline at end of file +document = new Document({ text: "text", metadata: { "key": "val" }}); +``` diff --git a/apps/docs/docs/modules/high_level/query_engine.md b/apps/docs/docs/modules/high_level/query_engine.md index f44cdc40b..88a926c76 100644 --- a/apps/docs/docs/modules/high_level/query_engine.md +++ b/apps/docs/docs/modules/high_level/query_engine.md @@ -1,5 +1,12 @@ --- -sidebar_position: 2 +sidebar_position: 3 --- -# QueryEngine \ No newline at end of file +# QueryEngine + +A query engine wraps a `Retriever` and a `ResponseSynthesizer` into a pipeline, that will use the query string to fetch nodes and then send them to the LLM to generate a response. + +```typescript +const queryEngine = index.asQueryEngine(); +const response = queryEngine.query("query string"); +``` \ No newline at end of file diff --git a/apps/docs/docs/modules/index.md b/apps/docs/docs/modules/index.md index 3213983d5..d51ba9c05 100644 --- a/apps/docs/docs/modules/index.md +++ b/apps/docs/docs/modules/index.md @@ -8,6 +8,8 @@ LlamaIndex.TS offers several core modules, seperated into high-level modules for - **Node**: The basic data building block. 
Most commonly, these are parts of the document split into manageable pieces that are small enough to be fed into an embedding model and LLM. +- **Reader/Loader**: A reader or loader is something that takes in a document in the real world and transforms it into a Document class that can then be used in your Index and queries. We currently support plain text files and PDFs with many many more to come. + - **Indexes**: indexes store the Nodes and the embeddings of those nodes. - **QueryEngine**: Query engines are what generate the query you put in and give you back the result. Query engines generally combine a pre-built prompt with selected nodes from your Index to give the LLM the context it needs to answer your query. @@ -22,8 +24,6 @@ LlamaIndex.TS offers several core modules, seperated into high-level modules for - **Embedding**: An embedding is represented as a vector of floating point numbers. OpenAI's text-embedding-ada-002 is our default embedding model and each embedding it generates consists of 1,536 floating point numbers. Another popular embedding model is BERT which uses 768 floating point numbers to represent each Node. We provide a number of utilities to work with embeddings including 3 similarity calculation options and Maximum Marginal Relevance -- **Reader/Loader**: A reader or loader is something that takes in a document in the real world and transforms into a Document class that can then be used in your Index and queries. We currently support plain text files and PDFs with many many more to come. - - **TextSplitter**: Text splitting strategies are incredibly important to the overall efficacy of the embedding search. Currently, while we do have a default, there's no one size fits all solution. Depending on the source documents, you may want to use different splitting sizes and strategies. Currently we support spliltting by fixed size, splitting by fixed size with overlapping sections, splitting by sentence, and splitting by paragraph. 
- **Retriever**: The Retriever is what actually chooses the Nodes to retrieve from the index. Here, you may wish to try retrieving more or fewer Nodes per query, changing your similarity function, or creating your own retriever for each individual use case in your application. For example, you may wish to have a separate retriever for code content vs. text content. diff --git a/apps/docs/docs/modules/low_level/data_loader.md b/apps/docs/docs/modules/low_level/data_loader.md deleted file mode 100644 index d603f12d9..000000000 --- a/apps/docs/docs/modules/low_level/data_loader.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Reader / Loader \ No newline at end of file diff --git a/apps/docs/docs/modules/low_level/embedding.md b/apps/docs/docs/modules/low_level/embedding.md index e31780261..edf16a925 100644 --- a/apps/docs/docs/modules/low_level/embedding.md +++ b/apps/docs/docs/modules/low_level/embedding.md @@ -2,4 +2,16 @@ sidebar_position: 1 --- -# Embedding \ No newline at end of file +# Embedding + +The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI. + +This can be explicitly set in the `ServiceContext` object. + +```typescript +import { OpenAIEmbedding, ServiceContext } from "llamaindex"; + +const openaiEmbeds = new OpenAIEmbedding(); + +const serviceContext = new ServiceContext({ embedModel: openaiEmbeds }); +``` \ No newline at end of file diff --git a/apps/docs/docs/modules/low_level/llm.md b/apps/docs/docs/modules/low_level/llm.md index dc14840bc..ce943a85f 100644 --- a/apps/docs/docs/modules/low_level/llm.md +++ b/apps/docs/docs/modules/low_level/llm.md @@ -2,4 +2,16 @@ sidebar_position: 0 --- -# LLM \ No newline at end of file +# LLM + +The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`. 
+ +The LLM can be explicitly set in the `ServiceContext` object. + +```typescript +import { ChatGPTLLMPredictor, ServiceContext } from "llamaindex"; + +const openaiLLM = new ChatGPTLLMPredictor({ model: "gpt-3.5-turbo" }); + +const serviceContext = new ServiceContext({ llmPredictor: openaiLLM }); +``` diff --git a/apps/docs/docs/modules/low_level/retriever.md b/apps/docs/docs/modules/low_level/retriever.md index 52c81fd51..2976ae2e0 100644 --- a/apps/docs/docs/modules/low_level/retriever.md +++ b/apps/docs/docs/modules/low_level/retriever.md @@ -2,4 +2,14 @@ sidebar_position: 5 --- -# Retriever \ No newline at end of file +# Retriever + +A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. For example, a `ListIndexRetriever` will fetch all nodes no matter the query. Meanwhile, a `VectorIndexRetriever` will only fetch the top-k most similar nodes. + +```typescript +const retriever = vector_index.asRetriever() +retriever.similarityTopK = 3; + +// Fetch nodes! +const nodes = await retriever.aretrieve("query string"); +``` diff --git a/apps/docs/docs/modules/low_level/simple_prompt.md b/apps/docs/docs/modules/low_level/simple_prompt.md index f1960bcd3..e10aed104 100644 --- a/apps/docs/docs/modules/low_level/simple_prompt.md +++ b/apps/docs/docs/modules/low_level/simple_prompt.md @@ -3,3 +3,5 @@ sidebar_position: 4 --- # SimplePrompt + +Prompts are the basic inputs to an LLM. These can contain variables that get filled in later, or just plain text that is sent as-is to a model. diff --git a/apps/docs/docs/modules/low_level/storage.md b/apps/docs/docs/modules/low_level/storage.md index c6c26edfc..b7423946e 100644 --- a/apps/docs/docs/modules/low_level/storage.md +++ b/apps/docs/docs/modules/low_level/storage.md @@ -2,4 +2,6 @@ sidebar_position: 6 --- -# Storage \ No newline at end of file +# Storage + +Todo: Find out how this works. 
\ No newline at end of file diff --git a/apps/docs/docs/modules/low_level/text_splitter.md b/apps/docs/docs/modules/low_level/text_splitter.md index 1c97febe0..0375e03ff 100644 --- a/apps/docs/docs/modules/low_level/text_splitter.md +++ b/apps/docs/docs/modules/low_level/text_splitter.md @@ -2,4 +2,6 @@ sidebar_position: 3 --- -# TextSplitter \ No newline at end of file +# TextSplitter + +Todo: Talk about text splitter. Alternatively, replace this with node parser? Or combine them? -- GitLab