diff --git a/CITATION.cff b/CITATION.cff index 986d0c78b5b64bfac17a890c173753babbab9a64..8c0ad930f5d60d3e04223d372d9996de362dc4e0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ authors: - family-names: "Liu" given-names: "Jerry" orcid: "https://orcid.org/0000-0002-6694-3517" -title: "GPT Index" +title: "LlamaIndex" doi: 10.5281/zenodo.1234 date-released: 2022-11-1 url: "https://github.com/jerryjliu/gpt_index" \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7905856b96d0b2a6207050e58d13cf2c9ab600fb..ab185fe3db6842959e49d0b621451738291b650d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,9 +1,9 @@ -# 💡 Contributing to GPT Index +# 💡 Contributing to LlamaIndex > ⚠️ **NOTE**: We are rebranding GPT Index as LlamaIndex! > **2/19/2023**: We are still in the middle of the transition. If you are interested in contributing to LlamaIndex, make sure to follow the below steps. For testing, please do `import gpt_index` instead of `import llama_index`. -Interested in contributing to GPT Index? Here's how to get started! +Interested in contributing to LlamaIndex? Here's how to get started! ## Contributions that we're looking for: - Bug fixes @@ -16,10 +16,10 @@ Also, join our Discord for discussions: https://discord.gg/dGcwcsnxhU. ## Environment Setup -GPT Index is a Python package. We've tested primarily with Python versions >= 3.8. Here's a quick +LlamaIndex is a Python package. We've tested primarily with Python versions >= 3.8. Here's a quick and dirty guide to getting your environment setup. -First, create a fork of GPT Index, by clicking the "Fork" button on the [GPT Index Github page](https://github.com/jerryjliu/gpt_index). +First, create a fork of LlamaIndex, by clicking the "Fork" button on the [LlamaIndex Github page](https://github.com/jerryjliu/gpt_index). Following [these steps](https://docs.github.com/en/get-started/quickstart/fork-a-repo) for more details on how to fork the repo and clone the forked repo. 
@@ -75,7 +75,7 @@ Example notebooks can be found in this folder: https://github.com/jerryjliu/gpt_ ### Creating a pull request See [these instructions](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) -to open a pull request against the main GPT Index repo. +to open a pull request against the main LlamaIndex repo. diff --git a/README.md b/README.md index 52ab3530977496454956c5eb6d7c4c8a542a7dfc..f4ecb15a679087ac68934715bcea7c4ab2ad15e1 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,14 @@ > ⚠️ **NOTE**: We are rebranding GPT Index as LlamaIndex! We will carry out this transition gradually. +> **2/25/2023**: By default, our docs/notebooks/instructions now reference "LlamaIndex" +instead of "GPT Index". + > **2/19/2023**: By default, our docs/notebooks/instructions now use the `llama-index` package. However the `gpt-index` package still exists as a duplicate! + > **2/16/2023**: We have a duplicate `llama-index` pip package. Simply replace all imports of `gpt_index` with `llama_index` if you choose to `pip install llama-index`. -GPT Index (LlamaIndex) is a project consisting of a set of data structures designed to make it easier to +LlamaIndex (GPT Index) is a project consisting of a set of data structures designed to make it easier to use large external knowledge bases with LLMs. PyPi: @@ -31,8 +35,8 @@ LlamaHub (community library of data loaders): https://llamahub.ai #### Proposed Solution -At its core, GPT Index contains a toolkit of **index data structures** designed to easily connect LLM's with your external data. -GPT Index helps to provide the following advantages: +At its core, LlamaIndex contains a toolkit of **index data structures** designed to easily connect LLM's with your external data. +LlamaIndex helps to provide the following advantages: - Remove concerns over prompt size limitations. 
- Abstract common usage patterns to reduce boilerplate code in your LLM app. - Provide data connectors to your common data sources (Google Docs, Slack, etc.). @@ -98,14 +102,14 @@ All requirements should be contained within the `setup.py` file. To run the pack ## 📖 Citation -Reference to cite if you use GPT Index in a paper: +Reference to cite if you use LlamaIndex in a paper: ``` -@software{Liu_GPT_Index_2022, +@software{Liu_LlamaIndex_2022, author = {Liu, Jerry}, doi = {10.5281/zenodo.1234}, month = {11}, -title = {{GPT Index}}, +title = {{LlamaIndex}}, url = {https://github.com/jerryjliu/gpt_index},year = {2022} } ``` diff --git a/docs/conf.py b/docs/conf.py index 099b404eeadbab99cc41344f2270623aa5c4974a..4d37d641e5006bc3e0d3406fdff7929bcd0e3919 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ sys.path.insert(0, os.path.abspath("../")) # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "GPT Index" +project = "LlamaIndex" copyright = "2022, Jerry Liu" author = "Jerry Liu" diff --git a/docs/gallery/app_showcase.md b/docs/gallery/app_showcase.md index c04b6a9c4159496b5efeb9e80f28d5a24f43aab2..b14b5e79c7f781d2564e42f04ea2c6d281701432 100644 --- a/docs/gallery/app_showcase.md +++ b/docs/gallery/app_showcase.md @@ -1,26 +1,26 @@ # 😎 App Showcase -Here is a sample of some of the incredible applications and tools built on top of GPT Index! +Here is a sample of some of the incredible applications and tools built on top of LlamaIndex! ###### Meru - Dense Data Retrieval API -Hosted API service. Includes a "Dense Data Retrieval" API built on top of GPT Index where users can upload their documents and query them. +Hosted API service. Includes a "Dense Data Retrieval" API built on top of LlamaIndex where users can upload their documents and query them. [[Website]](https://www.usemeru.com/densedataretrieval) ###### Algovera -Build AI workflows using building blocks. Many workflows built on top of GPT Index. 
+Build AI workflows using building blocks. Many workflows built on top of LlamaIndex. [[Website]](https://app.algovera.ai/workflows). -###### ChatGPT GPT Index +###### ChatGPT LlamaIndex Interface that allows users to upload long docs and chat with the bot. [[Tweet thread]](https://twitter.com/s_jobs6/status/1618346125697875968?s=20&t=RJhQu2mD0-zZNGfq65xodA) ###### AgentHQ -A web tool to build agents, interacting with GPT Index data structures.[[Website]](https://app.agent-hq.io/) +A web tool to build agents, interacting with LlamaIndex data structures.[[Website]](https://app.agent-hq.io/) ###### PapersGPT diff --git a/docs/getting_started/starter_example.md b/docs/getting_started/starter_example.md index 891f191c1f175c3e3971e51fc68625fef69fe621..90311a84322948772bca437136040231dc296665 100644 --- a/docs/getting_started/starter_example.md +++ b/docs/getting_started/starter_example.md @@ -1,10 +1,10 @@ # Starter Tutorial -Here is a starter example for using GPT Index. Make sure you've followed the [installation](installation.md) steps first. +Here is a starter example for using LlamaIndex. Make sure you've followed the [installation](installation.md) steps first. ### Download -GPT Index examples can be found in the `examples` folder of the GPT Index repository. +LlamaIndex examples can be found in the `examples` folder of the LlamaIndex repository. We first want to download this `examples` folder. An easy way to do this is to just clone the repo: ```bash @@ -28,7 +28,7 @@ We now want to navigate to the following folder: $ cd examples/paul_graham_essay ``` -This contains GPT Index examples around Paul Graham's essay, ["What I Worked On"](http://paulgraham.com/worked.html). A comprehensive set of examples are already provided in `TestEssay.ipynb`. For the purposes of this tutorial, we can focus on a simple example of getting GPT Index up and running. +This contains LlamaIndex examples around Paul Graham's essay, ["What I Worked On"](http://paulgraham.com/worked.html). 
A comprehensive set of examples are already provided in `TestEssay.ipynb`. For the purposes of this tutorial, we can focus on a simple example of getting LlamaIndex up and running. ### Build and Query Index @@ -77,5 +77,5 @@ index = GPTSimpleVectorIndex.load_from_disk('index.json') ### Next Steps -That's it! For more information on GPT Index features, please check out the numerous "How-To Guides" to the left. +That's it! For more information on LlamaIndex features, please check out the numerous "How-To Guides" to the left. Additionally, if you would like to play around with Example Notebooks, check out [this link](/reference/example_notebooks.rst). diff --git a/docs/guides/index_guide.md b/docs/guides/index_guide.md index e93b6eb139715f95961368d84e73c3dfea13a77c..62929f96dd78794a0f47f9c3022c915b439582e5 100644 --- a/docs/guides/index_guide.md +++ b/docs/guides/index_guide.md @@ -3,7 +3,7 @@ This guide describes how each index works with diagrams. We also visually highlight our "Response Synthesis" modes. Some terminology: -- **Node**: Corresponds to a chunk of text from a Document. GPT Index takes in Document objects and internally parses/chunks them into Node objects. +- **Node**: Corresponds to a chunk of text from a Document. LlamaIndex takes in Document objects and internally parses/chunks them into Node objects. - **Response Synthesis**: Our module which synthesizes a response given the retrieved Node. You can see how to [specify different response modes](setting-response-mode) here. See below for an illustration of how each response mode works. @@ -16,7 +16,7 @@ The list index simply stores Nodes as a sequential chain. ### Querying -During query time, if no other query parameters are specified, GPT Index simply all Nodes in the list into +During query time, if no other query parameters are specified, LlamaIndex simply feeds all Nodes in the list into our Reponse Synthesis module.  @@ -72,7 +72,7 @@ Response Synthesis module. 
## Response Synthesis -GPT Index offers different methods of synthesizing a response. The way to toggle this can be found in our +LlamaIndex offers different methods of synthesizing a response. The way to toggle this can be found in our [Usage Pattern Guide](setting-response-mode). Below, we visually highlight how each response mode works. ### Create and Refine diff --git a/docs/guides/primer.md b/docs/guides/primer.md index 95b95f497e51c38a3b4cde57ebe2b0ee710bbff1..a13a48b653710374a96d253b2d8505e69910f15e 100644 --- a/docs/guides/primer.md +++ b/docs/guides/primer.md @@ -1,7 +1,7 @@ -# A Primer to using GPT Index +# A Primer to using LlamaIndex -At its core, GPT Index contains a toolkit of **index data structures** designed to easily connect LLM's with your external data. -GPT Index helps to provide the following advantages: +At its core, LlamaIndex contains a toolkit of **index data structures** designed to easily connect LLM's with your external data. +LlamaIndex helps to provide the following advantages: - Remove concerns over prompt size limitations. - Abstract common usage patterns to reduce boilerplate code in your LLM app. - Provide data connectors to your common data sources (Google Docs, Slack, etc.). @@ -15,35 +15,35 @@ Each data structure offers distinct use cases and a variety of customizable para - Text Generation (Stories, TODO's, emails, etc.) - and more! -This primer is intended to help you get the most out of GPT Index. It gives a high-level overview of the following: -1. The general usage pattern of GPT Index. -2. Mapping Use Cases to GPT Index data Structures +This primer is intended to help you get the most out of LlamaIndex. It gives a high-level overview of the following: +1. The general usage pattern of LlamaIndex. +2. Mapping Use Cases to LlamaIndex data Structures 3. How Each Index Works -## 1. General Usage Pattern of GPT Index +## 1. 
General Usage Pattern of LlamaIndex -The general usage pattern of GPT Index is as follows: +The general usage pattern of LlamaIndex is as follows: 1. Load in documents (either manually, or through a data loader). 2. Index Construction. 3. [Optional, Advanced] Building indices on top of other indices 4. Query the index. See our [Usage Pattern Guide](/guides/usage_pattern.md) for a guide -on the overall steps involved with using GPT Index. +on the overall steps involved with using LlamaIndex. If you are just starting out, take a look at the [Starter Example](/getting_started/starter_example.md) first. -## 2. Mapping Use Cases to GPT Index Data Structures +## 2. Mapping Use Cases to LlamaIndex Data Structures -GPT Index data structures offer distinct use cases and advantages. For instance, the Vector Store-based indices e.g. `GPTSimpleVectorIndex` are a good general purpose tool for document retrieval. +LlamaIndex data structures offer distinct use cases and advantages. For instance, the Vector Store-based indices e.g. `GPTSimpleVectorIndex` are a good general purpose tool for document retrieval. The list index `GPTListIndex` is a good tool for combining answers across documents/nodes. The tree index `GPTTreeIndex` and keyword indices can be used to "route" queries to the right subindices. -[A complete guide on GPT Index use cases](/guides/use_cases.md). +[A complete guide on LlamaIndex use cases](/guides/use_cases.md). -This guide should paint a picture of how you can use GPT Index to solve your own data needs. +This guide should paint a picture of how you can use LlamaIndex to solve your own data needs. ## 3. 
How Each Index Works diff --git a/docs/guides/usage_pattern.md b/docs/guides/usage_pattern.md index 84c5c35dc82b19c6425c9a481c193c31dde7acd9..eb451bc5677cc97aee14c689389e4f358253d5ae 100644 --- a/docs/guides/usage_pattern.md +++ b/docs/guides/usage_pattern.md @@ -1,6 +1,6 @@ -# GPT Index Usage Pattern +# LlamaIndex Usage Pattern -The general usage pattern of GPT Index is as follows: +The general usage pattern of LlamaIndex is as follows: 1. Load in documents (either manually, or through a data loader). 2. Index Construction. 3. [Optional, Advanced] Building indices on top of other indices @@ -19,7 +19,7 @@ documents = SimpleDirectoryReader('data').load_data() ``` -You can also choose to construct documents manually. GPT Index exposes the `Document` struct. +You can also choose to construct documents manually. LlamaIndex exposes the `Document` struct. ```python from llama_index import Document @@ -39,7 +39,7 @@ index = GPTSimpleVectorIndex(documents) ``` -Depending on which index you use, GPT Index may make LLM calls in order to build the index. +Depending on which index you use, LlamaIndex may make LLM calls in order to build the index. ### Inserting Documents @@ -131,7 +131,7 @@ index2 = GPTSimpleVectorIndex(documents2) # Set summary text # you can set the summary manually, or you can -# generate the summary itself using GPT Index +# generate the summary itself using LlamaIndex index1.set_text("summary1") index2.set_text("summary2") diff --git a/docs/guides/use_cases.md b/docs/guides/use_cases.md index 7ed8bd6551f12a1b8192a45553b00ea193410b64..6ca87e674b72c34d448efa31052a1435e6264167 100644 --- a/docs/guides/use_cases.md +++ b/docs/guides/use_cases.md @@ -1,6 +1,6 @@ -# GPT Index Use Cases -GPT Index data structures and parameters offer distinct use cases and advantages. -This guide should paint a picture of how you can use GPT Index to solve your own data needs. 
+# LlamaIndex Use Cases +LlamaIndex data structures and parameters offer distinct use cases and advantages. +This guide should paint a picture of how you can use LlamaIndex to solve your own data needs. We go through each use case, and describe the index tools you can use for each use case. ## By Use Cases @@ -20,9 +20,9 @@ Our Vector Store Indices are good to start with because they generalize to a bro For a more detailed/advanced treatment of different use cases and how they map to indices, please see below. -### Use Case: Connecting GPT Index to an External Data Source of Documents +### Use Case: Connecting LlamaIndex to an External Data Source of Documents -To connect GPT Index to a large external data source of documents, you will want to [use one of our data connectors](/how_to/data_connectors.md), or construct `Document` objects manually (see the [primer guide](/guides/primer.md) for how). +To connect LlamaIndex to a large external data source of documents, you will want to [use one of our data connectors](/how_to/data_connectors.md), or construct `Document` objects manually (see the [primer guide](/guides/primer.md) for how). Then you will likely want to use a [Vector Store Index](vector-store-index). 
@@ -72,7 +72,7 @@ index2 = GPTSimpleVectorIndex(slack_docs) # Set summary text # you can set the summary manually, or you can -# generate the summary itself using GPT Index +# generate the summary itself using LlamaIndex index1.set_text("summary1") index2.set_text("summary2") @@ -105,7 +105,7 @@ index2 = GPTSimpleVectorIndex(slack_docs) # Set summary text # you can set the summary manually, or you can -# generate the summary itself using GPT Index +# generate the summary itself using LlamaIndex index1.set_text("summary1") index2.set_text("summary2") @@ -145,7 +145,7 @@ index2 = GPTSimpleVectorIndex(chapter2) # Set summary text # you can set the summary manually, or you can -# generate the summary itself using GPT Index +# generate the summary itself using LlamaIndex index1.set_text("summary1") index2.set_text("summary2") diff --git a/docs/how_to/composability.md b/docs/how_to/composability.md index 33ad4027aad43091beb956450d9ac0e56b2c3f71..cd16aa96ed3a912286571ac3f4ef1c9cd9b2a011 100644 --- a/docs/how_to/composability.md +++ b/docs/how_to/composability.md @@ -1,7 +1,7 @@ # Composability -GPT Index offers **composability** of your indices, meaning that you can build indices on top of other indices. This allows you to more effectively index your entire document tree in order to feed custom knowledge to GPT. +LlamaIndex offers **composability** of your indices, meaning that you can build indices on top of other indices. This allows you to more effectively index your entire document tree in order to feed custom knowledge to GPT. Composability allows you to to define lower-level indices for each document, and higher-order indices over a collection of documents. To see how this works, imagine defining 1) a tree index for the text within each document, and 2) a list index over each tree index (one document) within your collection. 
@@ -37,7 +37,7 @@ index2.set_text("<summary2>") index3.set_text("<summary3>") ``` -You may choose to manually specify the summary text, or use GPT Index itself to generate +You may choose to manually specify the summary text, or use LlamaIndex itself to generate a summary, for instance with the following: ```python diff --git a/docs/how_to/cost_analysis.md b/docs/how_to/cost_analysis.md index d2b829b1182fe3d74983a55fbbf5aefc4da96c43..ed0f54d651abb5076835382dccd4b034dc66a06c 100644 --- a/docs/how_to/cost_analysis.md +++ b/docs/how_to/cost_analysis.md @@ -10,7 +10,7 @@ Each call to an LLM will cost some amount of money - for instance, OpenAI's Davi The cost of building and querying each index is a TODO in the reference documentation. In the meantime, we provide the following information: 1. A high-level overview of the cost structure of the indices. -2. A token predictor that you can use directly within GPT Index! +2. A token predictor that you can use directly within LlamaIndex! ### Overview of Cost Structure @@ -44,7 +44,7 @@ Here are some notes regarding each of the indices: ### Token Predictor Usage -GPT Index offers token **predictors** to predict token usage of LLM and embedding calls. +LlamaIndex offers token **predictors** to predict token usage of LLM and embedding calls. This allows you to estimate your costs during 1) index construction, and 2) index querying, before any respective LLM calls are made. 
diff --git a/docs/how_to/custom_llms.md b/docs/how_to/custom_llms.md index ce871287dd8f1cef81054e710f3644455b22bd2d..25cc90a2bbe618e6a291224c31a83a8e99ad4b8c 100644 --- a/docs/how_to/custom_llms.md +++ b/docs/how_to/custom_llms.md @@ -1,15 +1,15 @@ # Defining LLMs -The goal of GPT Index is to provide a toolkit of data structures that can organize external information in a manner that +The goal of LlamaIndex is to provide a toolkit of data structures that can organize external information in a manner that is easily compatible with the prompt limitations of an LLM. Therefore LLMs are always used to construct the final answer. Depending on the [type of index](/reference/indices.rst) being used, LLMs may also be used during index construction, insertion, and query traversal. -GPT Index uses Langchain's [LLM](https://langchain.readthedocs.io/en/latest/modules/llms.html) +LlamaIndex uses Langchain's [LLM](https://langchain.readthedocs.io/en/latest/modules/llms.html) and [LLMChain](https://langchain.readthedocs.io/en/latest/modules/chains.html) module to define the underlying abstraction. We introduce a wrapper class, -[`LLMPredictor`](/reference/llm_predictor.rst), for integration into GPT Index. +[`LLMPredictor`](/reference/llm_predictor.rst), for integration into LlamaIndex. We also introduce a [`PromptHelper` class](/reference/prompt_helper.rst), to allow the user to explicitly set certain constraint parameters, such as diff --git a/docs/how_to/custom_prompts.md b/docs/how_to/custom_prompts.md index 80eb03c0f53e39e03840491f509aad70d737b387..4f660d61236e5e6b230e280fb21640f30cd62bcc 100644 --- a/docs/how_to/custom_prompts.md +++ b/docs/how_to/custom_prompts.md @@ -1,9 +1,9 @@ # Defining Prompts -Prompting is the fundamental input that gives LLMs their expressive power. GPT Index uses prompts to build the index, do insertion, +Prompting is the fundamental input that gives LLMs their expressive power. 
LlamaIndex uses prompts to build the index, do insertion, perform traversal during querying, and to synthesize the final answer. -GPT Index uses a finite set of *prompt types*, described [here](/reference/prompts.rst). +LlamaIndex uses a finite set of *prompt types*, described [here](/reference/prompts.rst). All index classes, along with their associated queries, utilize a subset of these prompts. The user may provide their own prompt. If the user does not provide their own prompt, default prompts are used. diff --git a/docs/how_to/data_connectors.md b/docs/how_to/data_connectors.md index ccf1d6d096a319a7823744e3d00959b99f915f57..e4f7900f9775ffdb72ac15460431686c46cb9130 100644 --- a/docs/how_to/data_connectors.md +++ b/docs/how_to/data_connectors.md @@ -1,7 +1,7 @@ # Data Connectors (LlamaHub 🦙) Our data connectors are offered through [LlamaHub](https://llamahub.ai/) 🦙. -LlamaHub is an open-source repository containing data loaders that you can easily plug and play into any GPT Index application. +LlamaHub is an open-source repository containing data loaders that you can easily plug and play into any LlamaIndex application.  diff --git a/docs/how_to/embeddings.md b/docs/how_to/embeddings.md index 13b5ef7961337bd2c115408ab511560bde2b59b5..9daaee75e05b091c575ff910a7f22fd5f7da7725 100644 --- a/docs/how_to/embeddings.md +++ b/docs/how_to/embeddings.md @@ -1,6 +1,6 @@ # Embedding support -GPT Index provides support for embeddings in the following format: +LlamaIndex provides support for embeddings in the following format: - Adding embeddings to Document objects - Using a Vector Store as an underlying index (e.g. `GPTSimpleVectorIndex`, `GPTFaissIndex`) - Querying our list and tree indices with embeddings. @@ -22,7 +22,7 @@ guide for more details. ## Using an Embedding Query Mode in List/Tree Index -GPT Index provides embedding support to our tree and list indices. In addition to each node storing text, each node can optionally store an embedding. 
+LlamaIndex provides embedding support to our tree and list indices. In addition to each node storing text, each node can optionally store an embedding. During query-time, we can use embeddings to do max-similarity retrieval of nodes before calling the LLM to synthesize an answer. Since similarity lookup using embeddings (e.g. using cosine similarity) does not require a LLM call, embeddings serve as a cheaper lookup mechanism instead of using LLMs to traverse nodes. @@ -55,12 +55,12 @@ An example notebook is given [here](https://github.com/jerryjliu/gpt_index/blob/ (custom-embeddings)= ## Custom Embeddings -GPT Index allows you to define custom embedding modules. By default, we use `text-embedding-ada-002` from OpenAI. +LlamaIndex allows you to define custom embedding modules. By default, we use `text-embedding-ada-002` from OpenAI. You can also choose to plug in embeddings from Langchain's [embeddings](https://langchain.readthedocs.io/en/latest/reference/modules/embeddings.html) module. We introduce a wrapper class, -[`LangchainEmbedding`](/reference/embeddings.rst), for integration into GPT Index. +[`LangchainEmbedding`](/reference/embeddings.rst), for integration into LlamaIndex. An example snippet is shown below (to use Hugging Face embeddings) on the GPTListIndex: diff --git a/docs/how_to/update.md b/docs/how_to/update.md index db4c2b61e8d63c29b12ff50d22756401cc5c6e97..14b0a4f8cb80a5b44e89b1c887c8f7433da4da89 100644 --- a/docs/how_to/update.md +++ b/docs/how_to/update.md @@ -1,6 +1,6 @@ # Updating an Index -Every GPT Index data structure allows **insertion**, **deletion**, and **update**. +Every LlamaIndex data structure allows **insertion**, **deletion**, and **update**. 
### Insertion diff --git a/docs/how_to/using_with_langchain.md b/docs/how_to/using_with_langchain.md index ad2e810c50229f348b9c0e16ff5afdea53ec1165..fa9d95e7fe58151bd3ab5bdcedfe5864a8626bb2 100644 --- a/docs/how_to/using_with_langchain.md +++ b/docs/how_to/using_with_langchain.md @@ -1,8 +1,8 @@ # Using with Langchain 🦜🔗 -We provide a demo notebook showing how you can use GPT Index with Langchain. +We provide a demo notebook showing how you can use LlamaIndex with Langchain. We provide the following examples: -- Using GPT Index as a callable tool with a Langchain agent -- Using GPT Index as a memory module; this allows you to insert arbitrary amounts of conversation history with a Langchain chatbot! +- Using LlamaIndex as a callable tool with a Langchain agent +- Using LlamaIndex as a memory module; this allows you to insert arbitrary amounts of conversation history with a Langchain chatbot! Please see the [notebook here](https://github.com/jerryjliu/gpt_index/blob/main/examples/langchain_demo/LangchainDemo.ipynb) \ No newline at end of file diff --git a/docs/how_to/vector_stores.md b/docs/how_to/vector_stores.md index cef919c5657466f756f67f8c833b0c55f5bab669..60f3401f2300504e145bc20bc1c14359293d7c76 100644 --- a/docs/how_to/vector_stores.md +++ b/docs/how_to/vector_stores.md @@ -1,13 +1,13 @@ # Using Vector Stores -GPT Index offers multiple integration points with vector stores / vector databases: +LlamaIndex offers multiple integration points with vector stores / vector databases: -1. GPT Index can load data from vector stores, similar to any other data connector. This data can then be used within GPT Index data structures. -2. GPT Index can use a vector store itself as an index. Like any other index, this index can store documents and be used to answer queries. +1. LlamaIndex can load data from vector stores, similar to any other data connector. This data can then be used within LlamaIndex data structures. +2. 
LlamaIndex can use a vector store itself as an index. Like any other index, this index can store documents and be used to answer queries. ## Loading Data from Vector Stores using Data Connector -GPT Index supports loading data from the following sources. See [Data Connectors](data_connectors.md) for more details and API documentation. +LlamaIndex supports loading data from the following sources. See [Data Connectors](data_connectors.md) for more details and API documentation. - Chroma (`ChromaReader`) [Installation](https://docs.trychroma.com/getting-started) - Qdrant (`QdrantReader`) [Installation](https://qdrant.tech/documentation/install/) [Python Client](https://qdrant.tech/documentation/install/#python-client) @@ -59,7 +59,7 @@ For instance, this is an example usage of the Pinecone data loader `PineconeRead ## Using a Vector Store as an Index -GPT Index also supports using a vector store itself as an index. +LlamaIndex also supports using a vector store itself as an index. These are found in the following classes: - `GPTSimpleVectorIndex` - `GPTFaissIndex` @@ -71,7 +71,7 @@ These are found in the following classes: An API reference of each vector index is [found here](/reference/indices/vector_store.md). -Similar to any other index within GPT Index (tree, keyword table, list), this index can be constructed upon any collection +Similar to any other index within LlamaIndex (tree, keyword table, list), this index can be constructed upon any collection of documents. We use the vector store within the index to store embeddings for the input text chunks. Once constructed, the index can be used for querying. diff --git a/docs/index.rst b/docs/index.rst index 7d22021173a7809a5654578ba0a0bd28f397606c..fef9b94a3ac00b475c16487f2137a8a2b4078941 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,13 +34,13 @@ Context Proposed Solution ^^^^^^^^^^^^^^^^^ -That's where the **GPT Index** comes in. 
GPT Index is a simple, flexible interface between your external data and LLMs. It resolves the following pain points: +That's where **LlamaIndex** comes in. LlamaIndex is a simple, flexible interface between your external data and LLMs. It resolves the following pain points: - Provides simple data structures to resolve prompt size limitations. - Offers data connectors to your external data sources. - Offers you a comprehensive toolset trading off cost and performance. -At the core of GPT Index is a **data structure**. Instead of relying on world knowledge encoded in the model weights, a GPT Index data structure does the following: +At the core of LlamaIndex is a **data structure**. Instead of relying on world knowledge encoded in the model weights, a LlamaIndex data structure does the following: - Uses a pre-trained LLM primarily for *reasoning*/*summarization* instead of prior knowledge. - Takes as input a large corpus of text data and build a structured index over it (using an LLM or heuristics). diff --git a/docs/reference/llm_predictor.rst b/docs/reference/llm_predictor.rst index 654b7a8c2a6b61e13dbbc3b7f09cab0b94449f9d..f248d4a2499a016db2351fe22679a4afbb56d72b 100644 --- a/docs/reference/llm_predictor.rst +++ b/docs/reference/llm_predictor.rst @@ -3,7 +3,7 @@ LLMPredictor ================= -Our LLMPredictor is a wrapper around Langchain's `LLMChain` that allows easy integration into GPT Index. +Our LLMPredictor is a wrapper around Langchain's `LLMChain` that allows easy integration into LlamaIndex. .. 
automodule:: gpt_index.langchain_helpers.chain_wrapper :members: diff --git a/gpt_index/__init__.py b/gpt_index/__init__.py index c1a3792639a2ce53ffd096f414c7674b0faa2143..788dcb6244c7e0f991a7c4451cfe9369597fc621 100644 --- a/gpt_index/__init__.py +++ b/gpt_index/__init__.py @@ -1,4 +1,4 @@ -"""Init file of GPT Index.""" +"""Init file of LlamaIndex.""" from pathlib import Path with open(Path(__file__).absolute().parents[0] / "VERSION") as _f: diff --git a/gpt_index/data_structs/data_structs.py b/gpt_index/data_structs/data_structs.py index 93436dacd0c5ecb58493d5432f362c98f6001c8d..c4cd8d524dce48ca42602e57681896f625143814 100644 --- a/gpt_index/data_structs/data_structs.py +++ b/gpt_index/data_structs/data_structs.py @@ -13,7 +13,7 @@ from gpt_index.utils import get_new_int_id @dataclass class IndexStruct(BaseDocument, DataClassJsonMixin): - """A base data struct for a GPT index.""" + """A base data struct for a LlamaIndex.""" # NOTE: the text field, inherited from BaseDocument, # represents a summary of the content of the index struct. 
diff --git a/gpt_index/embeddings/utils.py b/gpt_index/embeddings/utils.py index b854a6fa5a966671ea193573c1ecc21b6ad63014..1af024c6f07bfeaa0a4168b47d3e87519586a2a0 100644 --- a/gpt_index/embeddings/utils.py +++ b/gpt_index/embeddings/utils.py @@ -1,4 +1,4 @@ -"""Embedding utils for gpt index.""" +"""Embedding utils for LlamaIndex.""" from typing import List diff --git a/gpt_index/indices/__init__.py b/gpt_index/indices/__init__.py index 10f6e05ee4f8421b751e647fe3062d17da843799..d1523729d92a353bd84bdaa8853e550c15e9ec5e 100644 --- a/gpt_index/indices/__init__.py +++ b/gpt_index/indices/__init__.py @@ -1,4 +1,4 @@ -"""GPT Index data structures.""" +"""LlamaIndex data structures.""" # indices from gpt_index.indices.keyword_table.base import GPTKeywordTableIndex diff --git a/gpt_index/indices/base.py b/gpt_index/indices/base.py index 00b841867ec87d86448341a6f79452ac26ec29fd..f5214b2b38d97ffc0b1056b8b46897c8de75a544 100644 --- a/gpt_index/indices/base.py +++ b/gpt_index/indices/base.py @@ -40,7 +40,7 @@ DOCUMENTS_INPUT = Union[BaseDocument, "BaseGPTIndex"] class BaseGPTIndex(Generic[IS]): - """Base GPT Index. + """Base LlamaIndex. Args: documents (Optional[Sequence[BaseDocument]]): List of documents to diff --git a/gpt_index/indices/common/struct_store/schema.py b/gpt_index/indices/common/struct_store/schema.py index 0315e4ef464b92f2527bfb32782ae5adca718106..3086d3370d6dd651d2c717a792e6c9464bb87161 100644 --- a/gpt_index/indices/common/struct_store/schema.py +++ b/gpt_index/indices/common/struct_store/schema.py @@ -12,7 +12,7 @@ class SQLContextContainer(DataClassJsonMixin): A container interface to store context for a given table. Context can be built from unstructured documents (e.g. using SQLContextBuilder). - Context can also be dumped to an underlying GPT Index data structure. + Context can also be dumped to an underlying LlamaIndex data structure. Contains both the raw context_dict as well as any index_structure. 
diff --git a/gpt_index/indices/keyword_table/base.py b/gpt_index/indices/keyword_table/base.py index 786e5687fb17190706262576793f2de8b4640eef..98faa82fad52382e08df14292577ef38dd2ba892 100644 --- a/gpt_index/indices/keyword_table/base.py +++ b/gpt_index/indices/keyword_table/base.py @@ -1,6 +1,6 @@ """Keyword-table based index. -Similar to a "hash table" in concept. GPT Index first tries +Similar to a "hash table" in concept. LlamaIndex first tries to extract keywords from the source text, and stores the keywords as keys per item. It similarly extracts keywords from the query text. Then, it tries to match those keywords to diff --git a/gpt_index/indices/knowledge_graph/base.py b/gpt_index/indices/knowledge_graph/base.py index 22a7dca377abb6277bb7ec95b1de24d1ed872f75..ed6e3572bea9a0e9040bd98e58224ae214cfb5db 100644 --- a/gpt_index/indices/knowledge_graph/base.py +++ b/gpt_index/indices/knowledge_graph/base.py @@ -1,6 +1,6 @@ """Keyword-table based index. -Similar to a "hash table" in concept. GPT Index first tries +Similar to a "hash table" in concept. LlamaIndex first tries to extract keywords from the source text, and stores the keywords as keys per item. It similarly extracts keywords from the query text. Then, it tries to match those keywords to diff --git a/gpt_index/indices/list/base.py b/gpt_index/indices/list/base.py index d678313b5701c6198f354d685d79419c49fca4e0..cfd04f47a842705325177aea7dea3668a1cc9770 100644 --- a/gpt_index/indices/list/base.py +++ b/gpt_index/indices/list/base.py @@ -1,6 +1,6 @@ """List index. -A simple data structure where GPT Index iterates through document chunks +A simple data structure where LlamaIndex iterates through document chunks in sequence in order to answer a given query. 
""" diff --git a/gpt_index/indices/query/base.py b/gpt_index/indices/query/base.py index 04c8e876492dc0b4e6bcb6ee8c19827692dfba45..038a49c9a33b7424e4247b274d6b4110b1b80266 100644 --- a/gpt_index/indices/query/base.py +++ b/gpt_index/indices/query/base.py @@ -38,7 +38,7 @@ class BaseQueryRunner: class BaseGPTIndexQuery(Generic[IS]): - """Base GPT Index Query. + """Base LlamaIndex Query. Helper class that is used to query an index. Can be called within `query` method of a BaseGPTIndex object, or instantiated independently. diff --git a/gpt_index/indices/tree/inserter.py b/gpt_index/indices/tree/inserter.py index 3c0a11e955cc7298378f76112b4e4a9fc1302c23..5f01bd2154e14e3ff71c14eda72e33bb235fc29d 100644 --- a/gpt_index/indices/tree/inserter.py +++ b/gpt_index/indices/tree/inserter.py @@ -16,7 +16,7 @@ from gpt_index.schema import BaseDocument class GPTIndexInserter: - """GPT Index inserter.""" + """LlamaIndex inserter.""" def __init__( self, diff --git a/gpt_index/langchain_helpers/memory_wrapper.py b/gpt_index/langchain_helpers/memory_wrapper.py index 6af9d1a0133bbdf496857f225254cdf042edd0b3..83ab6243135f471a5b2e59ca69c011fba3a141e8 100644 --- a/gpt_index/langchain_helpers/memory_wrapper.py +++ b/gpt_index/langchain_helpers/memory_wrapper.py @@ -1,4 +1,4 @@ -"""Langchain memory wrapper (for GPT Index).""" +"""Langchain memory wrapper (for LlamaIndex).""" from typing import Any, Dict, List, Optional @@ -24,14 +24,14 @@ def get_prompt_input_key(inputs: Dict[str, Any], memory_variables: List[str]) -> class GPTIndexMemory(Memory): - """Langchain memory wrapper (for GPT Index). + """Langchain memory wrapper (for LlamaIndex). Args: human_prefix (str): Prefix for human input. Defaults to "Human". ai_prefix (str): Prefix for AI output. Defaults to "AI". memory_key (str): Key for memory. Defaults to "history". - index (BaseGPTIndex): GPT Index instance. - query_kwargs (Dict[str, Any]): Keyword arguments for GPT Index query. + index (BaseGPTIndex): LlamaIndex instance. 
+ query_kwargs (Dict[str, Any]): Keyword arguments for LlamaIndex query. input_key (Optional[str]): Input key. Defaults to None. output_key (Optional[str]): Output key. Defaults to None. diff --git a/gpt_index/prompts/base.py b/gpt_index/prompts/base.py index 7d31d14e71a85ec7b6245e0d2ffd714cce527961..89ab18aede69045fb44d425d802912f0a184e654 100644 --- a/gpt_index/prompts/base.py +++ b/gpt_index/prompts/base.py @@ -11,7 +11,7 @@ PMT = TypeVar("PMT", bound="Prompt") class Prompt: - """Prompt class for GPT Index. + """Prompt class for LlamaIndex. Wrapper around langchain's prompt class. Adds ability to: - enforce certain prompt types diff --git a/gpt_index/readers/__init__.py b/gpt_index/readers/__init__.py index 3b57d5cef1b31fef32708f799e49f222f2b5e62a..da973029082f83c60ad442f53a6f6ff4cd21cc11 100644 --- a/gpt_index/readers/__init__.py +++ b/gpt_index/readers/__init__.py @@ -1,6 +1,6 @@ -"""Data Connectors for GPT Index. +"""Data Connectors for LlamaIndex. -This module contains the data connectors for GPT Index. Each connector inherits +This module contains the data connectors for LlamaIndex. Each connector inherits from a `BaseReader` class, connects to a data source, and loads Document objects from that data source. diff --git a/gpt_index/readers/database.py b/gpt_index/readers/database.py index 4098c3e4bb1452ddb95150e467c23f0d503b34e1..f6123515a2b85e287009c54b28274e0560245836 100644 --- a/gpt_index/readers/database.py +++ b/gpt_index/readers/database.py @@ -13,7 +13,7 @@ from gpt_index.readers.schema.base import Document class DatabaseReader(BaseReader): """Simple Database reader. - Concatenates each row into Document used by GPT Index. + Concatenates each row into Document used by LlamaIndex. 
Args: sql_database (Optional[SQLDatabase]): SQL database to use, diff --git a/gpt_index/readers/faiss.py b/gpt_index/readers/faiss.py index 82f076eb7558c8648d1b13d5ccec126b5cb6a5f3..9512ff4d69fd5c7cf88aa0e9cd21d00617f6745e 100644 --- a/gpt_index/readers/faiss.py +++ b/gpt_index/readers/faiss.py @@ -12,7 +12,7 @@ class FaissReader(BaseReader): """Faiss reader. Retrieves documents through an existing in-memory Faiss index. - These documents can then be used in a downstream GPT Index data structure. + These documents can then be used in a downstream LlamaIndex data structure. If you wish use Faiss itself as an index to to organize documents, insert documents, and perform queries on them, please use GPTFaissIndex. diff --git a/gpt_index/readers/mongo.py b/gpt_index/readers/mongo.py index 2bc13de0428fd82ed9d665fc257405cf2de8929a..4aff20e3d3fafb9636ba6696bc89f964c68c8a1b 100644 --- a/gpt_index/readers/mongo.py +++ b/gpt_index/readers/mongo.py @@ -9,7 +9,7 @@ from gpt_index.readers.schema.base import Document class SimpleMongoReader(BaseReader): """Simple mongo reader. - Concatenates each Mongo doc into Document used by GPT Index. + Concatenates each Mongo doc into Document used by LlamaIndex. Args: host (str): Mongo host. diff --git a/gpt_index/readers/weaviate/data_structs.py b/gpt_index/readers/weaviate/data_structs.py index 460d2ae4fea64197bd58b237d3e9f7ac8ad5604e..3f42ab7bec5aadeea91e63b76f063f2a9bcad9cf 100644 --- a/gpt_index/readers/weaviate/data_structs.py +++ b/gpt_index/readers/weaviate/data_structs.py @@ -1,6 +1,6 @@ -"""Weaviate-specific serializers for GPT Index data structures. +"""Weaviate-specific serializers for LlamaIndex data structures. -Contain conversion to and from dataclasses that GPT Index uses. +Contain conversion to and from dataclasses that LlamaIndex uses. 
""" @@ -94,7 +94,7 @@ class BaseWeaviateIndexStruct(Generic[IS]): @classmethod @abstractmethod def _entry_to_gpt_index(cls, entry: Dict) -> IS: - """Convert to gpt index list.""" + """Convert to LlamaIndex list.""" @classmethod def to_gpt_index_list( @@ -104,7 +104,7 @@ class BaseWeaviateIndexStruct(Generic[IS]): vector: Optional[List[float]] = None, object_limit: Optional[int] = None, ) -> List[IS]: - """Convert to gpt index list.""" + """Convert to LlamaIndex list.""" validate_client(client) class_name = cls._class_name(class_prefix) properties = cls._get_common_properties() + cls._get_properties() @@ -133,11 +133,11 @@ class BaseWeaviateIndexStruct(Generic[IS]): @classmethod @abstractmethod def _from_gpt_index(cls, client: Any, index: IS, class_prefix: str) -> str: - """Convert from gpt index.""" + """Convert from LlamaIndex.""" @classmethod def from_gpt_index(cls, client: Any, index: IS, class_prefix: str) -> str: - """Convert from gpt index.""" + """Convert from LlamaIndex.""" validate_client(client) index_id = cls._from_gpt_index(client, index, class_prefix) client.batch.flush() @@ -180,7 +180,7 @@ class WeaviateNode(BaseWeaviateIndexStruct[Node]): @classmethod def _entry_to_gpt_index(cls, entry: Dict) -> Node: - """Convert to gpt index list.""" + """Convert to LlamaIndex list.""" extra_info_str = entry["extra_info"] if extra_info_str == "": extra_info = None @@ -205,7 +205,7 @@ class WeaviateNode(BaseWeaviateIndexStruct[Node]): @classmethod def _from_gpt_index(cls, client: Any, node: Node, class_prefix: str) -> str: - """Convert from gpt index.""" + """Convert from LlamaIndex.""" node_dict = node.to_dict() vector = node_dict.pop("embedding") extra_info = node_dict.pop("extra_info")