Skip to content
Snippets Groups Projects
Unverified Commit 620c63cd authored by Alex Yang's avatar Alex Yang Committed by GitHub
Browse files

feat: add `@llamaindex/readers` package (#1404)

parent cb51ad90
No related branches found
No related tags found
No related merge requests found
Showing
with 150 additions and 22 deletions
---
"@llamaindex/core": patch
"llamaindex": patch
---
feat: add `@llamaindex/readers` package
If you import from `llamaindex/readers/...`,
you will need to install `@llamaindex/readers` and change the import path to `@llamaindex/readers/...`.
......@@ -15,6 +15,7 @@
"@llamaindex/cloud": "workspace:*",
"@llamaindex/core": "workspace:*",
"@llamaindex/openai": "workspace:*",
"@llamaindex/readers": "workspace:*",
"@mdx-js/mdx": "^3.1.0",
"@radix-ui/react-dialog": "^1.1.2",
"@radix-ui/react-icons": "^1.3.0",
......@@ -32,6 +33,7 @@
"fumadocs-twoslash": "^2.0.0",
"fumadocs-ui": "14.0.2",
"hast-util-to-jsx-runtime": "^2.3.2",
"llamaindex": "workspace:*",
"lucide-react": "^0.436.0",
"next": "15.0.1",
"next-themes": "^0.3.0",
......
......@@ -5,7 +5,7 @@ description: Install llamaindex by running a single command.
import { Tab, Tabs } from "fumadocs-ui/components/tabs";
<Tabs items={["npm", "yarn", "pnpm"]}>
<Tabs groupId="install-llamaindex" items={["npm", "yarn", "pnpm"]} persist>
```shell tab="npm"
npm install llamaindex
```
......
......@@ -2,5 +2,12 @@
"title": "LlamaIndex",
"description": "The Data framework for LLM",
"root": true,
"pages": ["---Guide---", "what-is-llamaindex", "index", "setup", "starter"]
"pages": [
"---Guide---",
"what-is-llamaindex",
"index",
"setup",
"starter",
"readers"
]
}
---
title: Readers
description: "@llamaindex/readers" is a collection of readers for different file formats.
---
import { Tab, Tabs } from "fumadocs-ui/components/tabs";
import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
<Accordions>
<Accordion title="Install @llamaindex/readers">
If you only want to use the reader modules, you can install `@llamaindex/readers` on its own:
<Tabs groupId="install-llamaindex" items={["npm", "yarn", "pnpm"]} persist>
```shell tab="npm"
npm install @llamaindex/readers
```
```shell tab="yarn"
yarn add @llamaindex/readers
```
```shell tab="pnpm"
pnpm add @llamaindex/readers
```
</Tabs>
</Accordion>
</Accordions>
We offer readers for different file formats.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```ts twoslash tab="llamaindex"
import { CSVReader } from 'llamaindex'
import { PDFReader } from 'llamaindex'
import { JSONReader } from 'llamaindex'
import { MarkdownReader } from 'llamaindex'
import { HTMLReader } from 'llamaindex'
// you can find more readers in the documentation
```
```ts twoslash tab="@llamaindex/readers"
import { CSVReader } from '@llamaindex/readers/csv'
import { PDFReader } from '@llamaindex/readers/pdf'
import { JSONReader } from '@llamaindex/readers/json'
import { MarkdownReader } from '@llamaindex/readers/markdown'
import { HTMLReader } from '@llamaindex/readers/html'
// you can find more readers in the documentation
```
</Tabs>
## SimpleDirectoryReader
`SimpleDirectoryReader` is the simplest way to load data from local files into LlamaIndex.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```ts twoslash tab="llamaindex"
import { SimpleDirectoryReader } from "llamaindex";
const reader = new SimpleDirectoryReader()
const documents = await reader.loadData("./data")
// ^?
const texts = documents.map(doc => doc.getText())
// ^?
```
```ts twoslash tab="@llamaindex/readers"
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
const reader = new SimpleDirectoryReader()
const documents = await reader.loadData("./data")
// ^?
const texts = documents.map(doc => doc.getText())
// ^?
```
</Tabs>
## Load file natively using Node.js Customization Hooks
We provide a helper utility that lets you import a data file directly in a Node.js script.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```shell tab="llamaindex"
node --import llamaindex/register ./script.js
```
```shell tab="@llamaindex/readers"
node --import @llamaindex/readers/node ./script.js
```
</Tabs>
```ts
import csv from './path/to/data.csv';
const text = csv.getText()
```
\ No newline at end of file
{
"title": "Loading",
"description": "File Readers Collection",
"pages": ["index"]
}
import {
AstraDBVectorStore,
PapaCSVReader,
CSVReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
......@@ -9,7 +9,7 @@ const collectionName = "movie_reviews";
async function main() {
try {
const reader = new PapaCSVReader(false);
const reader = new CSVReader(false);
const docs = await reader.loadData("./data/movie_reviews.csv");
const astraVS = new AstraDBVectorStore({ contentKey: "reviewtext" });
......
import {
ChromaVectorStore,
PapaCSVReader,
CSVReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
......@@ -12,9 +12,7 @@ async function main() {
try {
console.log(`Loading data from ${sourceFile}`);
const reader = new PapaCSVReader(false, ", ", "\n", {
header: true,
});
const reader = new CSVReader(false, ", ", "\n");
const docs = await reader.loadData(sourceFile);
console.log("Creating ChromaDB vector store");
......
import {
CSVReader,
MilvusVectorStore,
PapaCSVReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
......@@ -9,7 +9,7 @@ const collectionName = "movie_reviews";
async function main() {
try {
const reader = new PapaCSVReader(false);
const reader = new CSVReader(false);
const docs = await reader.loadData("./data/movie_reviews.csv");
const vectorStore = new MilvusVectorStore({ collection: collectionName });
......
/* eslint-disable turbo/no-undeclared-env-vars */
import { SimpleMongoReader } from "@llamaindex/readers/mongo";
import * as dotenv from "dotenv";
import {
MongoDBAtlasVectorSearch,
SimpleMongoReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
......
......@@ -7,6 +7,7 @@
"@azure/identity": "^4.4.1",
"@datastax/astra-db-ts": "^1.4.1",
"@llamaindex/core": "^0.3.4",
"@llamaindex/readers": "^0.0.1",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^3.0.2",
"@vercel/postgres": "^0.10.0",
......
......@@ -18,6 +18,7 @@
"start:json": "node --import tsx ./src/json.ts"
},
"dependencies": {
"@llamaindex/readers": "*",
"llamaindex": "*"
},
"devDependencies": {
......
import { AudioTranscriptReader } from "@llamaindex/readers/assembly-ai";
import { program } from "commander";
import { TranscribeParams, VectorStoreIndex } from "llamaindex";
import { AudioTranscriptReader } from "llamaindex/readers/AssemblyAIReader";
import { stdin as input, stdout as output } from "node:process";
import { createInterface } from "node:readline/promises";
......
import { CSVReader } from "@llamaindex/readers/csv";
import {
getResponseSynthesizer,
OpenAI,
......@@ -5,13 +6,12 @@ import {
Settings,
VectorStoreIndex,
} from "llamaindex";
import { PapaCSVReader } from "llamaindex/readers/CSVReader";
Settings.llm = new OpenAI({ model: "gpt-4" });
async function main() {
// Load CSV
const reader = new PapaCSVReader();
const reader = new CSVReader();
const path = "../data/titanic_train.csv";
const documents = await reader.loadData(path);
......
import { TextFileReader } from "@llamaindex/readers/text";
import type { Document, Metadata } from "llamaindex";
import { FileReader } from "llamaindex";
import {
FILE_EXT_TO_READER,
FileReader,
SimpleDirectoryReader,
} from "llamaindex/readers/SimpleDirectoryReader";
import { TextFileReader } from "llamaindex/readers/TextFileReader";
} from "llamaindex";
class ZipReader extends FileReader {
loadDataAsContent(fileContent: Uint8Array): Promise<Document<Metadata>[]> {
......
import { DiscordReader } from "llamaindex";
import { DiscordReader } from "@llamaindex/readers/discord";
async function main() {
// Create an instance of the DiscordReader. Set token here or DISCORD_TOKEN environment variable
......
import { DocxReader } from "@llamaindex/readers/docx";
import { VectorStoreIndex } from "llamaindex";
import { DocxReader } from "llamaindex/readers/DocxReader";
const FILE_PATH = "../data/stars.docx";
const SAMPLE_QUERY = "Information about Zodiac";
......
import { HTMLReader } from "@llamaindex/readers/html";
import { VectorStoreIndex } from "llamaindex";
import { HTMLReader } from "llamaindex/readers/HTMLReader";
async function main() {
// Load page
......
import { JSONReader } from "llamaindex";
import { JSONReader } from "@llamaindex/readers/json";
async function main() {
// Data
......
import { MarkdownReader } from "@llamaindex/readers/markdown";
import { VectorStoreIndex } from "llamaindex";
import { MarkdownReader } from "llamaindex/readers/MarkdownReader";
const FILE_PATH = "../data/planets.md";
const SAMPLE_QUERY = "List all planets";
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment