Skip to content
Snippets Groups Projects
Unverified Commit 7d793652 authored by Thuc Pham's avatar Thuc Pham Committed by GitHub
Browse files

feat: add examples and docs for readers (#323)

parent 55569220
No related branches found
No related tags found
No related merge requests found
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
## Reader Examples
These examples show how to use a specific reader class by loading a document and running a test query.
1. Make sure you are in the `examples` directory:
```bash
cd ./examples
```
2. Set the `OPENAI_API_KEY` environment variable:
```bash
export OPENAI_API_KEY=your_openai_api_key
```
3. Run one of the following commands to load a document and run a test query:
- MarkdownReader Example
```bash
npx ts-node readers/load-md.ts
```
- DocxReader Example
```bash
npx ts-node readers/load-docx.ts
```
- PdfReader Example
```bash
npx ts-node readers/load-pdf.ts
```
- HtmlReader Example
```bash
npx ts-node readers/load-html.ts
```
- CsvReader Example
```bash
npx ts-node readers/load-csv.ts
```
- NotionReader Example
```bash
export NOTION_TOKEN=your_notion_token
npx ts-node readers/load-notion.ts
```
- AssemblyAI Example
```bash
export ASSEMBLYAI_API_KEY=your_assemblyai_api_key
npx ts-node readers/load-assemblyai.ts
```
File moved
File moved
import { DocxReader, VectorStoreIndex } from "llamaindex";
const FILE_PATH = "./data/stars.docx";
const SAMPLE_QUERY = "Information about Zodiac";
/**
 * Loads the DOCX file at `FILE_PATH` with `DocxReader`, builds a
 * `VectorStoreIndex` from the loaded documents, runs `SAMPLE_QUERY` against
 * it, and prints the response to the console.
 *
 * @returns A promise that settles once the query response has been logged.
 */
async function main(): Promise<void> {
  // Load docx file
  console.log("Loading data...");
  const reader = new DocxReader();
  const documents = await reader.loadData(FILE_PATH);

  // Create embeddings
  console.log("Creating embeddings...");
  const index = await VectorStoreIndex.fromDocuments(documents);

  // Test query
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(SAMPLE_QUERY);
  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
}

// Handle rejection explicitly rather than leaving a floating promise: log the
// failure and mark the process as failed so callers see a non-zero exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
File moved
import { MarkdownReader, VectorStoreIndex } from "llamaindex";
const FILE_PATH = "./data/planets.md";
const SAMPLE_QUERY = "List all planets";
/**
 * Loads the Markdown file at `FILE_PATH` with `MarkdownReader`, builds a
 * `VectorStoreIndex` from the loaded documents, runs `SAMPLE_QUERY` against
 * it, and prints the response to the console.
 *
 * @returns A promise that settles once the query response has been logged.
 */
async function main(): Promise<void> {
  // Load markdown file
  console.log("Loading data...");
  const reader = new MarkdownReader();
  const documents = await reader.loadData(FILE_PATH);

  // Create embeddings
  console.log("Creating embeddings...");
  const index = await VectorStoreIndex.fromDocuments(documents);

  // Test query
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(SAMPLE_QUERY);
  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
}

// Handle rejection explicitly rather than leaving a floating promise: log the
// failure and mark the process as failed so callers see a non-zero exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
File moved
File moved
...@@ -21,6 +21,7 @@ export * from "./nodeParsers"; ...@@ -21,6 +21,7 @@ export * from "./nodeParsers";
export * from "./postprocessors"; export * from "./postprocessors";
export * from "./readers/AssemblyAI"; export * from "./readers/AssemblyAI";
export * from "./readers/CSVReader"; export * from "./readers/CSVReader";
export * from "./readers/DocxReader";
export * from "./readers/HTMLReader"; export * from "./readers/HTMLReader";
export * from "./readers/MarkdownReader"; export * from "./readers/MarkdownReader";
export * from "./readers/NotionReader"; export * from "./readers/NotionReader";
......
import { DocxReader } from "../../readers/DocxReader";
// Test suite for DocxReader: loads the sample .docx fixture and checks the
// text of the returned documents.
describe("DocxReader", () => {
  // Re-created before every test so each case starts with a fresh reader.
  let reader: DocxReader;

  beforeEach(() => {
    reader = new DocxReader();
  });

  describe("loadData", () => {
    it("should load data from a docx file, return an array of documents and contain text", async () => {
      // NOTE(review): relative path — presumably resolved against the test
      // runner's working directory; confirm against the test configuration.
      const fixturePath = "../../examples/data/stars.docx";
      const documents = await reader.loadData(fixturePath);

      // Concatenate the text of every returned document before searching it.
      const combinedText = documents.map(({ text }) => text).join("");

      expect(documents).toBeInstanceOf(Array);
      expect(combinedText).toContain("Venturing into the zodiac");
    });
  });
});
import { MarkdownReader } from "../../readers/MarkdownReader";
// Test suite for MarkdownReader: loads the sample .md fixture and checks the
// text of the returned documents.
describe("MarkdownReader", () => {
  // Re-created before every test so each case starts with a fresh reader.
  let reader: MarkdownReader;

  beforeEach(() => {
    reader = new MarkdownReader();
  });

  describe("loadData", () => {
    it("should load data from a markdown file, return an array of documents and contain text", async () => {
      // NOTE(review): relative path — presumably resolved against the test
      // runner's working directory; confirm against the test configuration.
      const fixturePath = "../../examples/data/planets.md";
      const documents = await reader.loadData(fixturePath);

      // Concatenate the text of every returned document before searching it.
      const combinedText = documents.map(({ text }) => text).join("");

      expect(documents).toBeInstanceOf(Array);
      expect(combinedText).toContain("Solar System");
    });
  });
});
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment