diff --git a/examples/data/planets.md b/examples/data/planets.md new file mode 100644 index 0000000000000000000000000000000000000000..97958b76a7b16ef831ec5a7f2527aa5975480a3e Binary files /dev/null and b/examples/data/planets.md differ diff --git a/examples/data/stars.docx b/examples/data/stars.docx new file mode 100644 index 0000000000000000000000000000000000000000..e7c9f0f884093072bc5cf12befb693fe2857ffaa Binary files /dev/null and b/examples/data/stars.docx differ diff --git a/examples/readers/README.md b/examples/readers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..15be84a34000f64eac8cf812919576935caf0efb --- /dev/null +++ b/examples/readers/README.md @@ -0,0 +1,61 @@ +## Reader Examples + +These examples show how to use a specific reader class by loading a document and running a test query. + +1. Make sure you are in the `examples` directory: + +```bash +cd ./examples +``` + +2. Set the `OPENAI_API_KEY` environment variable: + +```bash +export OPENAI_API_KEY=your_openai_api_key +``` + +3. 
Run one of the following commands to load documents and run a test query: + +- MarkdownReader Example + +```bash +npx ts-node readers/load-md.ts +``` + +- DocxReader Example + +```bash +npx ts-node readers/load-docx.ts +``` + +- PdfReader Example + +```bash +npx ts-node readers/load-pdf.ts +``` + +- HtmlReader Example + +```bash +npx ts-node readers/load-html.ts +``` + +- CsvReader Example + +```bash +npx ts-node readers/load-csv.ts +``` + +- NotionReader Example + +```bash +export NOTION_TOKEN=your_notion_token +npx ts-node readers/load-notion.ts +``` + +- AssemblyAI Example + +```bash +export ASSEMBLYAI_API_KEY=your_assemblyai_api_key +npx ts-node readers/load-assemblyai.ts +``` diff --git a/examples/assemblyai.ts b/examples/readers/load-assemblyai.ts similarity index 100% rename from examples/assemblyai.ts rename to examples/readers/load-assemblyai.ts diff --git a/examples/csv.ts b/examples/readers/load-csv.ts similarity index 100% rename from examples/csv.ts rename to examples/readers/load-csv.ts diff --git a/examples/readers/load-docx.ts b/examples/readers/load-docx.ts new file mode 100644 index 0000000000000000000000000000000000000000..61a8b314a6038e35bb7551493f7bb828c78d5a66 --- /dev/null +++ b/examples/readers/load-docx.ts @@ -0,0 +1,22 @@ +import { DocxReader, VectorStoreIndex } from "llamaindex"; + +const FILE_PATH = "./data/stars.docx"; +const SAMPLE_QUERY = "Information about Zodiac"; + +async function main() { + // Load docx file + console.log("Loading data..."); + const reader = new DocxReader(); + const documents = await reader.loadData(FILE_PATH); + + // Create embeddings + console.log("Creating embeddings..."); + const index = await VectorStoreIndex.fromDocuments(documents); + + // Test query + const queryEngine = index.asQueryEngine(); + const response = await queryEngine.query(SAMPLE_QUERY); + console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString()); +} + +main(); diff --git a/examples/html.ts b/examples/readers/load-html.ts similarity index 100% 
rename from examples/html.ts rename to examples/readers/load-html.ts diff --git a/examples/readers/load-md.ts b/examples/readers/load-md.ts new file mode 100644 index 0000000000000000000000000000000000000000..bebc7a2ec14c04e2109712bc6f3a46223c952ccb --- /dev/null +++ b/examples/readers/load-md.ts @@ -0,0 +1,22 @@ +import { MarkdownReader, VectorStoreIndex } from "llamaindex"; + +const FILE_PATH = "./data/planets.md"; +const SAMPLE_QUERY = "List all planets"; + +async function main() { + // Load markdown file + console.log("Loading data..."); + const reader = new MarkdownReader(); + const documents = await reader.loadData(FILE_PATH); + + // Create embeddings + console.log("Creating embeddings..."); + const index = await VectorStoreIndex.fromDocuments(documents); + + // Test query + const queryEngine = index.asQueryEngine(); + const response = await queryEngine.query(SAMPLE_QUERY); + console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString()); +} + +main(); diff --git a/examples/notion.ts b/examples/readers/load-notion.ts similarity index 100% rename from examples/notion.ts rename to examples/readers/load-notion.ts diff --git a/examples/pdf.ts b/examples/readers/load-pdf.ts similarity index 100% rename from examples/pdf.ts rename to examples/readers/load-pdf.ts diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 8676bd393b200e231a57082a18fe336bccdfd385..c0eda73767afac895eb462b69e07d8b47548bdd8 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -21,6 +21,7 @@ export * from "./nodeParsers"; export * from "./postprocessors"; export * from "./readers/AssemblyAI"; export * from "./readers/CSVReader"; +export * from "./readers/DocxReader"; export * from "./readers/HTMLReader"; export * from "./readers/MarkdownReader"; export * from "./readers/NotionReader"; diff --git a/packages/core/src/tests/readers/DocxReader.test.ts b/packages/core/src/tests/readers/DocxReader.test.ts new file mode 100644 index 
0000000000000000000000000000000000000000..dc6ae4eb912752e63bf47776feb27f5f9d71569f --- /dev/null +++ b/packages/core/src/tests/readers/DocxReader.test.ts @@ -0,0 +1,20 @@ +import { DocxReader } from "../../readers/DocxReader"; + +describe("DocxReader", () => { + let docxReader: DocxReader; + + beforeEach(() => { + docxReader = new DocxReader(); + }); + + describe("loadData", () => { + it("should load data from a docx file, return an array of documents and contain text", async () => { + const filePath = "../../examples/data/stars.docx"; + const docs = await docxReader.loadData(filePath); + const docContent = docs.map((doc) => doc.text).join(""); + + expect(docs).toBeInstanceOf(Array); + expect(docContent).toContain("Venturing into the zodiac"); + }); + }); +}); diff --git a/packages/core/src/tests/readers/MarkdownReader.test.ts b/packages/core/src/tests/readers/MarkdownReader.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..7ab4f22bec7789a88c6be1127d3dfef7cd2606c9 --- /dev/null +++ b/packages/core/src/tests/readers/MarkdownReader.test.ts @@ -0,0 +1,20 @@ +import { MarkdownReader } from "../../readers/MarkdownReader"; + +describe("MarkdownReader", () => { + let markdownReader: MarkdownReader; + + beforeEach(() => { + markdownReader = new MarkdownReader(); + }); + + describe("loadData", () => { + it("should load data from a markdown file, return an array of documents and contain text", async () => { + const filePath = "../../examples/data/planets.md"; + const docs = await markdownReader.loadData(filePath); + const docContent = docs.map((doc) => doc.text).join(""); + + expect(docs).toBeInstanceOf(Array); + expect(docContent).toContain("Solar System"); + }); + }); +});