feat: add examples and docs for readers (#323)

7d793652 · Thuc Pham · GitHub · 55569220 · 7d793652 · 7d793652
Unverified Commit 7d793652 authored 1 year ago by Thuc Pham Committed by GitHub 1 year ago
--- a/examples/data/planets.md
+++ b/examples/data/planets.md
--- a/examples/data/stars.docx
+++ b/examples/data/stars.docx
--- a/examples/readers/README.md
+++ b/examples/readers/README.md
+## Reader Examples
+These examples show how to use a specific reader class by loading a document and running a test query.
+1. Make sure you are in the `examples` directory:
+```bash
+cd ./examples
+```
+2. Set the `OPENAI_API_KEY` environment variable:
+```bash
+export OPENAI_API_KEY=your_openai_api_key
+```
+3. Run one of the following commands to load a document and run a test query:
+- MarkdownReader Example
+```bash
+npx ts-node readers/load-md.ts
+```
+- DocxReader Example
+```bash
+npx ts-node readers/load-docx.ts
+```
+- PdfReader Example
+```bash
+npx ts-node readers/load-pdf.ts
+```
+- HtmlReader Example
+```bash
+npx ts-node readers/load-html.ts
+```
+- CsvReader Example
+```bash
+npx ts-node readers/load-csv.ts
+```
+- NotionReader Example
+```bash
+export NOTION_TOKEN=your_notion_token
+npx ts-node readers/load-notion.ts
+```
+- AssemblyAI Example
+```bash
+export ASSEMBLYAI_API_KEY=your_assemblyai_api_key
+npx ts-node readers/load-assemblyai.ts
+```
--- a/examples/assemblyai.ts
+++ b/examples/assemblyai.ts
--- a/examples/csv.ts
+++ b/examples/csv.ts
--- a/examples/readers/load-docx.ts
+++ b/examples/readers/load-docx.ts
+import { DocxReader, VectorStoreIndex } from "llamaindex";
+/** Relative path to the sample Word document (the README runs this from `examples`). */
+const FILE_PATH = "./data/stars.docx";
+/** Query issued against the index once the document has been embedded. */
+const SAMPLE_QUERY = "Information about Zodiac";
+/**
+ * Loads the sample docx file with `DocxReader`, builds a `VectorStoreIndex`
+ * from the loaded documents, and prints the response to `SAMPLE_QUERY`.
+ *
+ * @returns A promise that settles once the query response has been printed.
+ */
+async function main(): Promise<void> {
+  // Load docx file
+  console.log("Loading data...");
+  const reader = new DocxReader();
+  const documents = await reader.loadData(FILE_PATH);
+  // Create embeddings
+  console.log("Creating embeddings...");
+  const index = await VectorStoreIndex.fromDocuments(documents);
+  // Test query
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(SAMPLE_QUERY);
+  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
+}
+// Report any failure explicitly and exit non-zero instead of leaving the
+// promise returned by main() unhandled.
+main().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
--- a/examples/html.ts
+++ b/examples/html.ts
--- a/examples/readers/load-md.ts
+++ b/examples/readers/load-md.ts
+import { MarkdownReader, VectorStoreIndex } from "llamaindex";
+/** Relative path to the sample Markdown document (the README runs this from `examples`). */
+const FILE_PATH = "./data/planets.md";
+/** Query issued against the index once the document has been embedded. */
+const SAMPLE_QUERY = "List all planets";
+/**
+ * Loads the sample markdown file with `MarkdownReader`, builds a
+ * `VectorStoreIndex` from the loaded documents, and prints the response to
+ * `SAMPLE_QUERY`.
+ *
+ * @returns A promise that settles once the query response has been printed.
+ */
+async function main(): Promise<void> {
+  // Load markdown file
+  console.log("Loading data...");
+  const reader = new MarkdownReader();
+  const documents = await reader.loadData(FILE_PATH);
+  // Create embeddings
+  console.log("Creating embeddings...");
+  const index = await VectorStoreIndex.fromDocuments(documents);
+  // Test query
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(SAMPLE_QUERY);
+  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
+}
+// Report any failure explicitly and exit non-zero instead of leaving the
+// promise returned by main() unhandled.
+main().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
--- a/examples/notion.ts
+++ b/examples/notion.ts
--- a/examples/pdf.ts
+++ b/examples/pdf.ts
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -21,6 +21,7 @@ export * from "./nodeParsers";
 export * from "./postprocessors";
 export * from "./readers/AssemblyAI";
 export * from "./readers/CSVReader";
+export * from "./readers/DocxReader";
 export * from "./readers/HTMLReader";
 export * from "./readers/MarkdownReader";
 export * from "./readers/NotionReader";

--- a/packages/core/src/tests/readers/DocxReader.test.ts
+++ b/packages/core/src/tests/readers/DocxReader.test.ts
+import { DocxReader } from "../../readers/DocxReader";
+// Checks that DocxReader.loadData yields an array of documents whose
+// combined text contains a phrase expected in the sample docx fixture.
+describe("DocxReader", () => {
+  // NOTE(review): relative fixture path; presumably resolved against the
+  // working directory of the test run. Confirm against the Jest setup.
+  const SAMPLE_DOCX = "../../examples/data/stars.docx";
+  let reader: DocxReader;
+  // A fresh reader is created before every test case.
+  beforeEach(() => {
+    reader = new DocxReader();
+  });
+  describe("loadData", () => {
+    it("should load data from a docx file, return an array of documents and contain text", async () => {
+      const loaded = await reader.loadData(SAMPLE_DOCX);
+      const texts = loaded.map(({ text }) => text);
+      expect(loaded).toBeInstanceOf(Array);
+      expect(texts.join("")).toContain("Venturing into the zodiac");
+    });
+  });
+});
--- a/packages/core/src/tests/readers/MarkdownReader.test.ts
+++ b/packages/core/src/tests/readers/MarkdownReader.test.ts
+import { MarkdownReader } from "../../readers/MarkdownReader";
+// Checks that MarkdownReader.loadData yields an array of documents whose
+// combined text contains a phrase expected in the sample markdown fixture.
+describe("MarkdownReader", () => {
+  // NOTE(review): relative fixture path; presumably resolved against the
+  // working directory of the test run. Confirm against the Jest setup.
+  const SAMPLE_MARKDOWN = "../../examples/data/planets.md";
+  let reader: MarkdownReader;
+  // A fresh reader is created before every test case.
+  beforeEach(() => {
+    reader = new MarkdownReader();
+  });
+  describe("loadData", () => {
+    it("should load data from a markdown file, return an array of documents and contain text", async () => {
+      const loaded = await reader.loadData(SAMPLE_MARKDOWN);
+      const texts = loaded.map(({ text }) => text);
+      expect(loaded).toBeInstanceOf(Array);
+      expect(texts.join("")).toContain("Solar System");
+    });
+  });
+});