From a6c1eab76214f473bd142f1f83acd5e73cf2e0b2 Mon Sep 17 00:00:00 2001 From: Jingyi Zhao <zhao.elton@gmail.com> Date: Sat, 30 Nov 2024 01:39:59 -0500 Subject: [PATCH] feat: data connector for obsidian note taking app (#1529) --- examples/readers/package.json | 3 ++- examples/readers/src/obsidian.ts | 12 ++++++++++ packages/readers/package.json | 19 +++++++++++++++ packages/readers/src/obsidian.ts | 41 ++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 examples/readers/src/obsidian.ts create mode 100644 packages/readers/src/obsidian.ts diff --git a/examples/readers/package.json b/examples/readers/package.json index 4cc21d942..092449c8e 100644 --- a/examples/readers/package.json +++ b/examples/readers/package.json @@ -15,7 +15,8 @@ "start:llamaparse-dir": "node --import tsx ./src/simple-directory-reader-with-llamaparse.ts", "start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts", "start:discord": "node --import tsx ./src/discord.ts", - "start:json": "node --import tsx ./src/json.ts" + "start:json": "node --import tsx ./src/json.ts", + "start:obsidian": "node --import tsx ./src/obsidian.ts" }, "dependencies": { "@llamaindex/readers": "*", diff --git a/examples/readers/src/obsidian.ts b/examples/readers/src/obsidian.ts new file mode 100644 index 000000000..7fc233d32 --- /dev/null +++ b/examples/readers/src/obsidian.ts @@ -0,0 +1,12 @@ +import { ObsidianReader } from "@llamaindex/readers/obsidian"; + +const obsidianReader = new ObsidianReader( + "/Users/jingyi/Documents/jingyi-vault", +); + +obsidianReader.loadData().then((documents) => { + console.log("documents:", documents.length); + documents.forEach((doc) => { + console.log(`document (${doc.id_}):`, doc.getText()); + }); +}); diff --git a/packages/readers/package.json b/packages/readers/package.json index ff25bea1c..e1cc00489 100644 --- a/packages/readers/package.json +++ b/packages/readers/package.json @@ -142,6 +142,24 @@ "default": "./notion/dist/index.js" } }, + "./obsidian": { + "edge-light": { + "types": "./obsidian/dist/index.edge-light.d.ts", + "default": "./obsidian/dist/index.edge-light.js" + }, + "workerd": { + "types": "./obsidian/dist/index.workerd.d.ts", + "default": "./obsidian/dist/index.workerd.js" + }, + "require": { + "types": "./obsidian/dist/index.d.cts", + "default": "./obsidian/dist/index.cjs" + }, + "import": { + "types": "./obsidian/dist/index.d.ts", + "default": "./obsidian/dist/index.js" + } + }, "./pdf": { "require": { "types": "./pdf/dist/index.d.cts", @@ -176,6 +194,7 @@ "markdown", "mongo", "notion", + "obsidian", "pdf", "text", "node" diff --git a/packages/readers/src/obsidian.ts b/packages/readers/src/obsidian.ts new file mode 100644 index 000000000..9913fc75c --- /dev/null +++ b/packages/readers/src/obsidian.ts @@ -0,0 +1,41 @@ +import { type BaseReader, Document } from "@llamaindex/core/schema"; +import * as fs from "node:fs"; +import path from "node:path"; +import { MarkdownReader } from "./markdown"; + +export class ObsidianReader implements BaseReader<Document> { + protected inputDir: string; + protected docs: Document[] = []; + + constructor(inputDir: string) { + this.inputDir = inputDir; + } + + private async processPath(file: fs.Dirent, filepath: string) { + if (file.isDirectory() && !file.name.startsWith(".")) { + await this.readFromPath(filepath); + } else if (file.isFile() && file.name.endsWith(".md")) { + await this.convertToDocuments(filepath); + } else { + console.log(`Skipping ${filepath}`); + } + } + + private async readFromPath(dir: string) { + const files = await fs.promises.readdir(dir, { withFileTypes: true }); + for (const file of files) { + const filepath = path.join(dir, file.name); + await this.processPath(file, filepath); + } + } + + private async convertToDocuments(filepath: string) { + const content = await new MarkdownReader().loadData(filepath); + this.docs.push(...content); + } + + async loadData(): Promise<Document[]> { + await this.readFromPath(this.inputDir); + return this.docs; + } +} -- GitLab