From a6c1eab76214f473bd142f1f83acd5e73cf2e0b2 Mon Sep 17 00:00:00 2001
From: Jingyi Zhao <zhao.elton@gmail.com>
Date: Sat, 30 Nov 2024 01:39:59 -0500
Subject: [PATCH] feat: data connector for obsidian note taking app (#1529)

---
 examples/readers/package.json    |  3 ++-
 examples/readers/src/obsidian.ts | 12 ++++++++++
 packages/readers/package.json    | 19 +++++++++++++++
 packages/readers/src/obsidian.ts | 41 ++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 examples/readers/src/obsidian.ts
 create mode 100644 packages/readers/src/obsidian.ts

diff --git a/examples/readers/package.json b/examples/readers/package.json
index 4cc21d942..092449c8e 100644
--- a/examples/readers/package.json
+++ b/examples/readers/package.json
@@ -15,7 +15,8 @@
     "start:llamaparse-dir": "node --import tsx ./src/simple-directory-reader-with-llamaparse.ts",
     "start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
     "start:discord": "node --import tsx ./src/discord.ts",
-    "start:json": "node --import tsx ./src/json.ts"
+    "start:json": "node --import tsx ./src/json.ts",
+    "start:obsidian": "node --import tsx ./src/obsidian.ts"
   },
   "dependencies": {
     "@llamaindex/readers": "*",
diff --git a/examples/readers/src/obsidian.ts b/examples/readers/src/obsidian.ts
new file mode 100644
index 000000000..7fc233d32
--- /dev/null
+++ b/examples/readers/src/obsidian.ts
@@ -0,0 +1,25 @@
+import { ObsidianReader } from "@llamaindex/readers/obsidian";
+
+// Usage: node --import tsx ./src/obsidian.ts /path/to/vault
+// The vault directory is taken from the first CLI argument so the example
+// runs on any machine instead of depending on one developer's home directory.
+const vaultPath = process.argv[2];
+if (!vaultPath) {
+  console.error("Usage: start:obsidian <path-to-obsidian-vault>");
+  process.exit(1);
+}
+
+const obsidianReader = new ObsidianReader(vaultPath);
+
+obsidianReader
+  .loadData()
+  .then((documents) => {
+    console.log("documents:", documents.length);
+    documents.forEach((doc) => {
+      console.log(`document (${doc.id_}):`, doc.getText());
+    });
+  })
+  .catch((error: unknown) => {
+    // Report read failures (e.g. a missing directory) instead of leaving an
+    // unhandled promise rejection.
+    console.error("Failed to load the vault:", error);
+    process.exitCode = 1;
+  });
diff --git a/packages/readers/package.json b/packages/readers/package.json
index ff25bea1c..e1cc00489 100644
--- a/packages/readers/package.json
+++ b/packages/readers/package.json
@@ -142,6 +142,24 @@
         "default": "./notion/dist/index.js"
       }
     },
+    "./obsidian": {
+      "edge-light": {
+        "types": "./obsidian/dist/index.edge-light.d.ts",
+        "default": "./obsidian/dist/index.edge-light.js"
+      },
+      "workerd": {
+        "types": "./obsidian/dist/index.workerd.d.ts",
+        "default": "./obsidian/dist/index.workerd.js"
+      },
+      "require": {
+        "types": "./obsidian/dist/index.d.cts",
+        "default": "./obsidian/dist/index.cjs"
+      },
+      "import": {
+        "types": "./obsidian/dist/index.d.ts",
+        "default": "./obsidian/dist/index.js"
+      }
+    },
     "./pdf": {
       "require": {
         "types": "./pdf/dist/index.d.cts",
@@ -176,6 +194,7 @@
     "markdown",
     "mongo",
     "notion",
+    "obsidian",
     "pdf",
     "text",
     "node"
diff --git a/packages/readers/src/obsidian.ts b/packages/readers/src/obsidian.ts
new file mode 100644
index 000000000..9913fc75c
--- /dev/null
+++ b/packages/readers/src/obsidian.ts
@@ -0,0 +1,76 @@
+import { type BaseReader, Document } from "@llamaindex/core/schema";
+import * as fs from "node:fs";
+import path from "node:path";
+import { MarkdownReader } from "./markdown";
+
+/**
+ * Loads every Markdown note found under a directory (an Obsidian vault) as
+ * `Document`s, delegating the parsing of each file to `MarkdownReader`.
+ *
+ * Directories whose name starts with "." are not descended into; only regular
+ * files ending in ".md" are read.
+ */
+export class ObsidianReader implements BaseReader<Document> {
+  protected inputDir: string;
+  /** Documents collected by the most recent completed `loadData()` call. */
+  protected docs: Document[] = [];
+
+  /**
+   * @param inputDir Root directory to scan recursively.
+   */
+  constructor(inputDir: string) {
+    this.inputDir = inputDir;
+  }
+
+  /**
+   * Handles one directory entry: recurses into visible directories, parses
+   * Markdown files, and logs everything else as skipped.
+   */
+  private async processPath(
+    file: fs.Dirent,
+    filepath: string,
+    docs: Document[],
+  ): Promise<void> {
+    if (file.isDirectory() && !file.name.startsWith(".")) {
+      await this.readFromPath(filepath, docs);
+    } else if (file.isFile() && file.name.endsWith(".md")) {
+      await this.convertToDocuments(filepath, docs);
+    } else {
+      console.log(`Skipping ${filepath}`);
+    }
+  }
+
+  /** Walks `dir` one entry at a time, appending parsed notes to `docs`. */
+  private async readFromPath(dir: string, docs: Document[]): Promise<void> {
+    const files = await fs.promises.readdir(dir, { withFileTypes: true });
+    for (const file of files) {
+      const filepath = path.join(dir, file.name);
+      await this.processPath(file, filepath, docs);
+    }
+  }
+
+  /** Parses a single Markdown file and appends its documents to `docs`. */
+  private async convertToDocuments(
+    filepath: string,
+    docs: Document[],
+  ): Promise<void> {
+    const content = await new MarkdownReader().loadData(filepath);
+    docs.push(...content);
+  }
+
+  /**
+   * Scans `inputDir` and returns the documents for every Markdown file found.
+   *
+   * Each call starts from an empty list, so calling it repeatedly (or
+   * concurrently) never returns documents left over from another run.
+   *
+   * @returns The documents, in directory-listing order.
+   * @throws Whatever `fs.promises.readdir` or `MarkdownReader` rejects with.
+   */
+  async loadData(): Promise<Document[]> {
+    const docs: Document[] = [];
+    await this.readFromPath(this.inputDir, docs);
+    this.docs = docs;
+    return docs;
+  }
+}
-- 
GitLab