From 20bc466ca114ed1558dc3eb798cd504ad0cd89b1 Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Mon, 22 Apr 2024 15:14:06 -0500
Subject: [PATCH] chore: bump notion reader (#753)

---
 examples/readers/package.json             | 19 +++---
 examples/readers/src/notion.ts            |  4 +-
 packages/core/package.json                |  7 ++-
 packages/core/src/readers/NotionReader.ts | 39 ++++++------
 packages/edge/package.json                | 15 ++++-
 packages/edge/scripts/update-deps.js      |  2 +
 pnpm-lock.yaml                            | 75 +++++++++++------------
 7 files changed, 88 insertions(+), 73 deletions(-)

diff --git a/examples/readers/package.json b/examples/readers/package.json
index 8202eb59c..3889180ed 100644
--- a/examples/readers/package.json
+++ b/examples/readers/package.json
@@ -3,20 +3,21 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "start": "node --loader ts-node/esm ./src/simple-directory-reader.ts",
-    "start:csv": "node --loader ts-node/esm ./src/csv.ts",
-    "start:docx": "node --loader ts-node/esm ./src/docx.ts",
-    "start:html": "node --loader ts-node/esm ./src/html.ts",
-    "start:markdown": "node --loader ts-node/esm ./src/markdown.ts",
-    "start:pdf": "node --loader ts-node/esm ./src/pdf.ts",
-    "start:llamaparse": "node --loader ts-node/esm ./src/llamaparse.ts"
+    "start": "node --import tsx ./src/simple-directory-reader.ts",
+    "start:csv": "node --import tsx ./src/csv.ts",
+    "start:docx": "node --import tsx ./src/docx.ts",
+    "start:html": "node --import tsx ./src/html.ts",
+    "start:markdown": "node --import tsx ./src/markdown.ts",
+    "start:pdf": "node --import tsx ./src/pdf.ts",
+    "start:llamaparse": "node --import tsx ./src/llamaparse.ts",
+    "start:notion": "node --import tsx ./src/notion.ts"
   },
   "dependencies": {
     "llamaindex": "*"
   },
   "devDependencies": {
     "@types/node": "^20.12.7",
-    "ts-node": "^10.9.2",
-    "typescript": "^5.4.3"
+    "tsx": "^4.7.2",
+    "typescript": "^5.4.5"
   }
 }
diff --git a/examples/readers/src/notion.ts b/examples/readers/src/notion.ts
index d6450af4b..439e92700 100644
--- a/examples/readers/src/notion.ts
+++ b/examples/readers/src/notion.ts
@@ -7,7 +7,7 @@ import { createInterface } from "node:readline/promises";
 
 program
   .argument("[page]", "Notion page id (must be provided)")
-  .action(async (page, _options, command) => {
+  .action(async (page, _options) => {
     // Initializing a client
 
     if (!process.env.NOTION_TOKEN) {
@@ -55,7 +55,7 @@ program
           .filter((page) => page !== null);
         console.log("Found pages:");
         console.table(pages);
-        console.log(`To run, run ts-node ${command.name()} [page id]`);
+        console.log(`To run, run with [page id]`);
         return;
       }
     }
diff --git a/packages/core/package.json b/packages/core/package.json
index 18409fbab..a38c95727 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -12,7 +12,6 @@
     "@llamaindex/cloud": "0.0.5",
     "@llamaindex/env": "workspace:*",
     "@mistralai/mistralai": "^0.1.3",
-    "@notionhq/client": "^2.2.15",
     "@pinecone-database/pinecone": "^2.2.0",
     "@qdrant/js-client-rest": "^1.8.2",
     "@types/lodash": "^4.17.0",
@@ -31,7 +30,7 @@
     "mammoth": "^1.7.1",
     "md-utils-ts": "^2.0.0",
     "mongodb": "^6.5.0",
-    "notion-md-crawler": "^0.0.2",
+    "notion-md-crawler": "^1.0.0",
     "ollama": "^0.5.0",
     "openai": "^4.38.0",
     "papaparse": "^5.4.1",
@@ -45,7 +44,11 @@
     "wikipedia": "^2.1.2",
     "wink-nlp": "^1.14.3"
   },
+  "peerDependencies": {
+    "@notionhq/client": "^2.2.15"
+  },
   "devDependencies": {
+    "@notionhq/client": "^2.2.15",
     "@swc/cli": "^0.3.12",
     "@swc/core": "^1.4.16",
     "concurrently": "^8.2.2",
diff --git a/packages/core/src/readers/NotionReader.ts b/packages/core/src/readers/NotionReader.ts
index bbac5d08f..4c645e7c2 100644
--- a/packages/core/src/readers/NotionReader.ts
+++ b/packages/core/src/readers/NotionReader.ts
@@ -1,21 +1,9 @@
-import type { Client } from "@notionhq/client";
-import type { Crawler, Pages } from "notion-md-crawler";
+import type { Crawler, CrawlerOptions, Page } from "notion-md-crawler";
 import { crawler, pageToString } from "notion-md-crawler";
 import { Document } from "../Node.js";
 import type { BaseReader } from "./type.js";
 
-type OptionalSerializers = Parameters<Crawler>[number]["serializers"];
-
-/**
- * Options for initializing the NotionReader class
- * @typedef {Object} NotionReaderOptions
- * @property {Client} client - The Notion Client object for API interactions
- * @property {OptionalSerializers} [serializers] - Option to customize serialization. See [the url](https://github.com/TomPenguin/notion-md-crawler/tree/main) for details.
- */
-type NotionReaderOptions = {
-  client: Client;
-  serializers?: OptionalSerializers;
-};
+type NotionReaderOptions = Pick<CrawlerOptions, "client" | "serializers">;
 
 /**
  * Notion pages are retrieved recursively and converted to Document objects.
@@ -25,7 +13,7 @@ type NotionReaderOptions = {
  * Please refer to [this document](https://www.notion.so/help/create-integrations-with-the-notion-api) for details.
  */
 export class NotionReader implements BaseReader {
-  private crawl: ReturnType<Crawler>;
+  private readonly crawl: ReturnType<Crawler>;
 
   /**
    * Constructor for the NotionReader class
@@ -37,10 +25,10 @@ export class NotionReader implements BaseReader {
 
   /**
    * Converts Pages to an array of Document objects
-   * @param {Pages} pages - The Notion pages to convert (Return value of `loadPages`)
+   * @param {Page[]} pages - The Notion pages to convert (Return value of `loadPages`)
    * @returns {Document[]} An array of Document objects
    */
-  toDocuments(pages: Pages): Document[] {
+  toDocuments(pages: Page[]): Document[] {
     return Object.values(pages).map((page) => {
       const text = pageToString(page);
       return new Document({
@@ -54,10 +42,21 @@ export class NotionReader implements BaseReader {
   /**
    * Loads recursively the Notion page with the specified root page ID.
    * @param {string} rootPageId - The root Notion page ID
-   * @returns {Promise<Pages>} A Promise that resolves to a Pages object(Convertible with the `toDocuments` method)
+   * @returns {Promise<Page[]>} A Promise that resolves to an array of Page objects (convertible with the `toDocuments` method)
    */
-  async loadPages(rootPageId: string): Promise<Pages> {
-    return this.crawl(rootPageId);
+  async loadPages(rootPageId: string): Promise<Page[]> {
+    const iter = this.crawl(rootPageId);
+    const pages: Page[] = [];
+    for await (const result of iter) {
+      if (result.success) {
+        pages.push(result.page);
+      } else {
+        console.error(
+          `Failed to load page (${result.failure.parentId}): ${result.failure.reason}`,
+        );
+      }
+    }
+    return pages;
   }
 
   /**
diff --git a/packages/edge/package.json b/packages/edge/package.json
index 274dc56e8..2915237c9 100644
--- a/packages/edge/package.json
+++ b/packages/edge/package.json
@@ -11,7 +11,6 @@
     "@llamaindex/cloud": "0.0.5",
     "@llamaindex/env": "workspace:*",
     "@mistralai/mistralai": "^0.1.3",
-    "@notionhq/client": "^2.2.15",
     "@pinecone-database/pinecone": "^2.2.0",
     "@qdrant/js-client-rest": "^1.8.2",
     "@types/lodash": "^4.17.0",
@@ -30,7 +29,7 @@
     "mammoth": "^1.7.1",
     "md-utils-ts": "^2.0.0",
     "mongodb": "^6.5.0",
-    "notion-md-crawler": "^0.0.2",
+    "notion-md-crawler": "^1.0.0",
     "ollama": "^0.5.0",
     "openai": "^4.38.0",
     "papaparse": "^5.4.1",
@@ -82,5 +81,17 @@
     "update:deps": "node scripts/update-deps.js",
     "build:core": "pnpm --filter llamaindex build && cp -r ../core/dist . && rm -rf dist/cjs",
     "build": "pnpm run update:deps && pnpm run build:core && pnpm copy"
+  },
+  "devDependencies": {
+    "@notionhq/client": "^2.2.15",
+    "@swc/cli": "^0.3.12",
+    "@swc/core": "^1.4.16",
+    "concurrently": "^8.2.2",
+    "glob": "^10.3.12",
+    "madge": "^7.0.0",
+    "typescript": "^5.4.5"
+  },
+  "peerDependencies": {
+    "@notionhq/client": "^2.2.15"
   }
 }
diff --git a/packages/edge/scripts/update-deps.js b/packages/edge/scripts/update-deps.js
index f8c913d52..49d87278a 100644
--- a/packages/edge/scripts/update-deps.js
+++ b/packages/edge/scripts/update-deps.js
@@ -10,6 +10,8 @@ const edgePackagePath = path.join(process.cwd(), "package.json");
 const edgePackage = readJson(edgePackagePath);
 const corePackage = readJson(corePackagePath);
 edgePackage.dependencies = corePackage.dependencies;
+edgePackage.devDependencies = corePackage.devDependencies;
+edgePackage.peerDependencies = corePackage.peerDependencies;
 edgePackage.version = corePackage.version;
 writeJson(edgePackagePath, edgePackage);
 execSync("pnpm install --lockfile-only", { stdio: "inherit" });
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index a0a8004b2..b5b611bc0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -178,12 +178,12 @@ importers:
       '@types/node':
         specifier: ^20.12.7
         version: 20.12.7
-      ts-node:
-        specifier: ^10.9.2
-        version: 10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3)
+      tsx:
+        specifier: ^4.7.2
+        version: 4.7.2
       typescript:
-        specifier: ^5.4.3
-        version: 5.4.3
+        specifier: ^5.4.5
+        version: 5.4.5
 
   packages/core:
     dependencies:
@@ -208,9 +208,6 @@ importers:
       '@mistralai/mistralai':
         specifier: ^0.1.3
         version: 0.1.3(encoding@0.1.13)
-      '@notionhq/client':
-        specifier: ^2.2.15
-        version: 2.2.15(encoding@0.1.13)
       '@pinecone-database/pinecone':
         specifier: ^2.2.0
         version: 2.2.0
@@ -266,8 +263,8 @@ importers:
         specifier: ^6.5.0
         version: 6.5.0
       notion-md-crawler:
-        specifier: ^0.0.2
-        version: 0.0.2(encoding@0.1.13)
+        specifier: ^1.0.0
+        version: 1.0.0(encoding@0.1.13)
       ollama:
         specifier: ^0.5.0
         version: 0.5.0
@@ -305,6 +302,9 @@ importers:
         specifier: ^1.14.3
         version: 1.14.3
     devDependencies:
+      '@notionhq/client':
+        specifier: ^2.2.15
+        version: 2.2.15(encoding@0.1.13)
       '@swc/cli':
         specifier: ^0.3.12
         version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0)
@@ -371,9 +371,6 @@ importers:
       '@mistralai/mistralai':
         specifier: ^0.1.3
         version: 0.1.3(encoding@0.1.13)
-      '@notionhq/client':
-        specifier: ^2.2.15
-        version: 2.2.15(encoding@0.1.13)
       '@pinecone-database/pinecone':
         specifier: ^2.2.0
         version: 2.2.0
@@ -429,8 +426,8 @@ importers:
         specifier: ^6.5.0
         version: 6.5.0
       notion-md-crawler:
-        specifier: ^0.0.2
-        version: 0.0.2(encoding@0.1.13)
+        specifier: ^1.0.0
+        version: 1.0.0(encoding@0.1.13)
       ollama:
         specifier: ^0.5.0
         version: 0.5.0
@@ -467,6 +464,28 @@ importers:
       wink-nlp:
         specifier: ^1.14.3
         version: 1.14.3
+    devDependencies:
+      '@notionhq/client':
+        specifier: ^2.2.15
+        version: 2.2.15(encoding@0.1.13)
+      '@swc/cli':
+        specifier: ^0.3.12
+        version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0)
+      '@swc/core':
+        specifier: ^1.4.16
+        version: 1.4.16(@swc/helpers@0.5.2)
+      concurrently:
+        specifier: ^8.2.2
+        version: 8.2.2
+      glob:
+        specifier: ^10.3.12
+        version: 10.3.12
+      madge:
+        specifier: ^7.0.0
+        version: 7.0.0(typescript@5.4.5)
+      typescript:
+        specifier: ^5.4.5
+        version: 5.4.5
 
   packages/edge/e2e/test-edge-runtime:
     dependencies:
@@ -6125,8 +6144,8 @@ packages:
     resolution: {integrity: sha512-IO9QvjUMWxPQQhs60oOu10CRkWCiZzSUkzbXGGV9pviYl1fXYcvkzQ5jV9z8Y6un8ARoVRl4EtC6v6jNqbaJ/w==}
     engines: {node: '>=14.16'}
 
-  notion-md-crawler@0.0.2:
-    resolution: {integrity: sha512-lE3/DFMrg7GSbl1sBfDuLVLyxw+yjdarPVm1JGfQ6eONEbNGgO+BdZxpwwZQ1uYeEJurAXMXb/AXT8GKYjKAyg==}
+  notion-md-crawler@1.0.0:
+    resolution: {integrity: sha512-mdB6zn/i32qO2C7X7wZLDpWvFryO3bPYMuBfFgmTPomnfEtIejdQJNVaZzw2GapM82lfWZ5dfsZp3s3UL4p1Fg==}
 
   npm-run-path@2.0.2:
     resolution: {integrity: sha512-lJxZYlT4DW/bRUtFh1MQIWqmLwQfAxnqWG4HhEdjMlkrJYnJn0Jrr2u3mgxqaWsdiBc76TYkTG/mhrnYTuzfHw==}
@@ -15765,7 +15784,7 @@ snapshots:
 
   normalize-url@8.0.1: {}
 
-  notion-md-crawler@0.0.2(encoding@0.1.13):
+  notion-md-crawler@1.0.0(encoding@0.1.13):
     dependencies:
       '@notionhq/client': 2.2.15(encoding@0.1.13)
       md-utils-ts: 2.0.0
@@ -17780,26 +17799,6 @@ snapshots:
 
   ts-graphviz@1.8.2: {}
 
-  ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3):
-    dependencies:
-      '@cspotcode/source-map-support': 0.8.1
-      '@tsconfig/node10': 1.0.9
-      '@tsconfig/node12': 1.0.11
-      '@tsconfig/node14': 1.0.3
-      '@tsconfig/node16': 1.0.4
-      '@types/node': 20.12.7
-      acorn: 8.11.3
-      acorn-walk: 8.3.2
-      arg: 4.1.3
-      create-require: 1.1.1
-      diff: 4.0.2
-      make-error: 1.3.6
-      typescript: 5.4.3
-      v8-compile-cache-lib: 3.0.1
-      yn: 3.1.1
-    optionalDependencies:
-      '@swc/core': 1.4.16(@swc/helpers@0.5.2)
-
   ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.5):
     dependencies:
       '@cspotcode/source-map-support': 0.8.1
-- 
GitLab