From 20bc466ca114ed1558dc3eb798cd504ad0cd89b1 Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Mon, 22 Apr 2024 15:14:06 -0500 Subject: [PATCH] chore: bump notion reader (#753) --- examples/readers/package.json | 19 +++--- examples/readers/src/notion.ts | 4 +- packages/core/package.json | 7 ++- packages/core/src/readers/NotionReader.ts | 39 ++++++------ packages/edge/package.json | 15 ++++- packages/edge/scripts/update-deps.js | 2 + pnpm-lock.yaml | 75 +++++++++++------------ 7 files changed, 88 insertions(+), 73 deletions(-) diff --git a/examples/readers/package.json b/examples/readers/package.json index 8202eb59c..3889180ed 100644 --- a/examples/readers/package.json +++ b/examples/readers/package.json @@ -3,20 +3,21 @@ "private": true, "type": "module", "scripts": { - "start": "node --loader ts-node/esm ./src/simple-directory-reader.ts", - "start:csv": "node --loader ts-node/esm ./src/csv.ts", - "start:docx": "node --loader ts-node/esm ./src/docx.ts", - "start:html": "node --loader ts-node/esm ./src/html.ts", - "start:markdown": "node --loader ts-node/esm ./src/markdown.ts", - "start:pdf": "node --loader ts-node/esm ./src/pdf.ts", - "start:llamaparse": "node --loader ts-node/esm ./src/llamaparse.ts" + "start": "node --import tsx ./src/simple-directory-reader.ts", + "start:csv": "node --import tsx ./src/csv.ts", + "start:docx": "node --import tsx ./src/docx.ts", + "start:html": "node --import tsx ./src/html.ts", + "start:markdown": "node --import tsx ./src/markdown.ts", + "start:pdf": "node --import tsx ./src/pdf.ts", + "start:llamaparse": "node --import tsx ./src/llamaparse.ts", + "start:notion": "node --import tsx ./src/notion.ts" }, "dependencies": { "llamaindex": "*" }, "devDependencies": { "@types/node": "^20.12.7", - "ts-node": "^10.9.2", - "typescript": "^5.4.3" + "tsx": "^4.7.2", + "typescript": "^5.4.5" } } diff --git a/examples/readers/src/notion.ts b/examples/readers/src/notion.ts index d6450af4b..439e92700 100644 --- 
a/examples/readers/src/notion.ts +++ b/examples/readers/src/notion.ts @@ -7,7 +7,7 @@ import { createInterface } from "node:readline/promises"; program .argument("[page]", "Notion page id (must be provided)") - .action(async (page, _options, command) => { + .action(async (page, _options) => { // Initializing a client if (!process.env.NOTION_TOKEN) { @@ -55,7 +55,7 @@ program .filter((page) => page !== null); console.log("Found pages:"); console.table(pages); - console.log(`To run, run ts-node ${command.name()} [page id]`); + console.log(`To run, run with [page id]`); return; } } diff --git a/packages/core/package.json b/packages/core/package.json index 18409fbab..a38c95727 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -12,7 +12,6 @@ "@llamaindex/cloud": "0.0.5", "@llamaindex/env": "workspace:*", "@mistralai/mistralai": "^0.1.3", - "@notionhq/client": "^2.2.15", "@pinecone-database/pinecone": "^2.2.0", "@qdrant/js-client-rest": "^1.8.2", "@types/lodash": "^4.17.0", @@ -31,7 +30,7 @@ "mammoth": "^1.7.1", "md-utils-ts": "^2.0.0", "mongodb": "^6.5.0", - "notion-md-crawler": "^0.0.2", + "notion-md-crawler": "^1.0.0", "ollama": "^0.5.0", "openai": "^4.38.0", "papaparse": "^5.4.1", @@ -45,7 +44,11 @@ "wikipedia": "^2.1.2", "wink-nlp": "^1.14.3" }, + "peerDependencies": { + "@notionhq/client": "^2.2.15" + }, "devDependencies": { + "@notionhq/client": "^2.2.15", "@swc/cli": "^0.3.12", "@swc/core": "^1.4.16", "concurrently": "^8.2.2", diff --git a/packages/core/src/readers/NotionReader.ts b/packages/core/src/readers/NotionReader.ts index bbac5d08f..4c645e7c2 100644 --- a/packages/core/src/readers/NotionReader.ts +++ b/packages/core/src/readers/NotionReader.ts @@ -1,21 +1,9 @@ -import type { Client } from "@notionhq/client"; -import type { Crawler, Pages } from "notion-md-crawler"; +import type { Crawler, CrawlerOptions, Page } from "notion-md-crawler"; import { crawler, pageToString } from "notion-md-crawler"; import { Document } from 
"../Node.js"; import type { BaseReader } from "./type.js"; -type OptionalSerializers = Parameters<Crawler>[number]["serializers"]; - -/** - * Options for initializing the NotionReader class - * @typedef {Object} NotionReaderOptions - * @property {Client} client - The Notion Client object for API interactions - * @property {OptionalSerializers} [serializers] - Option to customize serialization. See [the url](https://github.com/TomPenguin/notion-md-crawler/tree/main) for details. - */ -type NotionReaderOptions = { - client: Client; - serializers?: OptionalSerializers; -}; +type NotionReaderOptions = Pick<CrawlerOptions, "client" | "serializers">; /** * Notion pages are retrieved recursively and converted to Document objects. @@ -25,7 +13,7 @@ type NotionReaderOptions = { * Please refer to [this document](https://www.notion.so/help/create-integrations-with-the-notion-api) for details. */ export class NotionReader implements BaseReader { - private crawl: ReturnType<Crawler>; + private readonly crawl: ReturnType<Crawler>; /** * Constructor for the NotionReader class @@ -37,10 +25,10 @@ export class NotionReader implements BaseReader { /** * Converts Pages to an array of Document objects - * @param {Pages} pages - The Notion pages to convert (Return value of `loadPages`) + * @param {Page} pages - The Notion pages to convert (Return value of `loadPages`) * @returns {Document[]} An array of Document objects */ - toDocuments(pages: Pages): Document[] { + toDocuments(pages: Page[]): Document[] { return Object.values(pages).map((page) => { const text = pageToString(page); return new Document({ @@ -54,10 +42,21 @@ export class NotionReader implements BaseReader { /** * Loads recursively the Notion page with the specified root page ID. 
* @param {string} rootPageId - The root Notion page ID - * @returns {Promise<Pages>} A Promise that resolves to a Pages object(Convertible with the `toDocuments` method) + * @returns {Promise<Page[]>} A Promise that resolves to an array of Page objects (convertible with the `toDocuments` method) */ - async loadPages(rootPageId: string): Promise<Pages> { - return this.crawl(rootPageId); + async loadPages(rootPageId: string): Promise<Page[]> { + const iter = this.crawl(rootPageId); + const pages: Page[] = []; + for await (const result of iter) { + if (result.success) { + pages.push(result.page); + } else { + console.error( + `Failed to load page (${result.failure.parentId}): ${result.failure.reason}`, + ); + } + } + return pages; } /** diff --git a/packages/edge/package.json b/packages/edge/package.json index 274dc56e8..2915237c9 100644 --- a/packages/edge/package.json +++ b/packages/edge/package.json @@ -11,7 +11,6 @@ "@llamaindex/cloud": "0.0.5", "@llamaindex/env": "workspace:*", "@mistralai/mistralai": "^0.1.3", - "@notionhq/client": "^2.2.15", "@pinecone-database/pinecone": "^2.2.0", "@qdrant/js-client-rest": "^1.8.2", "@types/lodash": "^4.17.0", @@ -30,7 +29,7 @@ "mammoth": "^1.7.1", "md-utils-ts": "^2.0.0", "mongodb": "^6.5.0", - "notion-md-crawler": "^0.0.2", + "notion-md-crawler": "^1.0.0", "ollama": "^0.5.0", "openai": "^4.38.0", "papaparse": "^5.4.1", @@ -82,5 +81,17 @@ "update:deps": "node scripts/update-deps.js", "build:core": "pnpm --filter llamaindex build && cp -r ../core/dist . 
&& rm -rf dist/cjs", "build": "pnpm run update:deps && pnpm run build:core && pnpm copy" + }, + "devDependencies": { + "@notionhq/client": "^2.2.15", + "@swc/cli": "^0.3.12", + "@swc/core": "^1.4.16", + "concurrently": "^8.2.2", + "glob": "^10.3.12", + "madge": "^7.0.0", + "typescript": "^5.4.5" + }, + "peerDependencies": { + "@notionhq/client": "^2.2.15" } } diff --git a/packages/edge/scripts/update-deps.js b/packages/edge/scripts/update-deps.js index f8c913d52..49d87278a 100644 --- a/packages/edge/scripts/update-deps.js +++ b/packages/edge/scripts/update-deps.js @@ -10,6 +10,8 @@ const edgePackagePath = path.join(process.cwd(), "package.json"); const edgePackage = readJson(edgePackagePath); const corePackage = readJson(corePackagePath); edgePackage.dependencies = corePackage.dependencies; +edgePackage.devDependencies = corePackage.devDependencies; +edgePackage.peerDependencies = corePackage.peerDependencies; edgePackage.version = corePackage.version; writeJson(edgePackagePath, edgePackage); execSync("pnpm install --lockfile-only", { stdio: "inherit" }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a0a8004b2..b5b611bc0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -178,12 +178,12 @@ importers: '@types/node': specifier: ^20.12.7 version: 20.12.7 - ts-node: - specifier: ^10.9.2 - version: 10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3) + tsx: + specifier: ^4.7.2 + version: 4.7.2 typescript: - specifier: ^5.4.3 - version: 5.4.3 + specifier: ^5.4.5 + version: 5.4.5 packages/core: dependencies: @@ -208,9 +208,6 @@ importers: '@mistralai/mistralai': specifier: ^0.1.3 version: 0.1.3(encoding@0.1.13) - '@notionhq/client': - specifier: ^2.2.15 - version: 2.2.15(encoding@0.1.13) '@pinecone-database/pinecone': specifier: ^2.2.0 version: 2.2.0 @@ -266,8 +263,8 @@ importers: specifier: ^6.5.0 version: 6.5.0 notion-md-crawler: - specifier: ^0.0.2 - version: 0.0.2(encoding@0.1.13) + specifier: ^1.0.0 + version: 
1.0.0(encoding@0.1.13) ollama: specifier: ^0.5.0 version: 0.5.0 @@ -305,6 +302,9 @@ importers: specifier: ^1.14.3 version: 1.14.3 devDependencies: + '@notionhq/client': + specifier: ^2.2.15 + version: 2.2.15(encoding@0.1.13) '@swc/cli': specifier: ^0.3.12 version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0) @@ -371,9 +371,6 @@ importers: '@mistralai/mistralai': specifier: ^0.1.3 version: 0.1.3(encoding@0.1.13) - '@notionhq/client': - specifier: ^2.2.15 - version: 2.2.15(encoding@0.1.13) '@pinecone-database/pinecone': specifier: ^2.2.0 version: 2.2.0 @@ -429,8 +426,8 @@ importers: specifier: ^6.5.0 version: 6.5.0 notion-md-crawler: - specifier: ^0.0.2 - version: 0.0.2(encoding@0.1.13) + specifier: ^1.0.0 + version: 1.0.0(encoding@0.1.13) ollama: specifier: ^0.5.0 version: 0.5.0 @@ -467,6 +464,28 @@ importers: wink-nlp: specifier: ^1.14.3 version: 1.14.3 + devDependencies: + '@notionhq/client': + specifier: ^2.2.15 + version: 2.2.15(encoding@0.1.13) + '@swc/cli': + specifier: ^0.3.12 + version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0) + '@swc/core': + specifier: ^1.4.16 + version: 1.4.16(@swc/helpers@0.5.2) + concurrently: + specifier: ^8.2.2 + version: 8.2.2 + glob: + specifier: ^10.3.12 + version: 10.3.12 + madge: + specifier: ^7.0.0 + version: 7.0.0(typescript@5.4.5) + typescript: + specifier: ^5.4.5 + version: 5.4.5 packages/edge/e2e/test-edge-runtime: dependencies: @@ -6125,8 +6144,8 @@ packages: resolution: {integrity: sha512-IO9QvjUMWxPQQhs60oOu10CRkWCiZzSUkzbXGGV9pviYl1fXYcvkzQ5jV9z8Y6un8ARoVRl4EtC6v6jNqbaJ/w==} engines: {node: '>=14.16'} - notion-md-crawler@0.0.2: - resolution: {integrity: sha512-lE3/DFMrg7GSbl1sBfDuLVLyxw+yjdarPVm1JGfQ6eONEbNGgO+BdZxpwwZQ1uYeEJurAXMXb/AXT8GKYjKAyg==} + notion-md-crawler@1.0.0: + resolution: {integrity: sha512-mdB6zn/i32qO2C7X7wZLDpWvFryO3bPYMuBfFgmTPomnfEtIejdQJNVaZzw2GapM82lfWZ5dfsZp3s3UL4p1Fg==} npm-run-path@2.0.2: resolution: {integrity: 
sha512-lJxZYlT4DW/bRUtFh1MQIWqmLwQfAxnqWG4HhEdjMlkrJYnJn0Jrr2u3mgxqaWsdiBc76TYkTG/mhrnYTuzfHw==} @@ -15765,7 +15784,7 @@ snapshots: normalize-url@8.0.1: {} - notion-md-crawler@0.0.2(encoding@0.1.13): + notion-md-crawler@1.0.0(encoding@0.1.13): dependencies: '@notionhq/client': 2.2.15(encoding@0.1.13) md-utils-ts: 2.0.0 @@ -17780,26 +17799,6 @@ snapshots: ts-graphviz@1.8.2: {} - ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3): - dependencies: - '@cspotcode/source-map-support': 0.8.1 - '@tsconfig/node10': 1.0.9 - '@tsconfig/node12': 1.0.11 - '@tsconfig/node14': 1.0.3 - '@tsconfig/node16': 1.0.4 - '@types/node': 20.12.7 - acorn: 8.11.3 - acorn-walk: 8.3.2 - arg: 4.1.3 - create-require: 1.1.1 - diff: 4.0.2 - make-error: 1.3.6 - typescript: 5.4.3 - v8-compile-cache-lib: 3.0.1 - yn: 3.1.1 - optionalDependencies: - '@swc/core': 1.4.16(@swc/helpers@0.5.2) - ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.5): dependencies: '@cspotcode/source-map-support': 0.8.1 -- GitLab