Skip to content
Snippets Groups Projects
Unverified Commit 20bc466c authored by Alex Yang's avatar Alex Yang Committed by GitHub
Browse files

chore: bump notion reader (#753)

parent efb1c56b
Branches
Tags
No related merge requests found
......@@ -3,20 +3,21 @@
"private": true,
"type": "module",
"scripts": {
"start": "node --loader ts-node/esm ./src/simple-directory-reader.ts",
"start:csv": "node --loader ts-node/esm ./src/csv.ts",
"start:docx": "node --loader ts-node/esm ./src/docx.ts",
"start:html": "node --loader ts-node/esm ./src/html.ts",
"start:markdown": "node --loader ts-node/esm ./src/markdown.ts",
"start:pdf": "node --loader ts-node/esm ./src/pdf.ts",
"start:llamaparse": "node --loader ts-node/esm ./src/llamaparse.ts"
"start": "node --import tsx ./src/simple-directory-reader.ts",
"start:csv": "node --import tsx ./src/csv.ts",
"start:docx": "node --import tsx ./src/docx.ts",
"start:html": "node --import tsx ./src/html.ts",
"start:markdown": "node --import tsx ./src/markdown.ts",
"start:pdf": "node --import tsx ./src/pdf.ts",
"start:llamaparse": "node --import tsx ./src/llamaparse.ts",
"start:notion": "node --import tsx ./src/notion.ts"
},
"dependencies": {
"llamaindex": "*"
},
"devDependencies": {
"@types/node": "^20.12.7",
"ts-node": "^10.9.2",
"typescript": "^5.4.3"
"tsx": "^4.7.2",
"typescript": "^5.4.5"
}
}
......@@ -7,7 +7,7 @@ import { createInterface } from "node:readline/promises";
program
.argument("[page]", "Notion page id (must be provided)")
.action(async (page, _options, command) => {
.action(async (page, _options) => {
// Initializing a client
if (!process.env.NOTION_TOKEN) {
......@@ -55,7 +55,7 @@ program
.filter((page) => page !== null);
console.log("Found pages:");
console.table(pages);
console.log(`To run, run ts-node ${command.name()} [page id]`);
console.log(`To run, run with [page id]`);
return;
}
}
......
......@@ -12,7 +12,6 @@
"@llamaindex/cloud": "0.0.5",
"@llamaindex/env": "workspace:*",
"@mistralai/mistralai": "^0.1.3",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^2.2.0",
"@qdrant/js-client-rest": "^1.8.2",
"@types/lodash": "^4.17.0",
......@@ -31,7 +30,7 @@
"mammoth": "^1.7.1",
"md-utils-ts": "^2.0.0",
"mongodb": "^6.5.0",
"notion-md-crawler": "^0.0.2",
"notion-md-crawler": "^1.0.0",
"ollama": "^0.5.0",
"openai": "^4.38.0",
"papaparse": "^5.4.1",
......@@ -45,7 +44,11 @@
"wikipedia": "^2.1.2",
"wink-nlp": "^1.14.3"
},
"peerDependencies": {
"@notionhq/client": "^2.2.15"
},
"devDependencies": {
"@notionhq/client": "^2.2.15",
"@swc/cli": "^0.3.12",
"@swc/core": "^1.4.16",
"concurrently": "^8.2.2",
......
import type { Client } from "@notionhq/client";
import type { Crawler, Pages } from "notion-md-crawler";
import type { Crawler, CrawlerOptions, Page } from "notion-md-crawler";
import { crawler, pageToString } from "notion-md-crawler";
import { Document } from "../Node.js";
import type { BaseReader } from "./type.js";
type OptionalSerializers = Parameters<Crawler>[number]["serializers"];
/**
* Options for initializing the NotionReader class
* @typedef {Object} NotionReaderOptions
* @property {Client} client - The Notion Client object for API interactions
* @property {OptionalSerializers} [serializers] - Option to customize serialization. See [the url](https://github.com/TomPenguin/notion-md-crawler/tree/main) for details.
*/
type NotionReaderOptions = {
client: Client;
serializers?: OptionalSerializers;
};
type NotionReaderOptions = Pick<CrawlerOptions, "client" | "serializers">;
/**
* Notion pages are retrieved recursively and converted to Document objects.
......@@ -25,7 +13,7 @@ type NotionReaderOptions = {
* Please refer to [this document](https://www.notion.so/help/create-integrations-with-the-notion-api) for details.
*/
export class NotionReader implements BaseReader {
private crawl: ReturnType<Crawler>;
private readonly crawl: ReturnType<Crawler>;
/**
* Constructor for the NotionReader class
......@@ -37,10 +25,10 @@ export class NotionReader implements BaseReader {
/**
* Converts Pages to an array of Document objects
* @param {Pages} pages - The Notion pages to convert (Return value of `loadPages`)
* @param {Page[]} pages - The Notion pages to convert (return value of `loadPages`)
* @returns {Document[]} An array of Document objects
*/
toDocuments(pages: Pages): Document[] {
toDocuments(pages: Page[]): Document[] {
return Object.values(pages).map((page) => {
const text = pageToString(page);
return new Document({
......@@ -54,10 +42,21 @@ export class NotionReader implements BaseReader {
/**
* Loads recursively the Notion page with the specified root page ID.
* @param {string} rootPageId - The root Notion page ID
* @returns {Promise<Pages>} A Promise that resolves to a Pages object(Convertible with the `toDocuments` method)
* @returns {Promise<Page[]>} A Promise that resolves to an array of Page objects (convertible with the `toDocuments` method)
*/
async loadPages(rootPageId: string): Promise<Pages> {
return this.crawl(rootPageId);
async loadPages(rootPageId: string): Promise<Page[]> {
  // Pages that were crawled successfully, in the order the crawler yields them.
  const loaded: Page[] = [];
  for await (const outcome of this.crawl(rootPageId)) {
    if (!outcome.success) {
      // Best effort: report the failed page and keep consuming the remaining results.
      console.error(
        `Failed to load page (${outcome.failure.parentId}): ${outcome.failure.reason}`,
      );
      continue;
    }
    loaded.push(outcome.page);
  }
  return loaded;
}
/**
......
......@@ -11,7 +11,6 @@
"@llamaindex/cloud": "0.0.5",
"@llamaindex/env": "workspace:*",
"@mistralai/mistralai": "^0.1.3",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^2.2.0",
"@qdrant/js-client-rest": "^1.8.2",
"@types/lodash": "^4.17.0",
......@@ -30,7 +29,7 @@
"mammoth": "^1.7.1",
"md-utils-ts": "^2.0.0",
"mongodb": "^6.5.0",
"notion-md-crawler": "^0.0.2",
"notion-md-crawler": "^1.0.0",
"ollama": "^0.5.0",
"openai": "^4.38.0",
"papaparse": "^5.4.1",
......@@ -82,5 +81,17 @@
"update:deps": "node scripts/update-deps.js",
"build:core": "pnpm --filter llamaindex build && cp -r ../core/dist . && rm -rf dist/cjs",
"build": "pnpm run update:deps && pnpm run build:core && pnpm copy"
},
"devDependencies": {
"@notionhq/client": "^2.2.15",
"@swc/cli": "^0.3.12",
"@swc/core": "^1.4.16",
"concurrently": "^8.2.2",
"glob": "^10.3.12",
"madge": "^7.0.0",
"typescript": "^5.4.5"
},
"peerDependencies": {
"@notionhq/client": "^2.2.15"
}
}
......@@ -10,6 +10,8 @@ const edgePackagePath = path.join(process.cwd(), "package.json");
// Mirror the core package's dependency sets and version into the edge package manifest.
const edgePackage = readJson(edgePackagePath);
const corePackage = readJson(corePackagePath);
for (const field of [
  "dependencies",
  "devDependencies",
  "peerDependencies",
  "version",
]) {
  edgePackage[field] = corePackage[field];
}
writeJson(edgePackagePath, edgePackage);
// Re-run pnpm after rewriting the manifest; the --lockfile-only flag is presumably
// there so only the lockfile is refreshed — confirm against the pnpm docs.
execSync("pnpm install --lockfile-only", { stdio: "inherit" });
......
......@@ -178,12 +178,12 @@ importers:
'@types/node':
specifier: ^20.12.7
version: 20.12.7
ts-node:
specifier: ^10.9.2
version: 10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3)
tsx:
specifier: ^4.7.2
version: 4.7.2
typescript:
specifier: ^5.4.3
version: 5.4.3
specifier: ^5.4.5
version: 5.4.5
 
packages/core:
dependencies:
......@@ -208,9 +208,6 @@ importers:
'@mistralai/mistralai':
specifier: ^0.1.3
version: 0.1.3(encoding@0.1.13)
'@notionhq/client':
specifier: ^2.2.15
version: 2.2.15(encoding@0.1.13)
'@pinecone-database/pinecone':
specifier: ^2.2.0
version: 2.2.0
......@@ -266,8 +263,8 @@ importers:
specifier: ^6.5.0
version: 6.5.0
notion-md-crawler:
specifier: ^0.0.2
version: 0.0.2(encoding@0.1.13)
specifier: ^1.0.0
version: 1.0.0(encoding@0.1.13)
ollama:
specifier: ^0.5.0
version: 0.5.0
......@@ -305,6 +302,9 @@ importers:
specifier: ^1.14.3
version: 1.14.3
devDependencies:
'@notionhq/client':
specifier: ^2.2.15
version: 2.2.15(encoding@0.1.13)
'@swc/cli':
specifier: ^0.3.12
version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0)
......@@ -371,9 +371,6 @@ importers:
'@mistralai/mistralai':
specifier: ^0.1.3
version: 0.1.3(encoding@0.1.13)
'@notionhq/client':
specifier: ^2.2.15
version: 2.2.15(encoding@0.1.13)
'@pinecone-database/pinecone':
specifier: ^2.2.0
version: 2.2.0
......@@ -429,8 +426,8 @@ importers:
specifier: ^6.5.0
version: 6.5.0
notion-md-crawler:
specifier: ^0.0.2
version: 0.0.2(encoding@0.1.13)
specifier: ^1.0.0
version: 1.0.0(encoding@0.1.13)
ollama:
specifier: ^0.5.0
version: 0.5.0
......@@ -467,6 +464,28 @@ importers:
wink-nlp:
specifier: ^1.14.3
version: 1.14.3
devDependencies:
'@notionhq/client':
specifier: ^2.2.15
version: 2.2.15(encoding@0.1.13)
'@swc/cli':
specifier: ^0.3.12
version: 0.3.12(@swc/core@1.4.16(@swc/helpers@0.5.2))(chokidar@3.6.0)
'@swc/core':
specifier: ^1.4.16
version: 1.4.16(@swc/helpers@0.5.2)
concurrently:
specifier: ^8.2.2
version: 8.2.2
glob:
specifier: ^10.3.12
version: 10.3.12
madge:
specifier: ^7.0.0
version: 7.0.0(typescript@5.4.5)
typescript:
specifier: ^5.4.5
version: 5.4.5
 
packages/edge/e2e/test-edge-runtime:
dependencies:
......@@ -6125,8 +6144,8 @@ packages:
resolution: {integrity: sha512-IO9QvjUMWxPQQhs60oOu10CRkWCiZzSUkzbXGGV9pviYl1fXYcvkzQ5jV9z8Y6un8ARoVRl4EtC6v6jNqbaJ/w==}
engines: {node: '>=14.16'}
 
notion-md-crawler@0.0.2:
resolution: {integrity: sha512-lE3/DFMrg7GSbl1sBfDuLVLyxw+yjdarPVm1JGfQ6eONEbNGgO+BdZxpwwZQ1uYeEJurAXMXb/AXT8GKYjKAyg==}
notion-md-crawler@1.0.0:
resolution: {integrity: sha512-mdB6zn/i32qO2C7X7wZLDpWvFryO3bPYMuBfFgmTPomnfEtIejdQJNVaZzw2GapM82lfWZ5dfsZp3s3UL4p1Fg==}
 
npm-run-path@2.0.2:
resolution: {integrity: sha512-lJxZYlT4DW/bRUtFh1MQIWqmLwQfAxnqWG4HhEdjMlkrJYnJn0Jrr2u3mgxqaWsdiBc76TYkTG/mhrnYTuzfHw==}
......@@ -15765,7 +15784,7 @@ snapshots:
 
normalize-url@8.0.1: {}
 
notion-md-crawler@0.0.2(encoding@0.1.13):
notion-md-crawler@1.0.0(encoding@0.1.13):
dependencies:
'@notionhq/client': 2.2.15(encoding@0.1.13)
md-utils-ts: 2.0.0
......@@ -17780,26 +17799,6 @@ snapshots:
 
ts-graphviz@1.8.2: {}
 
ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.3):
dependencies:
'@cspotcode/source-map-support': 0.8.1
'@tsconfig/node10': 1.0.9
'@tsconfig/node12': 1.0.11
'@tsconfig/node14': 1.0.3
'@tsconfig/node16': 1.0.4
'@types/node': 20.12.7
acorn: 8.11.3
acorn-walk: 8.3.2
arg: 4.1.3
create-require: 1.1.1
diff: 4.0.2
make-error: 1.3.6
typescript: 5.4.3
v8-compile-cache-lib: 3.0.1
yn: 3.1.1
optionalDependencies:
'@swc/core': 1.4.16(@swc/helpers@0.5.2)
ts-node@10.9.2(@swc/core@1.4.16(@swc/helpers@0.5.2))(@types/node@20.12.7)(typescript@5.4.5):
dependencies:
'@cspotcode/source-map-support': 0.8.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment