From b757d9a94ed9e20241bf4f64f4bf9b806a2afb15 Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Mon, 29 Jan 2024 22:05:23 -0600 Subject: [PATCH] chore: split readers into different files (#479) --- .github/workflows/test.yml | 29 ++++++- examples/chatEngine.ts | 2 - examples/gptllama.ts | 2 - examples/package.json | 3 +- examples/readers/load-assemblyai.ts | 9 +- examples/readers/load-csv.ts | 2 +- examples/readers/load-docx.ts | 3 +- examples/readers/load-html.ts | 3 +- examples/readers/load-md.ts | 3 +- examples/readers/load-notion.ts | 5 +- examples/readers/load-pdf.ts | 3 +- packages/core/package.json | 52 +++++++++++- packages/core/src/ingestion/IngestionCache.ts | 3 +- pnpm-lock.yaml | 83 ++++++++++--------- 14 files changed, 138 insertions(+), 64 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7e03e5c97..7abfada37 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,10 +32,35 @@ jobs: - name: Install dependencies run: pnpm install - name: Build - run: pnpm run build - working-directory: ./packages/core + run: pnpm run build --filter llamaindex - name: Run Type Check run: pnpm run type-check - name: Run Circular Dependency Check run: pnpm run circular-check working-directory: ./packages/core + typecheck-examples: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v2 + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version-file: ".nvmrc" + cache: "pnpm" + - name: Install dependencies + run: pnpm install + - name: Build + run: pnpm run build --filter llamaindex + - name: Copy examples + run: rsync -rv --exclude=node_modules ./examples ${{ runner.temp }} + - name: Pack + run: pnpm pack --pack-destination ${{ runner.temp }} + working-directory: packages/core + - name: Install llamaindex + run: npm add ${{ runner.temp }}/*.tgz + working-directory: ${{ runner.temp }}/examples + - name: Run Type Check + run: npx tsc --project ./tsconfig.json + working-directory: ${{ runner.temp }}/examples diff --git a/examples/chatEngine.ts b/examples/chatEngine.ts index 52538944b..90152c88c 100644 --- a/examples/chatEngine.ts +++ b/examples/chatEngine.ts @@ -1,6 +1,4 @@ import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore import readline from "node:readline/promises"; import { diff --git a/examples/gptllama.ts b/examples/gptllama.ts index 0de35b789..71ff0ecfa 100644 --- a/examples/gptllama.ts +++ b/examples/gptllama.ts @@ -1,6 +1,4 @@ import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore import readline from "node:readline/promises"; import { ChatMessage, LlamaDeuce, OpenAI } from "llamaindex"; diff --git a/examples/package.json b/examples/package.json index 68a7c002d..f06c907b3 100644 --- a/examples/package.json +++ b/examples/package.json @@ -14,7 +14,8 @@ }, "devDependencies": { "@types/node": "^18.19.10", - "ts-node": "^10.9.2" + "ts-node": "^10.9.2", + "typescript": "^5.3.3" }, "scripts": { "lint": "eslint ." diff --git a/examples/readers/load-assemblyai.ts b/examples/readers/load-assemblyai.ts index 42c944059..63e07f294 100644 --- a/examples/readers/load-assemblyai.ts +++ b/examples/readers/load-assemblyai.ts @@ -1,12 +1,7 @@ import { program } from "commander"; -import { - AudioTranscriptReader, - TranscribeParams, - VectorStoreIndex, -} from "llamaindex"; +import { TranscribeParams, VectorStoreIndex } from "llamaindex"; +import { AudioTranscriptReader } from "llamaindex/readers/AssemblyAIReader"; import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore import readline from "node:readline/promises"; program diff --git a/examples/readers/load-csv.ts b/examples/readers/load-csv.ts index d16945197..ceec654f6 100644 --- a/examples/readers/load-csv.ts +++ b/examples/readers/load-csv.ts @@ -1,11 +1,11 @@ import { CompactAndRefine, OpenAI, - PapaCSVReader, ResponseSynthesizer, serviceContextFromDefaults, VectorStoreIndex, } from "llamaindex"; +import { PapaCSVReader } from "llamaindex/readers/CSVReader"; async function main() { // Load CSV diff --git a/examples/readers/load-docx.ts b/examples/readers/load-docx.ts index 459dad3ef..3c0b1b138 100644 --- a/examples/readers/load-docx.ts +++ b/examples/readers/load-docx.ts @@ -1,4 +1,5 @@ -import { DocxReader, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { DocxReader } from "llamaindex/readers/DocxReader"; const FILE_PATH = "./data/stars.docx"; const SAMPLE_QUERY = "Information about Zodiac"; diff --git a/examples/readers/load-html.ts b/examples/readers/load-html.ts index 87ea89ec6..065bdbed8 100644 --- a/examples/readers/load-html.ts +++ b/examples/readers/load-html.ts @@ -1,4 +1,5 @@ -import { HTMLReader, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { HTMLReader } from "llamaindex/readers/HTMLReader"; async function main() { // Load page diff --git a/examples/readers/load-md.ts b/examples/readers/load-md.ts index 5e6e300af..4ef21ca45 100644 --- a/examples/readers/load-md.ts +++ b/examples/readers/load-md.ts @@ -1,4 +1,5 @@ -import { MarkdownReader, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { MarkdownReader } from "llamaindex/readers/MarkdownReader"; const FILE_PATH = "./data/planets.md"; const SAMPLE_QUERY = "List all planets"; diff --git a/examples/readers/load-notion.ts b/examples/readers/load-notion.ts index 04f1651ef..7a2394030 100644 --- a/examples/readers/load-notion.ts +++ b/examples/readers/load-notion.ts @@ -1,9 +1,8 @@ import { Client } from "@notionhq/client"; import { program } from "commander"; -import { NotionReader, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { NotionReader } from "llamaindex/readers/NotionReader"; import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore import readline from "node:readline/promises"; program diff --git a/examples/readers/load-pdf.ts b/examples/readers/load-pdf.ts index 5721220fa..1552e9847 100644 --- a/examples/readers/load-pdf.ts +++ b/examples/readers/load-pdf.ts @@ -1,4 +1,5 @@ -import { PDFReader, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { PDFReader } from "llamaindex/readers/PDFReader"; import { resolve } from "node:path"; async function main() { diff --git a/packages/core/package.json b/packages/core/package.json index 0efd5623d..2cb3f7b5c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -40,7 +40,7 @@ "@types/node": "^18.19.10", "@types/papaparse": "^5.3.14", "@types/pg": "^8.11.0", - "bunchee": "^4.4.2", + "bunchee": "^4.4.3", "edit-json-file": "^1.8.0", "madge": "^6.1.0", "typescript": "^5.3.3" @@ -62,6 +62,56 @@ "import": "./dist/env.mjs", "edge-light": "./dist/env.edge-light.mjs", "require": "./dist/env.js" + }, + "./readers/AssemblyAIReader": { + "types": "./dist/readers/AssemblyAIReader.d.mts", + "import": "./dist/readers/AssemblyAIReader.mjs", + "require": "./dist/readers/AssemblyAIReader.js" + }, + "./readers/CSVReader": { + "types": "./dist/readers/CSVReader.d.mts", + "import": "./dist/readers/CSVReader.mjs", + "require": "./dist/readers/CSVReader.js" + }, + "./readers/DocxReader": { + "types": "./dist/readers/DocxReader.d.mts", + "import": "./dist/readers/DocxReader.mjs", + "require": "./dist/readers/DocxReader.js" + }, + "./readers/HTMLReader": { + "types": "./dist/readers/HTMLReader.d.mts", + "import": "./dist/readers/HTMLReader.mjs", + "require": "./dist/readers/HTMLReader.js" + }, + "./readers/ImageReader": { + "types": "./dist/readers/ImageReader.d.mts", + "import": "./dist/readers/ImageReader.mjs", + "require": "./dist/readers/ImageReader.js" + }, + "./readers/MarkdownReader": { + "types": "./dist/readers/MarkdownReader.d.mts", + "import": "./dist/readers/MarkdownReader.mjs", + "require": "./dist/readers/MarkdownReader.js" + }, + "./readers/NotionReader": { + "types": "./dist/readers/NotionReader.d.mts", + "import": "./dist/readers/NotionReader.mjs", + "require": "./dist/readers/NotionReader.js" + }, + "./readers/PDFReader": { + "types": "./dist/readers/PDFReader.d.mts", + "import": "./dist/readers/PDFReader.mjs", + "require": "./dist/readers/PDFReader.js" + }, + "./readers/SimpleDirectoryReader": { + "types": "./dist/readers/SimpleDirectoryReader.d.mts", + "import": "./dist/readers/SimpleDirectoryReader.mjs", + "require": "./dist/readers/SimpleDirectoryReader.js" + }, + "./readers/SimpleMongoReader": { + "types": "./dist/readers/SimpleMongoReader.d.mts", + "import": "./dist/readers/SimpleMongoReader.mjs", + "require": "./dist/readers/SimpleMongoReader.js" } }, "files": [ diff --git a/packages/core/src/ingestion/IngestionCache.ts b/packages/core/src/ingestion/IngestionCache.ts index e88906f8b..929f3a5d4 100644 --- a/packages/core/src/ingestion/IngestionCache.ts +++ b/packages/core/src/ingestion/IngestionCache.ts @@ -1,7 +1,8 @@ import { BaseNode, MetadataMode } from "../Node"; import { createSHA256 } from "../env"; -import { BaseKVStore, SimpleKVStore } from "../storage"; import { docToJson, jsonToDoc } from "../storage/docStore/utils"; +import { SimpleKVStore } from "../storage/kvStore/SimpleKVStore"; +import { BaseKVStore } from "../storage/kvStore/types"; import { TransformComponent } from "./types"; export function getTransformationHash( diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8999d7fa5..de25f40bb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -146,6 +146,9 @@ importers: ts-node: specifier: ^10.9.2 version: 10.9.2(@types/node@18.19.10)(typescript@5.3.3) + typescript: + specifier: ^5.3.3 + version: 5.3.3 packages/core: dependencies: @@ -253,8 +256,8 @@ importers: specifier: ^8.11.0 version: 8.11.0 bunchee: - specifier: ^4.4.2 - version: 4.4.2(typescript@5.3.3) + specifier: ^4.4.3 + version: 4.4.3(typescript@5.3.3) edit-json-file: specifier: ^1.8.0 version: 1.8.0 @@ -3967,8 +3970,8 @@ packages: transitivePeerDependencies: - supports-color - /@swc/core-darwin-arm64@1.3.102: - resolution: {integrity: sha512-CJDxA5Wd2cUMULj3bjx4GEoiYyyiyL8oIOu4Nhrs9X+tlg8DnkCm4nI57RJGP8Mf6BaXPIJkHX8yjcefK2RlDA==} + /@swc/core-darwin-arm64@1.3.107: + resolution: {integrity: sha512-47tD/5vSXWxPd0j/ZllyQUg4bqalbQTsmqSw0J4dDdS82MWqCAwUErUrAZPRjBkjNQ6Kmrf5rpCWaGTtPw+ngw==} engines: {node: '>=10'} cpu: [arm64] os: [darwin] @@ -3976,8 +3979,8 @@ packages: dev: true optional: true - /@swc/core-darwin-x64@1.3.102: - resolution: {integrity: sha512-X5akDkHwk6oAer49oER0qZMjNMkLH3IOZaV1m98uXIasAGyjo5WH1MKPeMLY1sY6V6TrufzwiSwD4ds571ytcg==} + /@swc/core-darwin-x64@1.3.107: + resolution: {integrity: sha512-hwiLJ2ulNkBGAh1m1eTfeY1417OAYbRGcb/iGsJ+LuVLvKAhU/itzsl535CvcwAlt2LayeCFfcI8gdeOLeZa9A==} engines: {node: '>=10'} cpu: [x64] os: [darwin] @@ -3985,8 +3988,8 @@ packages: dev: true optional: true - /@swc/core-linux-arm-gnueabihf@1.3.102: - resolution: {integrity: sha512-kJH3XtZP9YQdjq/wYVBeFuiVQl4HaC4WwRrIxAHwe2OyvrwUI43dpW3LpxSggBnxXcVCXYWf36sTnv8S75o2Gw==} + /@swc/core-linux-arm-gnueabihf@1.3.107: + resolution: {integrity: sha512-I2wzcC0KXqh0OwymCmYwNRgZ9nxX7DWnOOStJXV3pS0uB83TXAkmqd7wvMBuIl9qu4Hfomi9aDM7IlEEn9tumQ==} engines: {node: '>=10'} cpu: [arm] os: [linux] @@ -3994,8 +3997,8 @@ packages: dev: true optional: true - /@swc/core-linux-arm64-gnu@1.3.102: - resolution: {integrity: sha512-flQP2WDyCgO24WmKA1wjjTx+xfCmavUete2Kp6yrM+631IHLGnr17eu7rYJ/d4EnDBId/ytMyrnWbTVkaVrpbQ==} + /@swc/core-linux-arm64-gnu@1.3.107: + resolution: {integrity: sha512-HWgnn7JORYlOYnGsdunpSF8A+BCZKPLzLtEUA27/M/ZuANcMZabKL9Zurt7XQXq888uJFAt98Gy+59PU90aHKg==} engines: {node: '>=10'} cpu: [arm64] os: [linux] @@ -4003,8 +4006,8 @@ packages: dev: true optional: true - /@swc/core-linux-arm64-musl@1.3.102: - resolution: {integrity: sha512-bQEQSnC44DyoIGLw1+fNXKVGoCHi7eJOHr8BdH0y1ooy9ArskMjwobBFae3GX4T1AfnrTaejyr0FvLYIb0Zkog==} + /@swc/core-linux-arm64-musl@1.3.107: + resolution: {integrity: sha512-vfPF74cWfAm8hyhS8yvYI94ucMHIo8xIYU+oFOW9uvDlGQRgnUf/6DEVbLyt/3yfX5723Ln57U8uiMALbX5Pyw==} engines: {node: '>=10'} cpu: [arm64] os: [linux] @@ -4012,8 +4015,8 @@ packages: dev: true optional: true - /@swc/core-linux-x64-gnu@1.3.102: - resolution: {integrity: sha512-dFvnhpI478svQSxqISMt00MKTDS0e4YtIr+ioZDG/uJ/q+RpcNy3QI2KMm05Fsc8Y0d4krVtvCKWgfUMsJZXAg==} + /@swc/core-linux-x64-gnu@1.3.107: + resolution: {integrity: sha512-uBVNhIg0ip8rH9OnOsCARUFZ3Mq3tbPHxtmWk9uAa5u8jQwGWeBx5+nTHpDOVd3YxKb6+5xDEI/edeeLpha/9g==} engines: {node: '>=10'} cpu: [x64] os: [linux] @@ -4021,8 +4024,8 @@ packages: dev: true optional: true - /@swc/core-linux-x64-musl@1.3.102: - resolution: {integrity: sha512-+a0M3CvjeIRNA/jTCzWEDh2V+mhKGvLreHOL7J97oULZy5yg4gf7h8lQX9J8t9QLbf6fsk+0F8bVH1Ie/PbXjA==} + /@swc/core-linux-x64-musl@1.3.107: + resolution: {integrity: sha512-mvACkUvzSIB12q1H5JtabWATbk3AG+pQgXEN95AmEX2ZA5gbP9+B+mijsg7Sd/3tboHr7ZHLz/q3SHTvdFJrEw==} engines: {node: '>=10'} cpu: [x64] os: [linux] @@ -4030,8 +4033,8 @@ packages: dev: true optional: true - /@swc/core-win32-arm64-msvc@1.3.102: - resolution: {integrity: sha512-w76JWLjkZNOfkB25nqdWUNCbt0zJ41CnWrJPZ+LxEai3zAnb2YtgB/cCIrwxDebRuMgE9EJXRj7gDDaTEAMOOQ==} + /@swc/core-win32-arm64-msvc@1.3.107: + resolution: {integrity: sha512-J3P14Ngy/1qtapzbguEH41kY109t6DFxfbK4Ntz9dOWNuVY3o9/RTB841ctnJk0ZHEG+BjfCJjsD2n8H5HcaOA==} engines: {node: '>=10'} cpu: [arm64] os: [win32] @@ -4039,8 +4042,8 @@ packages: dev: true optional: true - /@swc/core-win32-ia32-msvc@1.3.102: - resolution: {integrity: sha512-vlDb09HiGqKwz+2cxDS9T5/461ipUQBplvuhW+cCbzzGuPq8lll2xeyZU0N1E4Sz3MVdSPx1tJREuRvlQjrwNg==} + /@swc/core-win32-ia32-msvc@1.3.107: + resolution: {integrity: sha512-ZBUtgyjTHlz8TPJh7kfwwwFma+ktr6OccB1oXC8fMSopD0AxVnQasgun3l3099wIsAB9eEsJDQ/3lDkOLs1gBA==} engines: {node: '>=10'} cpu: [ia32] os: [win32] @@ -4048,8 +4051,8 @@ packages: dev: true optional: true - /@swc/core-win32-x64-msvc@1.3.102: - resolution: {integrity: sha512-E/jfSD7sShllxBwwgDPeXp1UxvIqehj/ShSUqq1pjR/IDRXngcRSXKJK92mJkNFY7suH6BcCWwzrxZgkO7sWmw==} + /@swc/core-win32-x64-msvc@1.3.107: + resolution: {integrity: sha512-Eyzo2XRqWOxqhE1gk9h7LWmUf4Bp4Xn2Ttb0ayAXFp6YSTxQIThXcT9kipXZqcpxcmDwoq8iWbbf2P8XL743EA==} engines: {node: '>=10'} cpu: [x64] os: [win32] @@ -4057,8 +4060,8 @@ packages: dev: true optional: true - /@swc/core@1.3.102(@swc/helpers@0.5.3): - resolution: {integrity: sha512-OAjNLY/f6QWKSDzaM3bk31A+OYHu6cPa9P/rFIx8X5d24tHXUpRiiq6/PYI6SQRjUPlB72GjsjoEU8F+ALadHg==} + /@swc/core@1.3.107(@swc/helpers@0.5.3): + resolution: {integrity: sha512-zKhqDyFcTsyLIYK1iEmavljZnf4CCor5pF52UzLAz4B6Nu/4GLU+2LQVAf+oRHjusG39PTPjd2AlRT3f3QWfsQ==} engines: {node: '>=10'} requiresBuild: true peerDependencies: @@ -4071,16 +4074,16 @@ packages: '@swc/helpers': 0.5.3 '@swc/types': 0.1.5 optionalDependencies: - '@swc/core-darwin-arm64': 1.3.102 - '@swc/core-darwin-x64': 1.3.102 - '@swc/core-linux-arm-gnueabihf': 1.3.102 - '@swc/core-linux-arm64-gnu': 1.3.102 - '@swc/core-linux-arm64-musl': 1.3.102 - '@swc/core-linux-x64-gnu': 1.3.102 - '@swc/core-linux-x64-musl': 1.3.102 - '@swc/core-win32-arm64-msvc': 1.3.102 - '@swc/core-win32-ia32-msvc': 1.3.102 - '@swc/core-win32-x64-msvc': 1.3.102 + '@swc/core-darwin-arm64': 1.3.107 + '@swc/core-darwin-x64': 1.3.107 + '@swc/core-linux-arm-gnueabihf': 1.3.107 + '@swc/core-linux-arm64-gnu': 1.3.107 + '@swc/core-linux-arm64-musl': 1.3.107 + '@swc/core-linux-x64-gnu': 1.3.107 + '@swc/core-linux-x64-musl': 1.3.107 + '@swc/core-win32-arm64-msvc': 1.3.107 + '@swc/core-win32-ia32-msvc': 1.3.107 + '@swc/core-win32-x64-msvc': 1.3.107 dev: true /@swc/counter@0.1.2: @@ -5676,8 +5679,8 @@ packages: semver: 7.5.4 dev: true - /bunchee@4.4.2(typescript@5.3.3): - resolution: {integrity: sha512-AFMGGA2wESumdeMUOE3nOkZJPOQOyy3X+psMcyKDzzg3PF7V8CvG3Ozkq6W6nkPvev0X+JNY3iTOf6ED6buVZw==} + /bunchee@4.4.3(typescript@5.3.3): + resolution: {integrity: sha512-N67vxbBzdBOpbgwQtynvfKfM5mlvNw+f0IYFkrM0AL9/GFw/Zl6YB+4A/ph71A2OUvTLjWf4IRJIlQbBeR2xsg==} engines: {node: '>= 18.0.0'} hasBin: true peerDependencies: @@ -5695,7 +5698,7 @@ packages: '@rollup/plugin-replace': 5.0.5(rollup@4.9.5) '@rollup/plugin-wasm': 6.2.2(rollup@4.9.5) '@rollup/pluginutils': 5.1.0(rollup@4.9.5) - '@swc/core': 1.3.102(@swc/helpers@0.5.3) + '@swc/core': 1.3.107(@swc/helpers@0.5.3) '@swc/helpers': 0.5.3 arg: 5.0.2 clean-css: 5.3.3 @@ -5703,7 +5706,7 @@ packages: rimraf: 5.0.5 rollup: 4.9.5 rollup-plugin-dts: 6.1.0(rollup@4.9.5)(typescript@5.3.3) - rollup-plugin-swc3: 0.11.0(@swc/core@1.3.102)(rollup@4.9.5) + rollup-plugin-swc3: 0.11.0(@swc/core@1.3.107)(rollup@4.9.5) rollup-preserve-directives: 1.1.1(rollup@4.9.5) tslib: 2.6.2 typescript: 5.3.3 @@ -14016,7 +14019,7 @@ packages: '@babel/code-frame': 7.23.5 dev: true - /rollup-plugin-swc3@0.11.0(@swc/core@1.3.102)(rollup@4.9.5): + /rollup-plugin-swc3@0.11.0(@swc/core@1.3.107)(rollup@4.9.5): resolution: {integrity: sha512-luB9Ngb1YieWPpJttKvkmjN3lG5l28SmASLbf2CoScUB2+EImU0bE8wX4EYKEqv5clVulhWRQHQvE+H33X/03g==} engines: {node: '>=12'} peerDependencies: @@ -14025,7 +14028,7 @@ packages: dependencies: '@fastify/deepmerge': 1.3.0 '@rollup/pluginutils': 5.1.0(rollup@4.9.5) - '@swc/core': 1.3.102(@swc/helpers@0.5.3) + '@swc/core': 1.3.107(@swc/helpers@0.5.3) get-tsconfig: 4.7.2 rollup: 4.9.5 rollup-preserve-directives: 1.1.1(rollup@4.9.5) -- GitLab