From dc8be8740d67d5263c528aeecbd14ada76a94458 Mon Sep 17 00:00:00 2001 From: Louis de Courcel <einsenhorn@gmail.com> Date: Wed, 27 Sep 2023 01:18:11 +0200 Subject: [PATCH] impr: add a simple example to show pinecone query with prefilters --- apps/simple/package.json | 1 + apps/simple/vectorIndexFromVectorStore.ts | 197 ++++++++++++++++++++++ pnpm-lock.yaml | 110 ++++++++---- 3 files changed, 276 insertions(+), 32 deletions(-) create mode 100644 apps/simple/vectorIndexFromVectorStore.ts diff --git a/apps/simple/package.json b/apps/simple/package.json index adac6e02c..a40ced2bd 100644 --- a/apps/simple/package.json +++ b/apps/simple/package.json @@ -4,6 +4,7 @@ "name": "simple", "dependencies": { "@notionhq/client": "^2.2.12", + "@pinecone-database/pinecone": "^1.0.1", "commander": "^11.0.0", "llamaindex": "workspace:*" }, diff --git a/apps/simple/vectorIndexFromVectorStore.ts b/apps/simple/vectorIndexFromVectorStore.ts new file mode 100644 index 000000000..f61269ed2 --- /dev/null +++ b/apps/simple/vectorIndexFromVectorStore.ts @@ -0,0 +1,197 @@ +import { + OpenAI, + ResponseSynthesizer, + RetrieverQueryEngine, + serviceContextFromDefaults, + TextNode, + TreeSummarize, + VectorIndexRetriever, + VectorStore, + VectorStoreIndex, + VectorStoreQuery, + VectorStoreQueryResult, +} from "llamaindex"; + +import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone"; + +/** + * Please do not use this class in production; it's only for demonstration purposes. 
+ */ +class PineconeVectorStore<T extends RecordMetadata = RecordMetadata> implements VectorStore { + storesText = true; + isEmbeddingQuery = false; + + indexName!: string; + pineconeClient!: Pinecone; + index!: Index<T>; + + constructor({ + indexName, + client, + }: { + indexName: string; + client: Pinecone; + }) { + this.indexName = indexName; + this.pineconeClient = client; + this.index = client.index<T>(indexName); + } + + client() { + return this.pineconeClient; + } + + async query( + query: VectorStoreQuery, + kwargs?: any, + ): Promise<VectorStoreQueryResult> { + let queryEmbedding: number[] = []; + if ( + query.queryEmbedding + ) { + if (typeof query.alpha === "number") { /* a numeric alpha scales every component of the query embedding */ + const alpha = query.alpha; + queryEmbedding = query.queryEmbedding.map((v) => v * alpha); + } else { + queryEmbedding = query.queryEmbedding; + } + } + + // The current LlamaIndexTS implementation only supports exact-match filters, so the Pinecone filter is read from kwargs.filter instead (falling back to {}). + const filter = kwargs?.filter || {}; + + const response = await this.index.query({ + filter, + vector: queryEmbedding, + topK: query.similarityTopK, + includeValues: true, + includeMetadata: true, + }); + + + console.log(`Numbers of vectors returned by Pinecone after preFilters are applied: ${response?.matches?.length || 0}.`); + + const topKIds: string[] = []; + const topKNodes: TextNode[] = []; + const topKScores: number[] = []; + + const metadataToNode = (metadata?: T): Partial<TextNode> => { + if (!metadata) { + throw new Error("metadata is undefined."); + } + + const nodeContent = metadata["_node_content"]; + if (!nodeContent) { + throw new Error("nodeContent is undefined."); + } + + if (typeof nodeContent !== "string") { + throw new Error("nodeContent is not a string."); + } + + return JSON.parse(nodeContent); /* the parsed JSON is returned as-is, without validation */ + }; + + if (response.matches) { + for (const match of response.matches) { + const node = new TextNode({ + ...metadataToNode(match.metadata), + embedding: match.values, + }); + + topKIds.push(match.id); + topKNodes.push(node); 
+ topKScores.push(match.score ?? 0); /* a missing score is recorded as 0 */ + } + } + + const result = { + ids: topKIds, + nodes: topKNodes, + similarities: topKScores, + }; + + return result; + } + + add(): Promise<string[]> { /* no-op stub: stores nothing and resolves to an empty id list */ + return Promise.resolve([]); + } + + delete(): Promise<void> { + throw new Error("Method `delete` not implemented."); + } + + persist(): Promise<void> { + throw new Error("Method `persist` not implemented."); + } +} + +/** + * The goal of this example is to show how to use Pinecone as a vector store + * for LlamaIndexTS, with or without preFilters. Note that main() first overwrites the PINECONE_* and OPENAI_* environment variables with placeholder strings, so replace those with real values before running. + * + * It should not be used in production as-is, + * as you will want a proper PineconeVectorStore implementation instead. + */ +async function main() { + process.env.PINECONE_API_KEY = 'Your Pinecone API Key.'; + process.env.PINECONE_ENVIRONMENT = 'Your Pinecone Environment.'; + process.env.PINECONE_PROJECT_ID = 'Your Pinecone Project ID.'; + process.env.PINECONE_INDEX_NAME = 'Your Pinecone Index Name.'; + process.env.OPENAI_API_KEY = 'Your OpenAI API Key.'; + process.env.OPENAI_API_ORGANISATION = 'Your OpenAI API Organisation.' 
+ + const getPineconeVectorStore = async () => { + return new PineconeVectorStore({ + indexName: process.env.PINECONE_INDEX_NAME || 'index-name', + client: new Pinecone(), + }); + } + + const getServiceContext = () => { + const openAI = new OpenAI({ + model: "gpt-4", + apiKey: process.env.OPENAI_API_KEY + }); + + return serviceContextFromDefaults({ + llm: openAI, + }); + }; + + const getQueryEngine = async (filter: unknown) => { + const vectorStore = await getPineconeVectorStore(); + const serviceContext = getServiceContext(); + + const vectorStoreIndex = await VectorStoreIndex.fromVectorStore( + vectorStore, + serviceContext, + ); + + const retriever = new VectorIndexRetriever({ index: vectorStoreIndex, similarityTopK: 500 }); + + const responseSynthesizer = new ResponseSynthesizer({ + serviceContext, + responseBuilder: new TreeSummarize(serviceContext), + }); + + return new RetrieverQueryEngine(retriever, responseSynthesizer, { + filter + }); + } + + // "whatever" stands for a key from your own metadata; $gte/$lte are Pinecone range operators, so this filter keeps records whose value is between 1 and 100. + const queryEngine = await getQueryEngine({ + whatever: { + $gte: 1, + $lte: 100 + }, + }); + + const response = await queryEngine.query('How many results do you have?'); + + console.log(response.toString()); +} + +main().catch(console.error); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2729e5190..2156eacee 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -106,6 +106,9 @@ importers: '@notionhq/client': specifier: ^2.2.12 version: 2.2.12 + '@pinecone-database/pinecone': + specifier: ^1.0.1 + version: 1.1.0 commander: specifier: ^11.0.0 version: 11.0.0 @@ -404,7 +407,7 @@ packages: digest-fetch: 1.3.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding dev: false @@ -3147,6 +3150,18 @@ packages: - webpack-cli dev: false + /@edge-runtime/primitives@4.0.2: + resolution: {integrity: sha512-zIzzqvq62O0gxKv/PrfFBn2TEJtJYw6YlNyfLsWr16Lxz6bT8CB1IrfWa9vc5zsQPElP/orwZCu+x80+ihWQyQ==} 
engines: {node: '>=16'} + dev: false + + /@edge-runtime/types@2.2.4: + resolution: {integrity: sha512-w2DrfkLW4C/r5lpjsICc76qGX++sNvnN8sYeqXsTSpWInc8+3unofGsDUw4w34T7zQ7Mmcyld04qiLIrNoz+fQ==} + engines: {node: '>=16'} + dependencies: + '@edge-runtime/primitives': 4.0.2 + dev: false + /@esbuild/android-arm64@0.18.20: resolution: {integrity: sha512-Nz4rJcchGDtENV0eMKUNa6L12zz2zBDXuhj/Vjh18zGqB44Bi7MBMSXjgunJgjRhCmKOjnPuZp4Mb6OKqtMHLQ==} engines: {node: '>=12'} @@ -3459,7 +3474,7 @@ packages: dependencies: '@jest/fake-timers': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-mock: 29.7.0 dev: true @@ -3486,7 +3501,7 @@ packages: dependencies: '@jest/types': 29.6.3 '@sinonjs/fake-timers': 10.3.0 - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-message-util: 29.7.0 jest-mock: 29.7.0 jest-util: 29.7.0 @@ -3830,7 +3845,7 @@ packages: engines: {node: '>=12'} dependencies: '@types/node-fetch': 2.6.4 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding dev: false @@ -3840,11 +3855,24 @@ packages: engines: {node: '>=12'} dependencies: '@types/node-fetch': 2.6.6 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding dev: false + /@pinecone-database/pinecone@1.1.0: + resolution: {integrity: sha512-THg+D3cSYVCMmphroOEBQOU9UsOhABYcrExZyurcz8cZ3znipDyJuiX9F3CavysnQa5DTzQEZxcH1YmEMGW8mg==} + engines: {node: '>=14.0.0'} + dependencies: + '@edge-runtime/types': 2.2.4 + '@sinclair/typebox': 0.29.6 + '@types/node': 18.18.0 + ajv: 8.12.0 + cross-fetch: 3.1.8(encoding@0.1.13) + encoding: 0.1.13 + typescript: 4.9.5 + dev: false + /@pkgr/utils@2.4.0: resolution: {integrity: sha512-2OCURAmRtdlL8iUDTypMrrxfwe8frXTeXaxGsVOaYtc/wrUyk8Z/0OBetM7cdlsy7ZFWlMX72VogKeh+A4Xcjw==} engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0} @@ -3879,6 +3907,10 @@ packages: /@sinclair/typebox@0.27.8: resolution: {integrity: 
sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} + /@sinclair/typebox@0.29.6: + resolution: {integrity: sha512-aX5IFYWlMa7tQ8xZr3b2gtVReCvg7f3LEhjir/JAjX2bJCMVJA5tIPv30wTD4KDfcwMd7DDYY3hFDeGmOgtrZQ==} + dev: false + /@sindresorhus/is@0.14.0: resolution: {integrity: sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==} engines: {node: '>=6'} @@ -4174,26 +4206,26 @@ packages: resolution: {integrity: sha512-ALYone6pm6QmwZoAgeyNksccT9Q4AWZQ6PvfwR37GT6r6FWUPguq6sUmNGSMV2Wr761oQoBxwGGa6DR5o1DC9g==} dependencies: '@types/connect': 3.4.35 - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/bonjour@3.5.10: resolution: {integrity: sha512-p7ienRMiS41Nu2/igbJxxLDWrSZ0WxM8UQgCeO9KhoVF7cOVFkrKsiDr1EsJIla8vV3oEEjGcz11jc5yimhzZw==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/connect-history-api-fallback@1.5.0: resolution: {integrity: sha512-4x5FkPpLipqwthjPsF7ZRbOv3uoLUFkTA9G9v583qi4pACvq0uTELrB8OLUzPWUI4IJIyvM85vzkV1nyiI2Lig==} dependencies: '@types/express-serve-static-core': 4.17.35 - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/connect@3.4.35: resolution: {integrity: sha512-cdeYyv4KWoEgpBISTxWvqYsVy444DOqehiF3fM3ne10AmJ62RSyNkUnxMJXHQWRQQX2eR94m5y1IZyDwBjV9FQ==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/eslint-scope@3.7.4: @@ -4214,7 +4246,7 @@ packages: /@types/express-serve-static-core@4.17.35: resolution: {integrity: sha512-wALWQwrgiB2AWTT91CB62b6Yt0sNHpznUXeZEcnPU3DRdlDIz74x8Qg1UUYKSVFi+va5vKOLYRBI1bRKiLLKIg==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 '@types/qs': 6.9.7 '@types/range-parser': 1.2.4 '@types/send': 0.17.1 @@ -4233,13 +4265,13 @@ packages: resolution: {integrity: sha512-ZUxbzKl0IfJILTS6t7ip5fQQM/J3TJYubDm3nMbgubNNYS62eXeUpoLUC8/7fJNiFYHTrGPQn7hspDUzIHX3UA==} dependencies: '@types/minimatch': 5.1.2 - '@types/node': 20.7.0 + 
'@types/node': 18.18.0 dev: true /@types/graceful-fs@4.1.7: resolution: {integrity: sha512-MhzcwU8aUygZroVwL2jeYk6JisJrPl/oov/gsgGCue9mkgl9wjGbzReYQClxiUgFDnib9FuHqTndccKeZKxTRw==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: true /@types/hast@2.3.5: @@ -4262,7 +4294,7 @@ packages: /@types/http-proxy@1.17.11: resolution: {integrity: sha512-HC8G7c1WmaF2ekqpnFq626xd3Zz0uvaqFmBJNRZCGEZCXkvSdJoNFn/8Ygbd9fKNQj8UzLdCETaI0UWPAjK7IA==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/inquirer@6.5.0: @@ -4308,7 +4340,7 @@ packages: /@types/keyv@3.1.4: resolution: {integrity: sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/lodash@4.14.199: @@ -4438,7 +4470,7 @@ packages: /@types/responselike@1.0.0: resolution: {integrity: sha512-85Y2BjiufFzaMIlvJDvTTB8Fxl2xfLo4HgmHzVBz08w4wDePCTjYw66PdrolO0kzli3yam/YCgRufyo1DdQVTA==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/retry@0.12.0: @@ -4462,7 +4494,7 @@ packages: resolution: {integrity: sha512-Cwo8LE/0rnvX7kIIa3QHCkcuF21c05Ayb0ZfxPiv0W8VRiZiNW/WuRupHKpqqGVGf7SUA44QSOUKaEd9lIrd/Q==} dependencies: '@types/mime': 1.3.2 - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/serve-index@1.9.1: @@ -4476,13 +4508,13 @@ packages: dependencies: '@types/http-errors': 2.0.1 '@types/mime': 3.0.1 - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/sockjs@0.3.33: resolution: {integrity: sha512-f0KEEe05NvUnat+boPTZ0dgaLZ4SfSouXUgv5noUiefG2ajgKjmETo9ZJyuqsl7dfl2aHlLJUiki6B4ZYldiiw==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/stack-utils@2.0.1: @@ -4492,7 +4524,7 @@ packages: /@types/through@0.0.31: resolution: {integrity: sha512-LpKpmb7FGevYgXnBXYs6HWnmiFyVG07Pt1cnbgM1IhEacITTiUaBXXvOR3Y50ksaJWGSfhbEvQFivQEFGCC55w==} dependencies: - '@types/node': 20.7.0 + 
'@types/node': 18.18.0 dev: true /@types/tinycolor2@1.4.4: @@ -4521,7 +4553,7 @@ packages: /@types/ws@8.5.5: resolution: {integrity: sha512-lwhs8hktwxSjf9UaZ9tG5M03PGogvFaH8gUgLNbN9HKIg0dvv6q+gkSuJ8HN4/VbyxkuLzCjlN7GquQ0gUJfIg==} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 dev: false /@types/yargs-parser@21.0.1: @@ -6189,10 +6221,10 @@ packages: resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==} dev: true - /cross-fetch@3.1.8: + /cross-fetch@3.1.8(encoding@0.1.13): resolution: {integrity: sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg==} dependencies: - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding dev: false @@ -6939,6 +6971,12 @@ packages: engines: {node: '>= 0.8'} dev: false + /encoding@0.1.13: + resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} + dependencies: + iconv-lite: 0.6.3 + dev: false + /end-of-stream@1.4.4: resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==} dependencies: @@ -7718,7 +7756,7 @@ packages: /fbjs@3.0.5: resolution: {integrity: sha512-ztsSx77JBtkuMrEypfhgc3cI0+0h+svqeie7xHbh1k/IKdcydnvadp/mUaGgjAOXQmQSxsqgaRhS3q9fy+1kxg==} dependencies: - cross-fetch: 3.1.8 + cross-fetch: 3.1.8(encoding@0.1.13) fbjs-css-vars: 1.0.2 loose-envify: 1.4.0 object-assign: 4.1.1 @@ -8676,6 +8714,13 @@ packages: dependencies: safer-buffer: 2.1.2 + /iconv-lite@0.6.3: + resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + engines: {node: '>=0.10.0'} + dependencies: + safer-buffer: 2.1.2 + dev: false + /icss-utils@5.1.0(postcss@8.4.28): resolution: {integrity: sha512-soFhflCVWLfRNOPU3iv5Z9VUdT44xFRbzjLsEzSr5AQmgqPMTHdU3PMT1Cf1ssx8fLNJDA1juftYl+PUcv3MqA==} engines: {node: ^10 || ^12 
|| >= 14} @@ -9316,7 +9361,7 @@ packages: '@jest/expect': 29.7.0 '@jest/test-result': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 20.7.0 + '@types/node': 18.18.0 chalk: 4.1.2 co: 4.6.0 dedent: 1.5.1 @@ -9480,7 +9525,7 @@ packages: '@jest/environment': 29.7.0 '@jest/fake-timers': 29.7.0 '@jest/types': 29.6.3 - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-mock: 29.7.0 jest-util: 29.7.0 dev: true @@ -9547,7 +9592,7 @@ packages: engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} dependencies: '@jest/types': 29.6.3 - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-util: 29.7.0 dev: true @@ -9685,7 +9730,7 @@ packages: engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} dependencies: '@jest/types': 29.6.3 - '@types/node': 20.7.0 + '@types/node': 18.18.0 chalk: 4.1.2 ci-info: 3.8.0 graceful-fs: 4.2.11 @@ -9734,7 +9779,7 @@ packages: resolution: {integrity: sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==} engines: {node: '>= 10.13.0'} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 merge-stream: 2.0.0 supports-color: 8.1.1 @@ -9742,7 +9787,7 @@ packages: resolution: {integrity: sha512-l3ccBOabTdkng8I/ORCkADz4eSMKejTYv1vB/Z83UiubqhC1oQ5Li6dWCyqOIvSifGjUBxuvxvlm6KGK2DtuAQ==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-util: 29.6.3 merge-stream: 2.0.0 supports-color: 8.1.1 @@ -9752,7 +9797,7 @@ packages: resolution: {integrity: sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} dependencies: - '@types/node': 20.7.0 + '@types/node': 18.18.0 jest-util: 29.7.0 merge-stream: 2.0.0 supports-color: 8.1.1 @@ -10589,7 +10634,7 @@ packages: resolution: {integrity: sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==} dev: false - /node-fetch@2.7.0: + /node-fetch@2.7.0(encoding@0.1.13): resolution: 
{integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} engines: {node: 4.x || >=6.0.0} peerDependencies: @@ -10598,6 +10643,7 @@ packages: encoding: optional: true dependencies: + encoding: 0.1.13 whatwg-url: 5.0.0 dev: false @@ -10853,7 +10899,7 @@ packages: digest-fetch: 1.3.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding dev: false -- GitLab