diff --git a/.changeset/tender-candles-shop.md b/.changeset/tender-candles-shop.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd9f836fe515cd4c418c5c06ed951cc15a56a612
--- /dev/null
+++ b/.changeset/tender-candles-shop.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/voyage-ai": major
+---
+
+Adding VoyageAI embedding package
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b401223f97ca98486bdf172f07e762903223add1..1d1e09ebb066e9da724f8aedc469aae0beef95b1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,8 +41,15 @@ pnpm install
 
 ### Build the packages
 
+You'll need Turbo to build the packages. If you don't have it, you can run it with `pnpx`.
+
+To build all packages, run:
+
 ```shell
 # Build all packages
+pnpx turbo build --filter "./packages/*"
+
+# Or if you have turbo installed, you can run:
 turbo build --filter "./packages/*"
 ```
 
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/voyageai.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/voyageai.mdx
new file mode 100644
index 0000000000000000000000000000000000000000..8b354d730a2c08ee042dc1a9974b315dc0dcf99e
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/voyageai.mdx
@@ -0,0 +1,48 @@
+---
+title: VoyageAI
+---
+
+To use VoyageAI embeddings, you need to import `VoyageAIEmbedding` from `@llamaindex/voyage-ai`.
+
+Provide your VoyageAI API token either through the `VOYAGE_API_TOKEN` environment variable or as the `apiKey` option of `VoyageAIEmbedding`.
+
+## Installation
+
+import { Tab, Tabs } from "fumadocs-ui/components/tabs";
+
+<Tabs groupId="install" items={["npm", "yarn", "pnpm"]} persist>
+  ```shell tab="npm"
+  npm install llamaindex @llamaindex/voyage-ai
+  ```
+
+  ```shell tab="yarn"
+  yarn add llamaindex @llamaindex/voyage-ai
+  ```
+
+  ```shell tab="pnpm"
+  pnpm add llamaindex @llamaindex/voyage-ai
+  ```
+</Tabs>
+
+```ts
+import { VoyageAIEmbedding } from "@llamaindex/voyage-ai";
+import { Document, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new VoyageAIEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+  query,
+});
+```
+
+## API Reference
+
+- [VoyageAIEmbedding](/docs/api/classes/VoyageAIEmbedding)
diff --git a/examples/package.json b/examples/package.json
index 3627565bf08b759378e923fe54bd812d61d91e86..69e9e724440d9e5fc2111f7bb815069aff611944 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -41,6 +41,7 @@
     "@llamaindex/upstash": "^0.0.7",
     "@llamaindex/vercel": "^0.0.13",
     "@llamaindex/vllm": "^0.0.24",
+    "@llamaindex/voyage-ai": "^0.0.1",
     "@llamaindex/weaviate": "^0.0.7",
     "@llamaindex/workflow": "^0.0.11",
     "@notionhq/client": "^2.2.15",
diff --git a/examples/voyage-ai/embedding.ts b/examples/voyage-ai/embedding.ts
new file mode 100644
index 0000000000000000000000000000000000000000..2ba7c6f3c7700d33a893239caabdc3ec5efac425
--- /dev/null
+++ b/examples/voyage-ai/embedding.ts
@@ -0,0 +1,17 @@
+import { VoyageAIEmbedding } from "@llamaindex/voyage-ai";
+
+async function main() {
+  // API token can be provided as an environment variable too
+  // using VOYAGE_API_TOKEN variable
+  const apiKey = process.env.VOYAGE_API_TOKEN ?? "YOUR_API_TOKEN";
+  const model = "voyage-3-lite";
+  const embedModel = new VoyageAIEmbedding({
+    model,
+    apiKey,
+  });
+  const texts = ["hello", "world"];
+  const embeddings = await embedModel.getTextEmbeddingsBatch(texts);
+  console.log(`\nWe have ${embeddings.length} embeddings`);
+}
+
+main().catch(console.error);
diff --git a/packages/providers/voyage-ai/package.json b/packages/providers/voyage-ai/package.json
new file mode 100644
index 0000000000000000000000000000000000000000..02fcbf6c373ddbfb30d6f58e4384996ac377f72f
--- /dev/null
+++ b/packages/providers/voyage-ai/package.json
@@ -0,0 +1,40 @@
+{
+  "name": "@llamaindex/voyage-ai",
+  "description": "VoyageAI Adapter for LlamaIndex",
+  "version": "0.0.1",
+  "type": "module",
+  "main": "./dist/index.cjs",
+  "module": "./dist/index.js",
+  "exports": {
+    ".": {
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      },
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      }
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/run-llama/LlamaIndexTS.git",
+    "directory": "packages/providers/voyage-ai"
+  },
+  "scripts": {
+    "build": "bunchee",
+    "dev": "bunchee --watch"
+  },
+  "devDependencies": {
+    "bunchee": "6.0.3"
+  },
+  "dependencies": {
+    "@llamaindex/core": "workspace:*",
+    "@llamaindex/env": "workspace:*",
+    "voyageai": "0.0.3-1"
+  }
+}
diff --git a/packages/providers/voyage-ai/src/embedding.ts b/packages/providers/voyage-ai/src/embedding.ts
new file mode 100644
index 0000000000000000000000000000000000000000..450bb6ba7d5715b24001bc0efec1cc247d0f6f8e
--- /dev/null
+++ b/packages/providers/voyage-ai/src/embedding.ts
@@ -0,0 +1,167 @@
+import { BaseEmbedding } from "@llamaindex/core/embeddings";
+import type { MessageContentDetail } from "@llamaindex/core/llms";
+import { extractSingleText } from "@llamaindex/core/utils";
+import { getEnv } from "@llamaindex/env";
+import { VoyageAI, VoyageAIClient } from "voyageai";
+
+const DEFAULT_MODEL = "voyage-3";
+const API_TOKEN_ENV_VARIABLE_NAME = "VOYAGE_API_TOKEN";
+// const API_ROOT = "https://api.voyageai.com/v1/embeddings";
+// Seconds: the unit documented on `timeout` and sent as `timeoutInSeconds`.
+const DEFAULT_TIMEOUT = 60;
+const DEFAULT_MAX_RETRIES = 5;
+
+/**
+ * Embedding model backed by the VoyageAI client, exposed through the
+ * LlamaIndex `BaseEmbedding` interface.
+ */
+export class VoyageAIEmbedding extends BaseEmbedding {
+  /**
+   * VoyageAI model to use
+   * @default "voyage-3"
+   * @see https://docs.voyageai.com/docs/embeddings
+   */
+  model: string;
+
+  /**
+   * VoyageAI API token
+   * If not provided, it will try to get the token from the environment variable `VOYAGE_API_TOKEN`
+   * @see https://docs.voyageai.com/docs/api-key-and-installation
+   */
+  apiKey: string;
+
+  /**
+   * Maximum number of retries, sent with every embed request
+   * @default 5
+   */
+  maxRetries: number;
+
+  /**
+   * Timeout in seconds, sent with every embed request
+   * @default 60
+   */
+  timeout: number;
+
+  /**
+   * Whether to truncate the input texts to fit within the context length. Defaults to `true`.
+   * If `true`, over-length input texts will be truncated to fit within the context length, before vectorized by the embedding model.
+   * If `false`, an error will be raised if any given text exceeds the context length.
+   */
+  truncation: boolean;
+
+  /**
+   * VoyageAI supports `document` and `query` as input types, or it can be left undefined. Using an input type prepends the input with a prompt before embedding.
+   * Example from their docs: using "query" adds "Represent the query for retrieving supporting documents:"
+   * VoyageAI says these types improve performance, but it will add to token usage. Embeddings with input types are compatible with those that don't use them.
+   * Setting this to `query` will use the `query` input type for getQueryEmbedding(s).
+   * Setting this to `document` will use the `document` input type for getTextEmbedding(s).
+   * Setting this to `both` will do both of the above.
+   * By default, this is undefined, which means no input types are used.
+   * @see https://docs.voyageai.com/docs/embeddings
+   * @default undefined
+   */
+  useInputTypes: "query" | "document" | "both" | undefined;
+
+  /**
+   * VoyageAI client
+   */
+  client: VoyageAIClient;
+
+  /**
+   * @param init - optional field overrides; an omitted field gets its default,
+   * and an omitted `client` is created from the resolved `apiKey`
+   */
+  constructor(init?: Partial<VoyageAIEmbedding>) {
+    super();
+
+    this.model = init?.model ?? DEFAULT_MODEL;
+    this.apiKey = init?.apiKey ?? getEnv(API_TOKEN_ENV_VARIABLE_NAME) ?? "";
+    this.maxRetries = init?.maxRetries ?? DEFAULT_MAX_RETRIES;
+    this.timeout = init?.timeout ?? DEFAULT_TIMEOUT;
+    this.truncation = init?.truncation ?? true;
+    this.useInputTypes = init?.useInputTypes;
+    // `init` is typed to accept a `client`, so use it rather than ignoring it.
+    this.client = init?.client ?? new VoyageAIClient({ apiKey: this.apiKey });
+  }
+
+  /**
+   * Embeds one text. The `document` input type is sent only if `useInputTypes` enables it.
+   */
+  async getTextEmbedding(text: string): Promise<number[]> {
+    const embeddings = await this.getVoyageAIEmbedding([text], "document");
+    return embeddings[0]!;
+  }
+
+  /**
+   * Embeds one query. The `query` input type is sent only if `useInputTypes` enables it.
+   * @returns the embedding, or `null` when `extractSingleText(query)` is falsy
+   */
+  async getQueryEmbedding(
+    query: MessageContentDetail,
+  ): Promise<number[] | null> {
+    const text = extractSingleText(query);
+    if (!text) {
+      return null;
+    }
+    const embeddings = await this.getVoyageAIEmbedding([text], "query");
+    return embeddings[0]!;
+  }
+
+  /**
+   * Embeds several texts with one embed request (same input type rule as `getTextEmbedding`).
+   */
+  getTextEmbeddings = async (texts: string[]): Promise<number[][]> => {
+    return this.getVoyageAIEmbedding(texts, "document");
+  };
+
+  /**
+   * Embeds several queries with one embed request (same input type rule as `getQueryEmbedding`).
+   */
+  async getQueryEmbeddings(queries: string[]): Promise<number[][]> {
+    return this.getVoyageAIEmbedding(queries, "query");
+  }
+
+  /**
+   * Decides which input type, if any, is sent for a request of the given kind.
+   * @returns `requestType` when `useInputTypes` is `both` or equals `requestType`, otherwise `undefined`
+   */
+  private getInputType(
+    requestType: "query" | "document",
+  ): "query" | "document" | undefined {
+    const { useInputTypes } = this;
+    return useInputTypes === "both" || useInputTypes === requestType
+      ? requestType
+      : undefined;
+  }
+
+  /**
+   * Sends one embed request and returns one vector per item of the response.
+   * `timeout` and `maxRetries` are forwarded as request options.
+   * @param inputs - text(s) to embed
+   * @param inputType - kind of request; filtered through `getInputType` before being sent
+   * @throws Error when the response has no `data`
+   */
+  private async getVoyageAIEmbedding(
+    inputs: VoyageAI.EmbedRequestInput,
+    inputType: VoyageAI.EmbedRequestInputType,
+  ): Promise<number[][]> {
+    const request: VoyageAI.EmbedRequest = {
+      model: this.model,
+      input: inputs,
+      truncation: this.truncation,
+    };
+    const preferredInputType = this.getInputType(inputType);
+    if (preferredInputType) {
+      request.inputType = preferredInputType;
+    }
+    const response = await this.client.embed(request, {
+      timeoutInSeconds: this.timeout,
+      maxRetries: this.maxRetries,
+    });
+    if (!response.data) {
+      throw new Error("Failed to get embeddings from VoyageAI");
+    }
+    // An item that carries no embedding is returned as an empty vector.
+    return response.data.map((item) => item.embedding ?? []);
+  }
+}
diff --git a/packages/providers/voyage-ai/src/index.ts b/packages/providers/voyage-ai/src/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..1f472c3c2e146ad3298967ce9954d5fc8364990a
--- /dev/null
+++ b/packages/providers/voyage-ai/src/index.ts
@@ -0,0 +1 @@
+export { VoyageAIEmbedding } from "./embedding";
diff --git a/packages/providers/voyage-ai/tsconfig.json b/packages/providers/voyage-ai/tsconfig.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd8900e3ac6680d07b9e885f4121a6834bf60d68
--- /dev/null
+++ b/packages/providers/voyage-ai/tsconfig.json
@@ -0,0 +1,19 @@
+{
+  "extends": "../../../tsconfig.json",
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "outDir": "./lib",
+    "tsBuildInfoFile": "./lib/.tsbuildinfo"
+  },
+  "include": ["./src"],
+  "references": [
+    {
+      "path": "../../core/tsconfig.json"
+    },
+    {
+      "path": "../../env/tsconfig.json"
+    }
+  ]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 89aa8da4c3f9a20e58f5a8a8efb0f11758b9a715..f943208147bde2e7aa2323c14062f3e3487f21a5 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -685,6 +685,9 @@ importers:
       '@llamaindex/vllm':
         specifier: ^0.0.24
         version: link:../packages/providers/vllm
+      '@llamaindex/voyage-ai':
+        specifier: ^0.0.1
+        version: link:../packages/providers/voyage-ai
       '@llamaindex/weaviate':
         specifier: ^0.0.7
         version: link:../packages/providers/storage/weaviate
@@ -1443,7 +1446,7 @@ importers:
         version: link:../../../env
       chromadb:
         specifier: 1.10.3
-        version: 1.10.3(cohere-ai@7.14.0)(openai@4.83.0(ws@8.18.0(bufferutil@4.0.9))(zod@3.24.2))
+        version: 1.10.3(cohere-ai@7.14.0)(openai@4.83.0(ws@8.18.0(bufferutil@4.0.9))(zod@3.24.2))(voyageai@0.0.3-1)
       chromadb-default-embed:
         specifier:
^2.13.2 version: 2.13.2 @@ -1636,6 +1639,22 @@ importers: specifier: 6.3.4 version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/voyage-ai: + dependencies: + '@llamaindex/core': + specifier: workspace:* + version: link:../../core + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + voyageai: + specifier: 0.0.3-1 + version: 0.0.3-1 + devDependencies: + bunchee: + specifier: 6.0.3 + version: 6.0.3(typescript@5.7.3) + packages/readers: dependencies: '@azure/cosmos': @@ -4263,6 +4282,15 @@ packages: rollup: optional: true + '@rollup/plugin-node-resolve@15.3.1': + resolution: {integrity: sha512-tgg6b91pAybXHJQMAAwW9VuWBO6Thi+q7BCNARLwSqlmsHz0XYURtGvh/AuwSADXSI4h/2uHbs7s4FzlZDGSGA==} + engines: {node: '>=14.0.0'} + peerDependencies: + rollup: ^2.78.0||^3.0.0||^4.0.0 + peerDependenciesMeta: + rollup: + optional: true + '@rollup/plugin-node-resolve@16.0.0': resolution: {integrity: sha512-0FPvAeVUT/zdWoO0jnb/V5BlBsUSNfkIOtFHzMO4H9MOklrmQFY6FduVHKucNb/aTFxvnGhj4MNj/T1oNdDfNg==} engines: {node: '>=14.0.0'} @@ -5926,6 +5954,16 @@ packages: resolution: {integrity: sha512-WDtdLmJvAuNNPzByAYpRo2rF1Mmradw6gvWsQKf63476DDXmomT9zUiGypLcG4ibIM67vhAj8jJRdbmEws2Aqw==} engines: {node: '>=6.14.2'} + bunchee@6.0.3: + resolution: {integrity: sha512-Yq/srd3ocXPAHv0KEdJvhFMNUOOVVqy0kNzaGVCirk/+MfnLdvZO5uf5BHugIHe/qSvWUQTJZ3SAfB/VABONeQ==} + engines: {node: '>= 18.0.0'} + hasBin: true + peerDependencies: + typescript: ^4.1 || ^5.0 + peerDependenciesMeta: + typescript: + optional: true + bunchee@6.3.4: resolution: {integrity: sha512-bMy2/+tdMPXOqBAX+9BI0HTNjOXOZ2TXjgFpp5Prt0ztP15xQQUcsECnU7wuBPpLH+4id3rXakH9icdbBRZHZQ==} engines: {node: '>= 18.0.0'} @@ -11534,6 +11572,9 @@ packages: jsdom: optional: true + voyageai@0.0.3-1: + resolution: {integrity: sha512-R3jN/xnILWoMBL3jPY61Ydm1JbpK3J+VmXBoHvlNg1Xz8h0xdX7sEffXeSu+sAEKQaPyWXVQDmM/jpBhPXw58g==} + vue-demi@0.14.10: resolution: {integrity: 
sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==} engines: {node: '>=12'} @@ -14803,6 +14844,16 @@ snapshots: optionalDependencies: rollup: 4.34.6 + '@rollup/plugin-node-resolve@15.3.1(rollup@4.34.6)': + dependencies: + '@rollup/pluginutils': 5.1.4(rollup@4.34.6) + '@types/resolve': 1.20.2 + deepmerge: 4.3.1 + is-module: 1.0.0 + resolve: 1.22.10 + optionalDependencies: + rollup: 4.34.6 + '@rollup/plugin-node-resolve@16.0.0(rollup@4.34.6)': dependencies: '@rollup/pluginutils': 5.1.4(rollup@4.34.6) @@ -16932,6 +16983,31 @@ snapshots: dependencies: node-gyp-build: 4.8.4 + bunchee@6.0.3(typescript@5.7.3): + dependencies: + '@rollup/plugin-commonjs': 28.0.2(rollup@4.34.6) + '@rollup/plugin-json': 6.1.0(rollup@4.34.6) + '@rollup/plugin-node-resolve': 15.3.1(rollup@4.34.6) + '@rollup/plugin-replace': 6.0.2(rollup@4.34.6) + '@rollup/plugin-wasm': 6.2.2(rollup@4.34.6) + '@rollup/pluginutils': 5.1.4(rollup@4.34.6) + '@swc/core': 1.10.16(@swc/helpers@0.5.15) + '@swc/helpers': 0.5.15 + clean-css: 5.3.3 + glob: 11.0.1 + magic-string: 0.30.17 + ora: 8.2.0 + picomatch: 4.0.2 + pretty-bytes: 5.6.0 + rollup: 4.34.6 + rollup-plugin-dts: 6.1.1(rollup@4.34.6)(typescript@5.7.3) + rollup-plugin-swc3: 0.11.2(@swc/core@1.10.16(@swc/helpers@0.5.15))(rollup@4.34.6) + rollup-preserve-directives: 1.1.3(rollup@4.34.6) + tslib: 2.8.1 + yargs: 17.7.2 + optionalDependencies: + typescript: 5.7.3 + bunchee@6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.2): dependencies: '@rollup/plugin-commonjs': 28.0.2(rollup@4.34.6) @@ -17134,13 +17210,14 @@ snapshots: transitivePeerDependencies: - bare-buffer - chromadb@1.10.3(cohere-ai@7.14.0)(openai@4.83.0(ws@8.18.0(bufferutil@4.0.9))(zod@3.24.2)): + chromadb@1.10.3(cohere-ai@7.14.0)(openai@4.83.0(ws@8.18.0(bufferutil@4.0.9))(zod@3.24.2))(voyageai@0.0.3-1): dependencies: cliui: 8.0.1 isomorphic-fetch: 3.0.0 optionalDependencies: cohere-ai: 7.14.0 openai: 4.83.0(ws@8.18.0(bufferutil@4.0.9))(zod@3.24.2) 
+ voyageai: 0.0.3-1 transitivePeerDependencies: - encoding @@ -22738,6 +22815,15 @@ snapshots: rollup: 4.34.6 rollup-preserve-directives: 1.1.3(rollup@4.34.6) + rollup-plugin-swc3@0.11.2(@swc/core@1.10.16(@swc/helpers@0.5.15))(rollup@4.34.6): + dependencies: + '@fastify/deepmerge': 1.3.0 + '@rollup/pluginutils': 5.1.4(rollup@4.34.6) + '@swc/core': 1.10.16(@swc/helpers@0.5.15) + get-tsconfig: 4.10.0 + rollup: 4.34.6 + rollup-preserve-directives: 1.1.3(rollup@4.34.6) + rollup-pluginutils@2.8.2: dependencies: estree-walker: 0.6.1 @@ -24326,6 +24412,18 @@ snapshots: - supports-color - terser + voyageai@0.0.3-1: + dependencies: + form-data: 4.0.0 + formdata-node: 6.0.3 + js-base64: 3.7.2 + node-fetch: 2.7.0 + qs: 6.11.2 + readable-stream: 4.7.0 + url-join: 4.0.1 + transitivePeerDependencies: + - encoding + vue-demi@0.14.10(vue@3.5.13(typescript@5.7.2)): dependencies: vue: 3.5.13(typescript@5.7.2)