From 6b70c5408fb1b7c610c950eb9288eee432e5bb96 Mon Sep 17 00:00:00 2001 From: Aaron Ji <127167174+DresAaron@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:44:43 +0800 Subject: [PATCH] chore: update JinaEmbedding for v3 release (#1187) --- .changeset/quick-brooms-sniff.md | 5 +++ .../src/embeddings/JinaAIEmbedding.ts | 33 +++++++++++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 .changeset/quick-brooms-sniff.md diff --git a/.changeset/quick-brooms-sniff.md b/.changeset/quick-brooms-sniff.md new file mode 100644 index 000000000..c3f2668d2 --- /dev/null +++ b/.changeset/quick-brooms-sniff.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: update JinaAIEmbedding, support embedding v3 diff --git a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts index 6cf789c6d..b0514b998 100644 --- a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts +++ b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts @@ -8,10 +8,20 @@ function isLocal(url: ImageType): boolean { return new URL(url).protocol === "file:"; } +type TaskType = + | "retrieval.passage" + | "retrieval.query" + | "separation" + | "classification" + | "text-matching"; +type EncodingType = "float" | "binary" | "ubinary"; + export type JinaEmbeddingRequest = { input: Array<{ text: string } | { url: string } | { bytes: string }>; model?: string; - encoding_type?: "float" | "binary" | "ubinary"; + encoding_type?: EncodingType; + task_type?: TaskType; + dimensions?: number; }; export type JinaEmbeddingResponse = { @@ -34,6 +44,9 @@ export class JinaAIEmbedding extends MultiModalEmbedding { apiKey: string; model: string; baseURL: string; + taskType: TaskType | undefined; + encodingType?: EncodingType | undefined; + dimensions?: number | undefined; async getTextEmbedding(text: string): Promise<number[]> { const result = await this.getJinaEmbedding({ input: [{ text }] }); @@ -71,9 +84,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding { ); } this.apiKey = apiKey; - this.model = init?.model ?? "jina-embeddings-v2-base-en"; + this.model = init?.model ?? "jina-embeddings-v3"; this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings"; init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize); + this.taskType = init?.taskType; + this.encodingType = init?.encodingType; } private async getImageInput( @@ -89,11 +104,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding { } private async getJinaEmbedding( - input: JinaEmbeddingRequest, + params: JinaEmbeddingRequest, ): Promise<JinaEmbeddingResponse> { // if input includes image, check if model supports multimodal embeddings if ( - input.input.some((i) => "url" in i || "bytes" in i) && + params.input.some((i) => "url" in i || "bytes" in i) && !JINA_MULTIMODAL_MODELS.includes(this.model) ) { throw new Error( @@ -109,13 +124,17 @@ export class JinaAIEmbedding extends MultiModalEmbedding { }, body: JSON.stringify({ model: this.model, - encoding_type: "float", - ...input, + encoding_type: this.encodingType ?? "float", + ...(this.taskType && { task_type: this.taskType }), + ...(this.dimensions !== undefined && { dimensions: this.dimensions }), + ...params, }), }); if (!response.ok) { + const reason = await response.text(); + throw new Error( - `Request ${this.baseURL} failed with status ${response.status}`, + `Request failed with status ${response.status}: ${reason}`, ); } const result: JinaEmbeddingResponse = await response.json(); -- GitLab