From 6b70c5408fb1b7c610c950eb9288eee432e5bb96 Mon Sep 17 00:00:00 2001
From: Aaron Ji <127167174+DresAaron@users.noreply.github.com>
Date: Sat, 14 Sep 2024 00:44:43 +0800
Subject: [PATCH] chore: update JinaEmbedding for v3 release (#1187)

---
 .changeset/quick-brooms-sniff.md              |  5 +++
 .../src/embeddings/JinaAIEmbedding.ts         | 33 +++++++++++++++----
 2 files changed, 31 insertions(+), 7 deletions(-)
 create mode 100644 .changeset/quick-brooms-sniff.md

diff --git a/.changeset/quick-brooms-sniff.md b/.changeset/quick-brooms-sniff.md
new file mode 100644
index 000000000..c3f2668d2
--- /dev/null
+++ b/.changeset/quick-brooms-sniff.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+feat: update JinaAIEmbedding to support jina-embeddings-v3 (now the default model), with optional task type and encoding type
diff --git a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
index 6cf789c6d..b0514b998 100644
--- a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
+++ b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
@@ -8,10 +8,20 @@ function isLocal(url: ImageType): boolean {
   return new URL(url).protocol === "file:";
 }
 
+type TaskType = // allowed values for the optional `task_type` request field
+  | "retrieval.passage"
+  | "retrieval.query"
+  | "separation"
+  | "classification"
+  | "text-matching";
+type EncodingType = "float" | "binary" | "ubinary"; // allowed values for the `encoding_type` request field
+
 export type JinaEmbeddingRequest = {
   input: Array<{ text: string } | { url: string } | { bytes: string }>;
   model?: string;
-  encoding_type?: "float" | "binary" | "ubinary";
+  encoding_type?: EncodingType; // getJinaEmbedding fills in `encodingType ?? "float"` unless the caller's params override it
+  task_type?: TaskType; // getJinaEmbedding adds this only when the instance's taskType is set
+  dimensions?: number; // getJinaEmbedding adds this only when the instance's dimensions is not undefined
 };
 
 export type JinaEmbeddingResponse = {
@@ -34,6 +44,9 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
   apiKey: string;
   model: string;
   baseURL: string;
+  taskType: TaskType | undefined; // sent as `task_type` in the request body when set
+  encodingType?: EncodingType | undefined; // sent as `encoding_type`; the request falls back to "float" when unset
+  dimensions?: number | undefined; // sent as `dimensions` in the request body when not undefined
 
   async getTextEmbedding(text: string): Promise<number[]> {
     const result = await this.getJinaEmbedding({ input: [{ text }] });
@@ -71,9 +84,13 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
       );
     }
     this.apiKey = apiKey;
-    this.model = init?.model ?? "jina-embeddings-v2-base-en";
+    this.model = init?.model ?? "jina-embeddings-v3";
     this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings";
     init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize);
+    // Optional request options: each stays undefined unless supplied via init.
+    this.taskType = init?.taskType;
+    this.encodingType = init?.encodingType;
+    this.dimensions = init?.dimensions;
   }
 
   private async getImageInput(
@@ -89,11 +104,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
   }
 
   private async getJinaEmbedding(
-    input: JinaEmbeddingRequest,
+    params: JinaEmbeddingRequest,
   ): Promise<JinaEmbeddingResponse> {
     // if input includes image, check if model supports multimodal embeddings
     if (
-      input.input.some((i) => "url" in i || "bytes" in i) &&
+      params.input.some((i) => "url" in i || "bytes" in i) &&
       !JINA_MULTIMODAL_MODELS.includes(this.model)
     ) {
       throw new Error(
@@ -109,13 +124,17 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
       },
       body: JSON.stringify({
         model: this.model,
-        encoding_type: "float",
-        ...input,
+        encoding_type: this.encodingType ?? "float",
+        ...(this.taskType && { task_type: this.taskType }),
+        ...(this.dimensions !== undefined && { dimensions: this.dimensions }),
+        ...params,
       }),
     });
     if (!response.ok) {
+      const reason = await response.text();
+
       throw new Error(
-        `Request ${this.baseURL} failed with status ${response.status}`,
+        `Request failed with status ${response.status}: ${reason}`,
       );
     }
     const result: JinaEmbeddingResponse = await response.json();
-- 
GitLab