From 5c4badbccaa59692e76c4cfffa612c0cbc31aa90 Mon Sep 17 00:00:00 2001
From: Aaron Ji <127167174+DresAaron@users.noreply.github.com>
Date: Wed, 18 Sep 2024 18:38:46 +0800
Subject: [PATCH] chore: add 'late_chunking' for Jina embedding (#1223)

---
 .changeset/yellow-donkeys-complain.md             |  5 +++++
 .../llamaindex/src/embeddings/JinaAIEmbedding.ts  | 15 +++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)
 create mode 100644 .changeset/yellow-donkeys-complain.md

diff --git a/.changeset/yellow-donkeys-complain.md b/.changeset/yellow-donkeys-complain.md
new file mode 100644
index 000000000..4ac9987f1
--- /dev/null
+++ b/.changeset/yellow-donkeys-complain.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+Extend JinaAIEmbedding parameters
diff --git a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
index 00188dc00..27f5d7407 100644
--- a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
+++ b/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts
@@ -20,8 +20,9 @@ export type JinaEmbeddingRequest = {
   input: Array<{ text: string } | { url: string } | { bytes: string }>;
   model?: string;
   encoding_type?: EncodingType;
-  task_type?: TaskType;
+  task?: TaskType;
   dimensions?: number;
+  late_chunking?: boolean;
 };
 
 export type JinaEmbeddingResponse = {
@@ -44,9 +45,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
   apiKey: string;
   model: string;
   baseURL: string;
-  taskType: TaskType | undefined;
+  task?: TaskType | undefined;
   encodingType?: EncodingType | undefined;
   dimensions?: number | undefined;
+  late_chunking?: boolean | undefined;
 
   async getTextEmbedding(text: string): Promise<number[]> {
     const result = await this.getJinaEmbedding({ input: [{ text }] });
@@ -87,8 +89,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
     this.model = init?.model ?? "jina-embeddings-v3";
     this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings";
     init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize);
-    this.taskType = init?.taskType;
+    this.task = init?.task;
     this.encodingType = init?.encodingType;
+    this.dimensions = init?.dimensions;
+    this.late_chunking = init?.late_chunking;
   }
 
   private async getImageInput(
@@ -125,8 +129,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
       body: JSON.stringify({
         model: this.model,
         encoding_type: this.encodingType ?? "float",
-        ...(this.taskType && { task_type: this.taskType }),
+        ...(this.task && { task: this.task }),
         ...(this.dimensions !== undefined && { dimensions: this.dimensions }),
+        ...(this.late_chunking !== undefined && {
+          late_chunking: this.late_chunking,
+        }),
         ...params,
       }),
     });
-- 
GitLab