Skip to content
Snippets Groups Projects
Unverified Commit 5c4badbc authored by Aaron Ji's avatar Aaron Ji Committed by GitHub
Browse files

chore: add 'late_chunking' for Jina embedding (#1223)

parent 2cd1383d
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
---
Extend JinaAIEmbedding parameters: rename `task_type` to `task` and add `late_chunking`
...@@ -20,8 +20,9 @@ export type JinaEmbeddingRequest = { ...@@ -20,8 +20,9 @@ export type JinaEmbeddingRequest = {
input: Array<{ text: string } | { url: string } | { bytes: string }>; input: Array<{ text: string } | { url: string } | { bytes: string }>;
model?: string; model?: string;
encoding_type?: EncodingType; encoding_type?: EncodingType;
task_type?: TaskType; task?: TaskType;
dimensions?: number; dimensions?: number;
late_chunking?: boolean;
}; };
export type JinaEmbeddingResponse = { export type JinaEmbeddingResponse = {
...@@ -44,9 +45,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding { ...@@ -44,9 +45,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
apiKey: string; apiKey: string;
model: string; model: string;
baseURL: string; baseURL: string;
taskType: TaskType | undefined; task?: TaskType | undefined;
encodingType?: EncodingType | undefined; encodingType?: EncodingType | undefined;
dimensions?: number | undefined; dimensions?: number | undefined;
late_chunking?: boolean | undefined;
async getTextEmbedding(text: string): Promise<number[]> { async getTextEmbedding(text: string): Promise<number[]> {
const result = await this.getJinaEmbedding({ input: [{ text }] }); const result = await this.getJinaEmbedding({ input: [{ text }] });
...@@ -87,8 +89,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding { ...@@ -87,8 +89,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
this.model = init?.model ?? "jina-embeddings-v3"; this.model = init?.model ?? "jina-embeddings-v3";
this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings"; this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings";
init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize); init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize);
this.taskType = init?.taskType; this.task = init?.task;
this.encodingType = init?.encodingType; this.encodingType = init?.encodingType;
this.dimensions = init?.dimensions;
this.late_chunking = init?.late_chunking;
} }
private async getImageInput( private async getImageInput(
...@@ -125,8 +129,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding { ...@@ -125,8 +129,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
body: JSON.stringify({ body: JSON.stringify({
model: this.model, model: this.model,
encoding_type: this.encodingType ?? "float", encoding_type: this.encodingType ?? "float",
...(this.taskType && { task_type: this.taskType }), ...(this.task && { task: this.task }),
...(this.dimensions !== undefined && { dimensions: this.dimensions }), ...(this.dimensions !== undefined && { dimensions: this.dimensions }),
...(this.late_chunking !== undefined && {
late_chunking: this.late_chunking,
}),
...params, ...params,
}), }),
}); });
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment