diff --git a/.changeset/selfish-frogs-lie.md b/.changeset/selfish-frogs-lie.md new file mode 100644 index 0000000000000000000000000000000000000000..bd6ffdf3941db7efe9873baffbc6b0cfad225301 --- /dev/null +++ b/.changeset/selfish-frogs-lie.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Convert undefined values to null in LlamaCloud filters diff --git a/packages/llamaindex/src/cloud/LLamaCloudFileService.ts b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts index 48da789cede9adab83e51d8bef38add80a97aa71..2ee76e2ea513309ace0b1104ade5bbe309fdb0fb 100644 --- a/packages/llamaindex/src/cloud/LLamaCloudFileService.ts +++ b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts @@ -85,7 +85,7 @@ export class LLamaCloudFileService { await new Promise((resolve) => setTimeout(resolve, 100)); // Sleep for 100ms } throw new Error( - `File processing did not complete after ${maxAttempts} attempts.`, + `File processing did not complete after ${maxAttempts} attempts. Check your LlamaCloud index at https://cloud.llamaindex.ai/project/${projectId}/deploy/${pipelineId} for more details.`, ); } diff --git a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts index 7b08d0aa1889fa62edd3845fce4dc8df7e043a98..ca3a6ce6ec666849f0a25dccddb69b5601d97363 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts @@ -7,7 +7,12 @@ import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js"; import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js"; import { getPipelineCreate } from "./config.js"; import type { CloudConstructorParams } from "./type.js"; -import { getAppBaseUrl, getProjectId, initService } from "./utils.js"; +import { + getAppBaseUrl, + getPipelineId, + getProjectId, + initService, +} from "./utils.js"; import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api"; import { SentenceSplitter } from "@llamaindex/core/node-parser"; @@ -28,10 +33,7 @@ export class LlamaCloudIndex { verbose = Settings.debug, raiseOnError = false, ): Promise<void> { - const pipelineId = await this.getPipelineId( - this.params.name, - this.params.projectName, - ); + const pipelineId = await this.getPipelineId(); if (verbose) { console.log("Waiting for pipeline ingestion: "); @@ -78,10 +80,7 @@ export class LlamaCloudIndex { verbose = Settings.debug, raiseOnError = false, ): Promise<void> { - const pipelineId = await this.getPipelineId( - this.params.name, - this.params.projectName, - ); + const pipelineId = await this.getPipelineId(); if (verbose) { console.log("Loading data: "); @@ -145,19 +144,11 @@ export class LlamaCloudIndex { projectName?: string, organizationId?: string, ): Promise<string> { - const { data: pipelines } = - await PipelinesService.searchPipelinesApiV1PipelinesGet({ - query: { - project_id: await getProjectId( - projectName ?? this.params.projectName, - organizationId ?? this.params.organizationId, - ), - pipeline_name: name ?? this.params.name, - }, - throwOnError: true, - }); - - return pipelines[0]!.id; + return await getPipelineId( + name ?? this.params.name, + projectName ?? this.params.projectName, + organizationId ?? this.params.organizationId, + ); } public async getProjectId( @@ -317,10 +308,7 @@ export class LlamaCloudIndex { } async insert(document: Document) { - const pipelineId = await this.getPipelineId( - this.params.name, - this.params.projectName, - ); + const pipelineId = await this.getPipelineId(); if (!pipelineId) { throw new Error("We couldn't find the pipeline ID for the given name"); @@ -347,10 +335,7 @@ export class LlamaCloudIndex { } async delete(document: Document) { - const pipelineId = await this.getPipelineId( - this.params.name, - this.params.projectName, - ); + const pipelineId = await this.getPipelineId(); if (!pipelineId) { throw new Error("We couldn't find the pipeline ID for the given name"); @@ -369,10 +354,7 @@ export class LlamaCloudIndex { } async refreshDoc(document: Document) { - const pipelineId = await this.getPipelineId( - this.params.name, - this.params.projectName, - ); + const pipelineId = await this.getPipelineId(); if (!pipelineId) { throw new Error("We couldn't find the pipeline ID for the given name"); diff --git a/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts b/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts index d2a6595044764d2e6a0e115225a24e8ddbb97d5a..fdb43e282d2f6ca134881fc49bb83212b188c25f 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts @@ -1,4 +1,5 @@ import { + type MetadataFilter, type MetadataFilters, PipelinesService, type RetrievalParams, @@ -11,7 +12,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema"; import { jsonToNode, ObjectType } from "@llamaindex/core/schema"; import { extractText } from "@llamaindex/core/utils"; import type { ClientParams, CloudConstructorParams } from "./type.js"; -import { getProjectId, initService } from "./utils.js"; +import { getPipelineId, initService } from "./utils.js"; export type CloudRetrieveParams = Omit< RetrievalParams, @@ -42,6 +43,24 @@ export class LlamaCloudRetriever extends BaseRetriever { }); } + // LlamaCloud expects null values for filters, but LlamaIndexTS uses undefined for empty values + // This function converts the undefined values to null + private convertFilter(filters?: MetadataFilters): MetadataFilters | null { + if (!filters) return null; + + const processFilter = ( + filter: MetadataFilter | MetadataFilters, + ): MetadataFilter | MetadataFilters => { + if ("filters" in filter) { + // type MetadataFilters + return { ...filter, filters: filter.filters.map(processFilter) }; + } + return { ...filter, value: filter.value ?? null }; + }; + + return { ...filters, filters: filters.filters.map(processFilter) }; + } + constructor(params: CloudConstructorParams & CloudRetrieveParams) { super(); this.clientParams = { apiKey: params.apiKey, baseUrl: params.baseUrl }; @@ -57,31 +76,24 @@ export class LlamaCloudRetriever extends BaseRetriever { } async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> { - const { data: pipelines } = - await PipelinesService.searchPipelinesApiV1PipelinesGet({ - query: { - project_id: await getProjectId(this.projectName, this.organizationId), - pipeline_name: this.pipelineName, - }, - throwOnError: true, - }); + const pipelineId = await getPipelineId( + this.pipelineName, + this.projectName, + this.organizationId, + ); - if (pipelines.length === 0 || !pipelines[0]!.id) { - throw new Error( - `No pipeline found with name ${this.pipelineName} in project ${this.projectName}`, - ); - } + const filters = this.convertFilter(this.retrieveParams.filters); const { data: results } = await PipelinesService.runSearchApiV1PipelinesPipelineIdRetrievePost({ throwOnError: true, path: { - pipeline_id: pipelines[0]!.id, + pipeline_id: pipelineId, }, body: { ...this.retrieveParams, query: extractText(query), - search_filters: this.retrieveParams.filters as MetadataFilters, + search_filters: filters, dense_similarity_top_k: this.retrieveParams.similarityTopK!, }, }); diff --git a/packages/llamaindex/src/cloud/utils.ts b/packages/llamaindex/src/cloud/utils.ts index bbaadd854c7b834171a1d5f74dfaab4d1483b9ac..78c508197d3665ff7dd576f02d732921ed58f648 100644 --- a/packages/llamaindex/src/cloud/utils.ts +++ b/packages/llamaindex/src/cloud/utils.ts @@ -1,4 +1,8 @@ -import { client, ProjectsService } from "@llamaindex/cloud/api"; +import { + client, + PipelinesService, + ProjectsService, +} from "@llamaindex/cloud/api"; import { DEFAULT_BASE_URL } from "@llamaindex/core/global"; import { getEnv } from "@llamaindex/env"; import type { ClientParams } from "./type.js"; @@ -66,3 +70,26 @@ export async function getProjectId( return project.id; } + +export async function getPipelineId( + name: string, + projectName: string, + organizationId?: string, +): Promise<string> { + const { data: pipelines } = + await PipelinesService.searchPipelinesApiV1PipelinesGet({ + query: { + project_id: await getProjectId(projectName, organizationId), + pipeline_name: name, + }, + throwOnError: true, + }); + + if (pipelines.length === 0 || !pipelines[0]!.id) { + throw new Error( + `No pipeline found with name ${name} in project ${projectName}`, + ); + } + + return pipelines[0]!.id; +}