From bbc8c8787dd066f5cc608bce60dc65b600cd08dc Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 6 Mar 2025 16:24:05 +0700 Subject: [PATCH] fix: prefer using embedding model from vector store (#1708) --- .changeset/tame-chairs-shake.md | 9 +++ .../available_embeddings/jinaai.mdx | 5 +- .../available_embeddings/together.mdx | 5 +- .../modules/llms/available_llms/deepseek.mdx | 6 +- .../modules/llms/available_llms/fireworks.mdx | 3 +- .../modules/llms/available_llms/together.mdx | 3 +- examples/deepseek.ts | 46 +++++++++++++ examples/multimodal/jina.ts | 8 +-- examples/package.json | 4 ++ examples/qdrantdb/with-gemini.ts | 33 +++++++++ examples/qdrantdb/with-jina.ts | 27 ++++++++ examples/readers/src/pdf_fw.ts | 3 +- examples/together-ai.ts | 2 +- examples/together-ai/vector-index.ts | 9 +-- .../src/embeddings/OpenAIEmbedding.ts | 1 - packages/llamaindex/src/embeddings/index.ts | 5 -- packages/llamaindex/src/index.edge.ts | 4 +- packages/llamaindex/src/index.ts | 3 - .../src/indices/vectorStore/index.ts | 2 +- packages/llamaindex/src/llm/index.ts | 4 -- packages/llamaindex/src/llm/openai.ts | 1 - .../tests/MetadataExtractors.test.ts | 4 +- packages/llamaindex/tests/Selectors.test.ts | 2 +- .../tests/indices/SummaryIndex.test.ts | 8 +-- .../tests/indices/VectorStoreIndex.test.ts | 35 ++++++++-- .../llamaindex/tests/utility/mockOpenAI.ts | 6 +- .../tests/utility/mockStorageContext.ts | 21 ++++-- packages/providers/deepseek/package.json | 39 +++++++++++ packages/providers/deepseek/src/index.ts | 1 + .../deepseek/src/llm.ts} | 0 packages/providers/deepseek/tsconfig.json | 19 ++++++ packages/providers/fireworks/package.json | 39 +++++++++++ .../fireworks/src/embedding.ts} | 0 packages/providers/fireworks/src/index.ts | 2 + .../fireworks/src/llm.ts} | 0 packages/providers/fireworks/tsconfig.json | 19 ++++++ packages/providers/jinaai/package.json | 40 +++++++++++ .../jinaai/src/embedding.ts} | 4 +- packages/providers/jinaai/src/index.ts | 1 + packages/providers/jinaai/tsconfig.json | 19 ++++++ packages/providers/together/package.json | 39 +++++++++++ .../together/src/embedding.ts} | 0 packages/providers/together/src/index.ts | 2 + .../together/src/llm.ts} | 0 packages/providers/together/tsconfig.json | 19 ++++++ pnpm-lock.yaml | 67 +++++++++++++++++++ tsconfig.json | 12 ++++ 47 files changed, 518 insertions(+), 63 deletions(-) create mode 100644 .changeset/tame-chairs-shake.md create mode 100644 examples/deepseek.ts create mode 100644 examples/qdrantdb/with-gemini.ts create mode 100644 examples/qdrantdb/with-jina.ts delete mode 100644 packages/llamaindex/src/embeddings/OpenAIEmbedding.ts delete mode 100644 packages/llamaindex/src/embeddings/index.ts delete mode 100644 packages/llamaindex/src/llm/index.ts delete mode 100644 packages/llamaindex/src/llm/openai.ts create mode 100644 packages/providers/deepseek/package.json create mode 100644 packages/providers/deepseek/src/index.ts rename packages/{llamaindex/src/llm/deepseek.ts => providers/deepseek/src/llm.ts} (100%) create mode 100644 packages/providers/deepseek/tsconfig.json create mode 100644 packages/providers/fireworks/package.json rename packages/{llamaindex/src/embeddings/fireworks.ts => providers/fireworks/src/embedding.ts} (100%) create mode 100644 packages/providers/fireworks/src/index.ts rename packages/{llamaindex/src/llm/fireworks.ts => providers/fireworks/src/llm.ts} (100%) create mode 100644 packages/providers/fireworks/tsconfig.json create mode 100644 packages/providers/jinaai/package.json 
rename packages/{llamaindex/src/embeddings/JinaAIEmbedding.ts => providers/jinaai/src/embedding.ts} (97%) create mode 100644 packages/providers/jinaai/src/index.ts create mode 100644 packages/providers/jinaai/tsconfig.json create mode 100644 packages/providers/together/package.json rename packages/{llamaindex/src/embeddings/together.ts => providers/together/src/embedding.ts} (100%) create mode 100644 packages/providers/together/src/index.ts rename packages/{llamaindex/src/llm/together.ts => providers/together/src/llm.ts} (100%) create mode 100644 packages/providers/together/tsconfig.json diff --git a/.changeset/tame-chairs-shake.md b/.changeset/tame-chairs-shake.md new file mode 100644 index 000000000..7e9e02b1d --- /dev/null +++ b/.changeset/tame-chairs-shake.md @@ -0,0 +1,9 @@ +--- +"llamaindex": patch +"@llamaindex/deepseek": patch +"@llamaindex/fireworks": patch +"@llamaindex/together": patch +"@llamaindex/jinaai": patch +--- + +fix: prefer using embedding model from vector store diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx index 6a308ec0d..0af1e1af2 100644 --- a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx @@ -2,10 +2,11 @@ title: Jina AI --- -To use Jina AI embeddings, you need to import `JinaAIEmbedding` from `llamaindex`. +To use Jina AI embeddings, you need to import `JinaAIEmbedding` from `@llamaindex/jinaai`. ```ts -import { JinaAIEmbedding, Settings } from "llamaindex"; +import { Settings } from "llamaindex"; +import { JinaAIEmbedding } from "@llamaindex/jinaai"; Settings.embedModel = new JinaAIEmbedding(); diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx index 7d5a6a832..30c83e0fa 100644 --- a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx @@ -2,10 +2,11 @@ title: Together --- -To use together embeddings, you need to import `TogetherEmbedding` from `llamaindex`. +To use together embeddings, you need to import `TogetherEmbedding` from `@llamaindex/together`. 
```ts -import { TogetherEmbedding, Settings } from "llamaindex"; +import { Settings } from "llamaindex"; +import { TogetherEmbedding } from "@llamaindex/together"; Settings.embedModel = new TogetherEmbedding({ apiKey: "<YOUR_API_KEY>", diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx index 928164ec8..e197eff56 100644 --- a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx @@ -7,7 +7,8 @@ title: DeepSeek LLM ## Usage ```ts -import { DeepSeekLLM, Settings } from "llamaindex"; +import { Settings } from "llamaindex"; +import { DeepSeekLLM } from "@llamaindex/deepseek"; Settings.llm = new DeepSeekLLM({ apiKey: "<YOUR_API_KEY>", @@ -18,7 +19,8 @@ Settings.llm = new DeepSeekLLM({ ## Example ```ts -import { DeepSeekLLM, Document, VectorStoreIndex, Settings } from "llamaindex"; +import { Document, VectorStoreIndex, Settings } from "llamaindex"; +import { DeepSeekLLM } from "@llamaindex/deepseek"; const deepseekLlm = new DeepSeekLLM({ apiKey: "<YOUR_API_KEY>", diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx index 1680de683..328ba4e24 100644 --- a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx @@ -7,7 +7,8 @@ title: Fireworks LLM ## Usage ```ts -import { FireworksLLM, Settings } from "llamaindex"; +import { Settings } from "llamaindex"; +import { FireworksLLM } from "@llamaindex/fireworks"; Settings.llm = new FireworksLLM({ apiKey: "<YOUR_API_KEY>", diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx index 65cc58c41..a9877cfa3 100644 --- a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx @@ -23,7 +23,8 @@ import { Tab, Tabs } from "fumadocs-ui/components/tabs"; ## Usage ```ts -import { Settings, TogetherLLM } from "llamaindex"; +import { Settings } from "llamaindex"; +import { TogetherLLM } from "@llamaindex/together"; Settings.llm = new TogetherLLM({ apiKey: "<YOUR_API_KEY>", diff --git a/examples/deepseek.ts b/examples/deepseek.ts new file mode 100644 index 000000000..1a9b49444 --- /dev/null +++ b/examples/deepseek.ts @@ -0,0 +1,46 @@ +import { DeepSeekLLM } from "@llamaindex/deepseek"; + +// process.env.DEEPSEEK_API_KEY is required +const deepseek = new DeepSeekLLM({ + apiKey: process.env.DEEPSEEK_API_KEY, + model: "deepseek-coder", // or "deepseek-chat" +}); + +(async () => { + // Example of non-streaming chat + const response = await deepseek.chat({ + messages: [ + { + role: "system", + content: "You are an AI assistant", + }, + { + role: "user", + content: "Tell me about San Francisco", + }, + ], + stream: false, + }); + console.log("Response from DeepSeek AI:"); + console.log(response); + + // Example of streaming chat + const generator = await deepseek.chat({ + messages: [ + { + role: "system", + content: "You are an AI assistant", + }, + { + role: "user", + content: "Write a short poem about San Francisco", + }, + ], + stream: true, + }); + console.log("\nStreaming response from 
DeepSeek AI..."); + for await (const message of generator) { + process.stdout.write(message.delta); + } + console.log("\n"); +})(); diff --git a/examples/multimodal/jina.ts b/examples/multimodal/jina.ts index 1c3081d0f..e5adc1042 100644 --- a/examples/multimodal/jina.ts +++ b/examples/multimodal/jina.ts @@ -1,10 +1,6 @@ +import { JinaAIEmbedding } from "@llamaindex/jinaai"; import { SimpleDirectoryReader } from "@llamaindex/readers/directory"; -import { - ImageDocument, - JinaAIEmbedding, - similarity, - SimilarityType, -} from "llamaindex"; +import { ImageDocument, similarity, SimilarityType } from "llamaindex"; import path from "path"; async function main() { diff --git a/examples/package.json b/examples/package.json index 16672acaf..4e439d460 100644 --- a/examples/package.json +++ b/examples/package.json @@ -44,6 +44,10 @@ "@llamaindex/voyage-ai": "^1.0.3", "@llamaindex/weaviate": "^0.0.11", "@llamaindex/workflow": "^0.0.13", + "@llamaindex/deepseek": "^0.0.1", + "@llamaindex/fireworks": "^0.0.1", + "@llamaindex/together": "^0.0.1", + "@llamaindex/jinaai": "^0.0.1", "@notionhq/client": "^2.2.15", "@pinecone-database/pinecone": "^4.0.0", "@vercel/postgres": "^0.10.0", diff --git a/examples/qdrantdb/with-gemini.ts b/examples/qdrantdb/with-gemini.ts new file mode 100644 index 000000000..0c199a55d --- /dev/null +++ b/examples/qdrantdb/with-gemini.ts @@ -0,0 +1,33 @@ +import { + GEMINI_EMBEDDING_MODEL, + GeminiEmbedding, + GeminiSession, +} from "@llamaindex/google"; +import { QdrantVectorStore } from "@llamaindex/qdrant"; +import { + Document, + storageContextFromDefaults, + VectorStoreIndex, +} from "llamaindex"; + +const embedding = new GeminiEmbedding({ + model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001, + session: new GeminiSession({ + apiKey: process.env.GEMINI_API_KEY, + }), +}); + +async function main() { + const docs = [new Document({ text: "Lorem ipsum dolor sit amet" })]; + const vectorStore = new QdrantVectorStore({ + url: process.env.QDRANT_URL, + apiKey: process.env.QDRANT_API_KEY, + embeddingModel: embedding, + collectionName: "gemini_test", + }); + const storageContext = await storageContextFromDefaults({ vectorStore }); + await VectorStoreIndex.fromDocuments(docs, { storageContext }); + console.log("Inizialized vector store successfully"); +} + +void main().catch((err) => console.error(err)); diff --git a/examples/qdrantdb/with-jina.ts b/examples/qdrantdb/with-jina.ts new file mode 100644 index 000000000..f51f27e4e --- /dev/null +++ b/examples/qdrantdb/with-jina.ts @@ -0,0 +1,27 @@ +import { JinaAIEmbedding } from "@llamaindex/jinaai"; +import { QdrantVectorStore } from "@llamaindex/qdrant"; +import { + Document, + storageContextFromDefaults, + VectorStoreIndex, +} from "llamaindex"; + +const embedding = new JinaAIEmbedding({ + apiKey: process.env.JINAAI_API_KEY, + model: "jina-embeddings-v3", +}); + +async function main() { + const docs = [new Document({ text: "Lorem ipsum dolor sit amet" })]; + const vectorStore = new QdrantVectorStore({ + url: process.env.QDRANT_URL, + apiKey: process.env.QDRANT_API_KEY, + embeddingModel: embedding, + collectionName: "jina_test", + }); + const storageContext = await storageContextFromDefaults({ vectorStore }); + await VectorStoreIndex.fromDocuments(docs, { storageContext }); + console.log("Inizialized vector store successfully"); +} + +void main().catch((err) => console.error(err)); diff --git a/examples/readers/src/pdf_fw.ts b/examples/readers/src/pdf_fw.ts index cad617fa8..78efad1f7 100644 --- a/examples/readers/src/pdf_fw.ts +++ 
b/examples/readers/src/pdf_fw.ts @@ -1,5 +1,6 @@ +import { FireworksEmbedding, FireworksLLM } from "@llamaindex/fireworks"; import { PDFReader } from "@llamaindex/readers/pdf"; -import { FireworksEmbedding, FireworksLLM, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; import { Settings } from "llamaindex"; diff --git a/examples/together-ai.ts b/examples/together-ai.ts index 300627659..3823d5998 100644 --- a/examples/together-ai.ts +++ b/examples/together-ai.ts @@ -1,4 +1,4 @@ -import { TogetherEmbedding, TogetherLLM } from "llamaindex"; +import { TogetherEmbedding, TogetherLLM } from "@llamaindex/together"; // process.env.TOGETHER_API_KEY is required const together = new TogetherLLM({ diff --git a/examples/together-ai/vector-index.ts b/examples/together-ai/vector-index.ts index 001c3448e..a38fbbac3 100644 --- a/examples/together-ai/vector-index.ts +++ b/examples/together-ai/vector-index.ts @@ -1,12 +1,7 @@ import fs from "node:fs/promises"; -import { - Document, - Settings, - TogetherEmbedding, - TogetherLLM, - VectorStoreIndex, -} from "llamaindex"; +import { TogetherEmbedding, TogetherLLM } from "@llamaindex/together"; +import { Document, Settings, VectorStoreIndex } from "llamaindex"; // Update llm to use TogetherAI Settings.llm = new TogetherLLM({ diff --git a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts b/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts deleted file mode 100644 index 02781efcb..000000000 --- a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/src/embeddings/index.ts b/packages/llamaindex/src/embeddings/index.ts deleted file mode 100644 index 5467f2add..000000000 --- a/packages/llamaindex/src/embeddings/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export * from "@llamaindex/core/embeddings"; -export { FireworksEmbedding } from "./fireworks.js"; -export * from "./JinaAIEmbedding.js"; -export * from "./OpenAIEmbedding.js"; -export { TogetherEmbedding } from "./together.js"; diff --git a/packages/llamaindex/src/index.edge.ts b/packages/llamaindex/src/index.edge.ts index 63c6993b3..3a79a2080 100644 --- a/packages/llamaindex/src/index.edge.ts +++ b/packages/llamaindex/src/index.edge.ts @@ -22,6 +22,7 @@ export { export * from "@llamaindex/core/agent"; export * from "@llamaindex/core/chat-engine"; export * from "@llamaindex/core/data-structs"; +export * from "@llamaindex/core/embeddings"; export { CallbackManager, DEFAULT_BASE_URL, @@ -65,10 +66,10 @@ export * from "@llamaindex/core/storage/doc-store"; export * from "@llamaindex/core/storage/index-store"; export * from "@llamaindex/core/storage/kv-store"; export * from "@llamaindex/core/utils"; +export * from "@llamaindex/openai"; export * from "@llamaindex/workflow/agent"; export * from "./agent/index.js"; export * from "./cloud/index.js"; -export * from "./embeddings/index.js"; export * from "./engines/chat/index.js"; export * from "./engines/query/index.js"; export * from "./evaluation/index.js"; @@ -76,7 +77,6 @@ export * from "./extractors/index.js"; export * from "./indices/index.js"; export * from "./ingestion/index.js"; export { imageToDataUrl } from "./internal/utils.js"; -export * from "./llm/index.js"; export * from "./node-parser.js"; export * from "./objects/index.js"; export * from "./OutputParser.js"; diff --git a/packages/llamaindex/src/index.ts b/packages/llamaindex/src/index.ts index ccdd367ae..84a40464e 100644 --- a/packages/llamaindex/src/index.ts +++ 
b/packages/llamaindex/src/index.ts @@ -1,8 +1,5 @@ export * from "./index.edge.js"; -// TODO: clean up, move to jinaai package -export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js"; - // Don't export file-system stores for non-node.js runtime on top level, // as we cannot guarantee that they will work in other environments export * from "./storage/index.js"; diff --git a/packages/llamaindex/src/indices/vectorStore/index.ts b/packages/llamaindex/src/indices/vectorStore/index.ts index fc03b6e19..92be0bcb3 100644 --- a/packages/llamaindex/src/indices/vectorStore/index.ts +++ b/packages/llamaindex/src/indices/vectorStore/index.ts @@ -175,7 +175,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> { for (const type in nodeMap) { const nodes = nodeMap[type as ModalityType]; const embedModel = - this.embedModel ?? this.vectorStores[type as ModalityType]?.embedModel; + this.vectorStores[type as ModalityType]?.embedModel ?? this.embedModel; if (embedModel && nodes) { await embedModel(nodes, { logProgress: options?.logProgress, diff --git a/packages/llamaindex/src/llm/index.ts b/packages/llamaindex/src/llm/index.ts deleted file mode 100644 index dec9f0801..000000000 --- a/packages/llamaindex/src/llm/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export { DeepSeekLLM } from "./deepseek.js"; -export { FireworksLLM } from "./fireworks.js"; -export * from "./openai.js"; -export { TogetherLLM } from "./together.js"; diff --git a/packages/llamaindex/src/llm/openai.ts b/packages/llamaindex/src/llm/openai.ts deleted file mode 100644 index 02781efcb..000000000 --- a/packages/llamaindex/src/llm/openai.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/tests/MetadataExtractors.test.ts b/packages/llamaindex/tests/MetadataExtractors.test.ts index 29bb117ab..47f560e97 100644 --- a/packages/llamaindex/tests/MetadataExtractors.test.ts +++ b/packages/llamaindex/tests/MetadataExtractors.test.ts @@ -1,13 +1,11 @@ import { Document } from "@llamaindex/core/schema"; -import { Settings } from "llamaindex"; -import { OpenAIEmbedding } from "llamaindex/embeddings/index"; +import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex"; import { KeywordExtractor, QuestionsAnsweredExtractor, SummaryExtractor, TitleExtractor, } from "llamaindex/extractors/index"; -import { OpenAI } from "llamaindex/llm/openai"; import { SentenceSplitter } from "llamaindex/node-parser"; import { afterAll, beforeAll, describe, expect, test, vi } from "vitest"; import { diff --git a/packages/llamaindex/tests/Selectors.test.ts b/packages/llamaindex/tests/Selectors.test.ts index 9fe1ace17..5eec85a5b 100644 --- a/packages/llamaindex/tests/Selectors.test.ts +++ b/packages/llamaindex/tests/Selectors.test.ts @@ -1,7 +1,7 @@ import { describe, expect, test } from "vitest"; // from unittest.mock import patch -import { OpenAI } from "llamaindex/llm/index"; +import { OpenAI } from "llamaindex"; import { LLMSingleSelector } from "llamaindex/selectors/index"; import { mocStructuredkLlmGeneration } from "./utility/mockOpenAI.js"; diff --git a/packages/llamaindex/tests/indices/SummaryIndex.test.ts b/packages/llamaindex/tests/indices/SummaryIndex.test.ts index c2a523e95..287dc5838 100644 --- a/packages/llamaindex/tests/indices/SummaryIndex.test.ts +++ b/packages/llamaindex/tests/indices/SummaryIndex.test.ts @@ -20,13 +20,13 @@ describe("SummaryIndex", () => { let storageContext: StorageContext; beforeAll(async () => { - storageContext = await storageContextFromDefaults({ - persistDir: testDir, - 
}); - const embedModel = new OpenAIEmbedding(); mockEmbeddingModel(embedModel); Settings.embedModel = embedModel; + + storageContext = await storageContextFromDefaults({ + persistDir: testDir, + }); }); afterAll(() => { diff --git a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts index 61c8299a2..5a755ec71 100644 --- a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts +++ b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts @@ -9,7 +9,7 @@ import { DocStoreStrategy } from "llamaindex/ingestion/strategies/index"; import { mkdtemp, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { afterAll, beforeAll, describe, expect, test, vi } from "vitest"; +import { afterAll, beforeAll, describe, expect, it, test, vi } from "vitest"; const testDir = await mkdtemp(join(tmpdir(), "test-")); @@ -24,6 +24,10 @@ describe("VectorStoreIndex", () => { ) => Promise<Array<number>>; beforeAll(async () => { + const embedModel = new OpenAIEmbedding(); + mockEmbeddingModel(embedModel); + Settings.embedModel = embedModel; + storageContext = await mockStorageContext(testDir); testStrategy = async ( strategy: DocStoreStrategy, @@ -41,10 +45,6 @@ describe("VectorStoreIndex", () => { } return entries; }; - - const embedModel = new OpenAIEmbedding(); - mockEmbeddingModel(embedModel); - Settings.embedModel = embedModel; }); afterAll(() => { @@ -65,3 +65,28 @@ describe("VectorStoreIndex", () => { await rm(testDir, { recursive: true }); }); }); + +describe("[VectorStoreIndex] use embedding model", () => { + it("should use embedding model passed in options instead of Settings", async () => { + const documents = [new Document({ text: "This needs to be embedded" })]; + + // Create mock embedding models + const settingsEmbedModel = new OpenAIEmbedding(); + const customEmbedModel = new OpenAIEmbedding(); + + // Mock the embedding models using the utility function + mockEmbeddingModel(settingsEmbedModel); + mockEmbeddingModel(customEmbedModel); + + // Add spies to track calls + const settingsSpy = vi.spyOn(settingsEmbedModel, "getTextEmbeddings"); + const customSpy = vi.spyOn(customEmbedModel, "getTextEmbeddings"); + + Settings.embedModel = settingsEmbedModel; + + const storageContext = await mockStorageContext(testDir, customEmbedModel); // setup custom embedding model + await VectorStoreIndex.fromDocuments(documents, { storageContext }); + expect(customSpy).toHaveBeenCalled(); + expect(settingsSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/llamaindex/tests/utility/mockOpenAI.ts b/packages/llamaindex/tests/utility/mockOpenAI.ts index 102ce43b0..e6d90a3bf 100644 --- a/packages/llamaindex/tests/utility/mockOpenAI.ts +++ b/packages/llamaindex/tests/utility/mockOpenAI.ts @@ -1,8 +1,6 @@ import type { CallbackManager } from "@llamaindex/core/global"; -import type { LLMChatParamsBase } from "llamaindex"; -import { Settings } from "llamaindex"; -import type { OpenAIEmbedding } from "llamaindex/embeddings/OpenAIEmbedding"; -import { OpenAI } from "llamaindex/llm/openai"; +import type { LLMChatParamsBase, OpenAIEmbedding } from "llamaindex"; +import { OpenAI, Settings } from "llamaindex"; import { vi } from "vitest"; export const DEFAULT_LLM_TEXT_OUTPUT = "MOCK_TOKEN_1-MOCK_TOKEN_2"; diff --git a/packages/llamaindex/tests/utility/mockStorageContext.ts b/packages/llamaindex/tests/utility/mockStorageContext.ts index 4a8c0f880..dab379d7d 100644 --- 
a/packages/llamaindex/tests/utility/mockStorageContext.ts +++ b/packages/llamaindex/tests/utility/mockStorageContext.ts @@ -1,14 +1,27 @@ -import { OpenAIEmbedding, storageContextFromDefaults } from "llamaindex"; +import { + BaseEmbedding, + OpenAIEmbedding, + storageContextFromDefaults, +} from "llamaindex"; import { mockEmbeddingModel } from "./mockOpenAI.js"; -export async function mockStorageContext(testDir: string) { +export async function mockStorageContext( + testDir: string, + embeddingModel?: BaseEmbedding, +) { const storageContext = await storageContextFromDefaults({ persistDir: testDir, }); for (const store of Object.values(storageContext.vectorStores)) { - store.embedModel = new OpenAIEmbedding(); - mockEmbeddingModel(store.embedModel as OpenAIEmbedding); + if (embeddingModel) { + // use embeddingModel if it is passed in + store.embedModel = embeddingModel; + } else { + // mock an embedding model for testing + store.embedModel = new OpenAIEmbedding(); + mockEmbeddingModel(store.embedModel as OpenAIEmbedding); + } } return storageContext; } diff --git a/packages/providers/deepseek/package.json b/packages/providers/deepseek/package.json new file mode 100644 index 000000000..02d3edf33 --- /dev/null +++ b/packages/providers/deepseek/package.json @@ -0,0 +1,39 @@ +{ + "name": "@llamaindex/deepseek", + "description": "DeepSeek Adapter for LlamaIndex", + "version": "0.0.1", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/providers/deepseek" + }, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "6.3.4" + }, + "dependencies": { + "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*" + } +} diff --git a/packages/providers/deepseek/src/index.ts b/packages/providers/deepseek/src/index.ts new file mode 100644 index 000000000..e6679f111 --- /dev/null +++ b/packages/providers/deepseek/src/index.ts @@ -0,0 +1 @@ +export * from "./llm"; diff --git a/packages/llamaindex/src/llm/deepseek.ts b/packages/providers/deepseek/src/llm.ts similarity index 100% rename from packages/llamaindex/src/llm/deepseek.ts rename to packages/providers/deepseek/src/llm.ts diff --git a/packages/providers/deepseek/tsconfig.json b/packages/providers/deepseek/tsconfig.json new file mode 100644 index 000000000..bd8900e3a --- /dev/null +++ b/packages/providers/deepseek/tsconfig.json @@ -0,0 +1,19 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib", + "tsBuildInfoFile": "./lib/.tsbuildinfo" + }, + "include": ["./src"], + "references": [ + { + "path": "../openai/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/packages/providers/fireworks/package.json b/packages/providers/fireworks/package.json new file mode 100644 index 000000000..de5a063e6 --- /dev/null +++ b/packages/providers/fireworks/package.json @@ -0,0 +1,39 @@ +{ + "name": "@llamaindex/fireworks", + "description": "Fireworks Adapter for LlamaIndex", + "version": "0.0.1", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { 
+ "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/providers/fireworks" + }, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "6.3.4" + }, + "dependencies": { + "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*" + } +} diff --git a/packages/llamaindex/src/embeddings/fireworks.ts b/packages/providers/fireworks/src/embedding.ts similarity index 100% rename from packages/llamaindex/src/embeddings/fireworks.ts rename to packages/providers/fireworks/src/embedding.ts diff --git a/packages/providers/fireworks/src/index.ts b/packages/providers/fireworks/src/index.ts new file mode 100644 index 000000000..90b741f1e --- /dev/null +++ b/packages/providers/fireworks/src/index.ts @@ -0,0 +1,2 @@ +export * from "./embedding"; +export * from "./llm"; diff --git a/packages/llamaindex/src/llm/fireworks.ts b/packages/providers/fireworks/src/llm.ts similarity index 100% rename from packages/llamaindex/src/llm/fireworks.ts rename to packages/providers/fireworks/src/llm.ts diff --git a/packages/providers/fireworks/tsconfig.json b/packages/providers/fireworks/tsconfig.json new file mode 100644 index 000000000..bd8900e3a --- /dev/null +++ b/packages/providers/fireworks/tsconfig.json @@ -0,0 +1,19 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib", + "tsBuildInfoFile": "./lib/.tsbuildinfo" + }, + "include": ["./src"], + "references": [ + { + "path": "../openai/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/packages/providers/jinaai/package.json b/packages/providers/jinaai/package.json new file mode 100644 index 000000000..deb2cdd89 --- /dev/null +++ b/packages/providers/jinaai/package.json @@ -0,0 +1,40 @@ +{ + "name": "@llamaindex/jinaai", + "description": "JinaAI Adapter for LlamaIndex", + "version": "0.0.1", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/providers/jinaai" + }, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "6.3.4" + }, + "dependencies": { + "@llamaindex/core": "workspace:*", + "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*" + } +} diff --git a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts b/packages/providers/jinaai/src/embedding.ts similarity index 97% rename from packages/llamaindex/src/embeddings/JinaAIEmbedding.ts rename to packages/providers/jinaai/src/embedding.ts index 9f7f3d705..6ad09293a 100644 --- a/packages/llamaindex/src/embeddings/JinaAIEmbedding.ts +++ b/packages/providers/jinaai/src/embedding.ts @@ -1,7 +1,7 @@ import { MultiModalEmbedding } from "@llamaindex/core/embeddings"; +import type { ImageType } from "@llamaindex/core/schema"; +import { imageToDataUrl } from "@llamaindex/core/utils"; import { getEnv } from 
"@llamaindex/env"; -import { imageToDataUrl } from "../internal/utils.js"; -import type { ImageType } from "../Node.js"; function isLocal(url: ImageType): boolean { if (url instanceof Blob) return true; diff --git a/packages/providers/jinaai/src/index.ts b/packages/providers/jinaai/src/index.ts new file mode 100644 index 000000000..5b00d3610 --- /dev/null +++ b/packages/providers/jinaai/src/index.ts @@ -0,0 +1 @@ +export * from "./embedding"; diff --git a/packages/providers/jinaai/tsconfig.json b/packages/providers/jinaai/tsconfig.json new file mode 100644 index 000000000..bd8900e3a --- /dev/null +++ b/packages/providers/jinaai/tsconfig.json @@ -0,0 +1,19 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib", + "tsBuildInfoFile": "./lib/.tsbuildinfo" + }, + "include": ["./src"], + "references": [ + { + "path": "../openai/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/packages/providers/together/package.json b/packages/providers/together/package.json new file mode 100644 index 000000000..6a5fce0de --- /dev/null +++ b/packages/providers/together/package.json @@ -0,0 +1,39 @@ +{ + "name": "@llamaindex/together", + "description": "Together Adapter for LlamaIndex", + "version": "0.0.1", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/providers/together" + }, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "6.3.4" + }, + "dependencies": { + "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*" + } +} diff --git a/packages/llamaindex/src/embeddings/together.ts b/packages/providers/together/src/embedding.ts similarity index 100% rename from packages/llamaindex/src/embeddings/together.ts rename to packages/providers/together/src/embedding.ts diff --git a/packages/providers/together/src/index.ts b/packages/providers/together/src/index.ts new file mode 100644 index 000000000..90b741f1e --- /dev/null +++ b/packages/providers/together/src/index.ts @@ -0,0 +1,2 @@ +export * from "./embedding"; +export * from "./llm"; diff --git a/packages/llamaindex/src/llm/together.ts b/packages/providers/together/src/llm.ts similarity index 100% rename from packages/llamaindex/src/llm/together.ts rename to packages/providers/together/src/llm.ts diff --git a/packages/providers/together/tsconfig.json b/packages/providers/together/tsconfig.json new file mode 100644 index 000000000..3fad47fb7 --- /dev/null +++ b/packages/providers/together/tsconfig.json @@ -0,0 +1,19 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib", + "tsBuildInfoFile": "./lib/.tsbuildinfo" + }, + "include": ["./src"], + "references": [ + { + "path": "../../core/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 925315405..4114b9a56 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -622,12 +622,18 @@ importers: '@llamaindex/deepinfra': specifier: ^0.0.42 version: 
link:../packages/providers/deepinfra + '@llamaindex/deepseek': + specifier: ^0.0.1 + version: link:../packages/providers/deepseek '@llamaindex/env': specifier: ^0.1.29 version: link:../packages/env '@llamaindex/firestore': specifier: ^1.0.4 version: link:../packages/providers/storage/firestore + '@llamaindex/fireworks': + specifier: ^0.0.1 + version: link:../packages/providers/fireworks '@llamaindex/google': specifier: ^0.0.13 version: link:../packages/providers/google @@ -637,6 +643,9 @@ importers: '@llamaindex/huggingface': specifier: ^0.0.42 version: link:../packages/providers/huggingface + '@llamaindex/jinaai': + specifier: ^0.0.1 + version: link:../packages/providers/jinaai '@llamaindex/milvus': specifier: ^0.1.6 version: link:../packages/providers/storage/milvus @@ -676,6 +685,9 @@ importers: '@llamaindex/replicate': specifier: ^0.0.39 version: link:../packages/providers/replicate + '@llamaindex/together': + specifier: ^0.0.1 + version: link:../packages/providers/together '@llamaindex/upstash': specifier: ^0.0.11 version: link:../packages/providers/storage/upstash @@ -1218,6 +1230,32 @@ importers: specifier: 6.3.4 version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/deepseek: + dependencies: + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../openai + devDependencies: + bunchee: + specifier: 6.3.4 + version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + + packages/providers/fireworks: + dependencies: + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../openai + devDependencies: + bunchee: + specifier: 6.3.4 + version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/google: dependencies: '@google-cloud/vertexai': @@ -1275,6 +1313,22 @@ importers: specifier: 6.3.4 version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/jinaai: + dependencies: + '@llamaindex/core': + specifier: workspace:* + version: link:../../core + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../openai + devDependencies: + bunchee: + specifier: 6.3.4 + version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/mistral: dependencies: '@llamaindex/core': @@ -1607,6 +1661,19 @@ importers: specifier: 6.3.4 version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/together: + dependencies: + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../openai + devDependencies: + bunchee: + specifier: 6.3.4 + version: 6.3.4(patch_hash=pavboztthlgni7m5gzw7643oru)(typescript@5.7.3) + packages/providers/vercel: dependencies: '@llamaindex/core': diff --git a/tsconfig.json b/tsconfig.json index cca56fe09..8af5db8f3 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -175,6 +175,18 @@ }, { "path": "./packages/providers/voyage-ai/tsconfig.json" + }, + { + "path": "./packages/providers/deepseek/tsconfig.json" + }, + { + "path": "./packages/providers/fireworks/tsconfig.json" + }, + { + "path": "./packages/providers/together/tsconfig.json" + }, + { + "path": "./packages/providers/jinaai/tsconfig.json" } ] } -- GitLab
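
For reference, a minimal usage sketch of the behavior this patch changes, distilled from the `examples/qdrantdb/with-jina.ts` file it adds. The core hunk in `packages/llamaindex/src/indices/vectorStore/index.ts` flips the resolution order to `vectorStore.embedModel ?? index.embedModel`, so a model attached to the vector store now wins. The environment variables and collection name below follow the example files; treat them as placeholders:

```ts
import { JinaAIEmbedding } from "@llamaindex/jinaai";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import {
  Document,
  storageContextFromDefaults,
  VectorStoreIndex,
} from "llamaindex";

async function main() {
  // The vector store carries its own embedding model.
  const vectorStore = new QdrantVectorStore({
    url: process.env.QDRANT_URL,
    apiKey: process.env.QDRANT_API_KEY,
    embeddingModel: new JinaAIEmbedding({
      apiKey: process.env.JINAAI_API_KEY,
      model: "jina-embeddings-v3",
    }),
    collectionName: "jina_test",
  });

  // After this patch, ingestion resolves the model as
  //   vectorStore.embedModel ?? index.embedModel
  // so the Jina model above is used even when Settings.embedModel
  // (or the index-level model) points at something else.
  const storageContext = await storageContextFromDefaults({ vectorStore });
  await VectorStoreIndex.fromDocuments(
    [new Document({ text: "Lorem ipsum dolor sit amet" })],
    { storageContext },
  );
}

void main().catch(console.error);
```

The new `VectorStoreIndex` test in this patch asserts exactly this: the custom model's `getTextEmbeddings` is called while the `Settings` model's spy is not.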
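The patch also extracts the DeepSeek, Fireworks, Together, and JinaAI adapters from the `llamaindex` umbrella into scoped provider packages. A sketch of how existing imports migrate, using only the exports the moved files already provide (versions per the `examples/package.json` hunk):

```ts
// Before this patch (these exports are removed from "llamaindex"):
// import {
//   DeepSeekLLM,
//   FireworksEmbedding,
//   FireworksLLM,
//   JinaAIEmbedding,
//   TogetherEmbedding,
//   TogetherLLM,
// } from "llamaindex";

// After: each provider ships as its own @llamaindex/* package.
import { DeepSeekLLM } from "@llamaindex/deepseek";
import { FireworksEmbedding, FireworksLLM } from "@llamaindex/fireworks";
import { JinaAIEmbedding } from "@llamaindex/jinaai";
import { TogetherEmbedding, TogetherLLM } from "@llamaindex/together";
```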