diff --git a/.changeset/tasty-hornets-repeat.md b/.changeset/tasty-hornets-repeat.md
new file mode 100644
index 0000000000000000000000000000000000000000..b4fd5ffb12a6d464c6a5c834b72a7e0ad19826c6
--- /dev/null
+++ b/.changeset/tasty-hornets-repeat.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+Export imageToDataUrl for using images in chat
diff --git a/examples/multimodal/chat.ts b/examples/multimodal/chat.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4884a11b5297a339c1b58dea6dd56d1e55251b3d
--- /dev/null
+++ b/examples/multimodal/chat.ts
@@ -0,0 +1,42 @@
+// Multimodal chat example: ask the LLM a question about a local image.
+// NOTE(review): an earlier header asked to run multimodal/load.ts first to init
+// the storage; nothing in this file reads that storage - confirm it is not needed.
+import { OpenAI, Settings, SimpleChatEngine, imageToDataUrl } from "llamaindex";
+import fs from "node:fs/promises";
+import path from "node:path";
+
+// Set the LLM on the global Settings object
+Settings.llm = new OpenAI({ model: "gpt-4o-mini", maxTokens: 512 });
+
+async function main() {
+  const chatEngine = new SimpleChatEngine();
+
+  // Locate the sample image in the data directory next to this script
+  const imagePath = path.join(__dirname, "data", "60.jpg");
+
+  // Option 1: read the file into a buffer and convert the bytes to a data URL
+  const imageBuffer = await fs.readFile(imagePath);
+  const dataUrl = await imageToDataUrl(imageBuffer);
+  // Option 2: pass the file path directly to imageToDataUrl
+  // const dataUrl = await imageToDataUrl(imagePath);
+
+  // Send the text question and the image together as one chat message
+  const response = await chatEngine.chat({
+    message: [
+      {
+        type: "text",
+        text: "What is in this image?",
+      },
+      {
+        type: "image_url",
+        image_url: {
+          url: dataUrl,
+        },
+      },
+    ],
+  });
+
+  console.log(response.message.content);
+}
+
+main().catch(console.error);
diff --git a/packages/llamaindex/src/index.edge.ts b/packages/llamaindex/src/index.edge.ts
index cf1162d7338085dc70a85591a7fb79113f221c62..100361ef2fcfd42edeceff261926292b445392c2 100644
--- a/packages/llamaindex/src/index.edge.ts
+++ b/packages/llamaindex/src/index.edge.ts
@@ -37,6 +37,7 @@ export * from "./evaluation/index.js";
 export * from "./extractors/index.js";
 export * from "./indices/index.js";
 export * from "./ingestion/index.js";
+export { imageToDataUrl } from "./internal/utils.js";
 export * from "./llm/index.js";
 export * from "./nodeParsers/index.js";
 export * from "./objects/index.js";
diff --git a/packages/llamaindex/src/internal/utils.ts b/packages/llamaindex/src/internal/utils.ts
index a301c2707997ec3598ce3f9c7fcea7470a4d938d..ab144a3cfc4c87060faa1b81c9d310ec37084a83 100644
--- a/packages/llamaindex/src/internal/utils.ts
+++ b/packages/llamaindex/src/internal/utils.ts
@@ -182,7 +182,9 @@ export function stringToImage(input: string): ImageType {
   }
 }
 
-export async function imageToDataUrl(input: ImageType): Promise<string> {
+export async function imageToDataUrl(
+  input: ImageType | Uint8Array,
+): Promise<string> {
   // first ensure, that the input is a Blob
   if (
     (input instanceof URL && input.protocol === "file:") ||
@@ -196,6 +198,8 @@ export async function imageToDataUrl(input: ImageType): Promise<string> {
   } else if (!(input instanceof Blob)) {
     if (input instanceof URL) {
       throw new Error(`Unsupported URL with protocol: ${input.protocol}`);
+    } else if (input instanceof Uint8Array) {
+      input = new Blob([input]); // convert Uint8Array to Blob
     } else {
       throw new Error(`Unsupported input type: ${typeof input}`);
     }