diff --git a/apps/docs/docs/recipes/cost-analysis.mdx b/apps/docs/docs/recipes/cost-analysis.mdx
index eb5eeb3e7cde19bddd09fc958d2557ccf7245dbf..ed9b0fe1d92450ba17960824f86101764ea9d9c0 100644
--- a/apps/docs/docs/recipes/cost-analysis.mdx
+++ b/apps/docs/docs/recipes/cost-analysis.mdx
@@ -6,7 +6,7 @@ This page shows how to track LLM cost using APIs.
 
 The callback manager is a class that manages the callback functions.
 
-You can register `llm-start`, and `llm-end` callbacks to the callback manager for tracking the cost.
+You can register `llm-start`, `llm-end`, and `llm-stream` callbacks with the callback manager to track cost.
 
 import CodeBlock from "@theme/CodeBlock";
 import CodeSource from "!raw-loader!../../../../examples/recipes/cost-analysis";
diff --git a/examples/package.json b/examples/package.json
index 6837aa22ed3f4032380b07e3e66ccd452dd20f84..36fd832b7f43767cce2872ca1d658ce3ec883bb7 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -19,6 +19,7 @@
   "devDependencies": {
     "@types/node": "^18.19.31",
     "ts-node": "^10.9.2",
+    "tsx": "^4.7.2",
     "typescript": "^5.4.4"
   },
   "scripts": {
diff --git a/examples/recipes/cost-analysis.ts b/examples/recipes/cost-analysis.ts
index cf8d102b42aa02e1cbb34da30028860a6ed6cd64..79b162c3bdee6cf9c5c521c45cf890c38a4a9c3c 100644
--- a/examples/recipes/cost-analysis.ts
+++ b/examples/recipes/cost-analysis.ts
@@ -6,7 +6,8 @@ import { extractText } from "llamaindex/llm/utils";
 const encoding = encodingForModel("gpt-4-0125-preview");
 
 const llm = new OpenAI({
-  model: "gpt-4-0125-preview",
+  // currently resolves to "gpt-4-turbo-2024-04-09"
+  model: "gpt-4-turbo",
 });
 
 let tokenCount = 0;
@@ -19,18 +20,25 @@ Settings.callbackManager.on("llm-start", (event) => {
   console.log("Token count:", tokenCount);
   // https://openai.com/pricing
   // $10.00 / 1M tokens
-  console.log(`Price: $${(tokenCount / 1_000_000) * 10}`);
+  console.log(`Total Price: $${(tokenCount / 1_000_000) * 10}`);
 });
-Settings.callbackManager.on("llm-end", (event) => {
-  const { response } = event.detail.payload;
-  tokenCount += encoding.encode(extractText(response.message.content)).length;
-  console.log("Token count:", tokenCount);
+
+Settings.callbackManager.on("llm-stream", (event) => {
+  const { chunk } = event.detail.payload;
+  const { delta } = chunk;
+  tokenCount += encoding.encode(extractText(delta)).length;
+  if (tokenCount > 20) {
+    // This is just an example; you can set your own limit or handle it differently
+    throw new Error("Token limit exceeded!");
+  }
+});
+Settings.callbackManager.on("llm-end", () => {
   // https://openai.com/pricing
   // $30.00 / 1M tokens
-  console.log(`Price: $${(tokenCount / 1_000_000) * 30}`);
+  console.log(`Total Price: $${(tokenCount / 1_000_000) * 30}`);
 });
 
-const question = "Hello, how are you?";
+const question = "Hello, how are you? Please respond in about 50 tokens.";
 console.log("Question:", question);
 void llm
   .chat({
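One caveat on the example above: it accumulates prompt and completion tokens into the same `tokenCount`, then prints that single running total at the $10/1M input rate on `llm-start` and again at the $30/1M output rate on `llm-end`, so both figures overstate the true cost. A minimal sketch that keeps the two counters separate, assuming `encodingForModel` comes from `js-tiktoken` as in the example and that the prices still match OpenAI's published list:

```ts
import { encodingForModel } from "js-tiktoken";
import { Settings } from "llamaindex";
import { extractText } from "llamaindex/llm/utils";

const encoding = encodingForModel("gpt-4-0125-preview");

// assumed prices in USD per 1M tokens; check https://openai.com/pricing
const INPUT_PRICE = 10;
const OUTPUT_PRICE = 30;

let promptTokens = 0;
let completionTokens = 0;

Settings.callbackManager.on("llm-start", (event) => {
  // count every message sent to the model as input tokens
  const { messages } = event.detail.payload;
  for (const message of messages) {
    promptTokens += encoding.encode(extractText(message.content)).length;
  }
});

Settings.callbackManager.on("llm-stream", (event) => {
  // count each streamed delta as output tokens
  const { chunk } = event.detail.payload;
  completionTokens += encoding.encode(extractText(chunk.delta)).length;
});

Settings.callbackManager.on("llm-end", () => {
  const price =
    (promptTokens / 1_000_000) * INPUT_PRICE +
    (completionTokens / 1_000_000) * OUTPUT_PRICE;
  console.log(`Estimated total: $${price.toFixed(6)}`);
});
```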
diff --git a/packages/core/e2e/fixtures/llm/open_ai.ts b/packages/core/e2e/fixtures/llm/open_ai.ts
index d8ee4661e6bd258fd8de9e778d0090d578d958b4..24dfaee2faf64bdcd01389be2d25c55c583ec13c 100644
--- a/packages/core/e2e/fixtures/llm/open_ai.ts
+++ b/packages/core/e2e/fixtures/llm/open_ai.ts
@@ -52,19 +52,12 @@ export class OpenAI implements LLM {
     }
 
     if (llmCompleteMockStorage.llmEventEnd.length > 0) {
-      const response =
-        llmCompleteMockStorage.llmEventEnd.shift()!["response"];
+      const { id, response } = llmCompleteMockStorage.llmEventEnd.shift()!;
       if (params.stream) {
-        const content = response.message.content as string;
-        // maybe this is not the correct way to split the content, but it's good enough for now
-        const tokens = content.split("");
         return {
           [Symbol.asyncIterator]: async function* () {
-            const delta = tokens.shift();
-            if (delta) {
-              yield {
-                delta,
-              } as ChatResponseChunk;
+            while (llmCompleteMockStorage.llmEventStream.at(0)?.id === id) {
+              yield llmCompleteMockStorage.llmEventStream.shift()!["chunk"];
             }
           },
         };
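Note the loop peeks at the front of the queue with `.at(0)` to match the `shift()` that consumes it, so only the chunks recorded for this response id are replayed. For context, here is a sketch of how a test consumes the replayed stream; the `chat()` call shape mirrors the real `OpenAI` class and the message is taken from the `llm.snap` fixture, so treat the details as assumptions:

```ts
import type { LLM } from "llamaindex";

// `llm` stands in for the mocked OpenAI fixture registered for the test
declare const llm: LLM;

const stream = await llm.chat({
  messages: [{ content: "hello", role: "user" }],
  stream: true,
});

let text = "";
for await (const chunk of stream) {
  // chunks arrive verbatim from the `llmEventStream` entries in the snapshot
  text += chunk.delta;
}
```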
diff --git a/packages/core/e2e/node/snapshot/agent.snap b/packages/core/e2e/node/snapshot/agent.snap
index c6ab517205b6023cd067cd7133ec2c814038ff3d..4571ee4965514b005f6eb5806d6da90d00e4cf93 100644
--- a/packages/core/e2e/node/snapshot/agent.snap
+++ b/packages/core/e2e/node/snapshot/agent.snap
@@ -1,7 +1,7 @@
 {
   "llmEventStart": [
     {
-      "id": "88a7035b-b493-4e95-8902-666ede936fb6",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "What is the weather in San Francisco?",
@@ -10,7 +10,7 @@
       ]
     },
     {
-      "id": "79c7f739-cc24-4cf8-b605-89a460111da1",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "What is the weather in San Francisco?",
@@ -22,7 +22,7 @@
           "options": {
             "toolCalls": [
               {
-                "id": "call_kIqRf0PiYEa6uIQzI3wYZJkR",
+                "id": "HIDDEN",
                 "type": "function",
                 "function": {
                   "name": "Weather",
@@ -37,7 +37,7 @@
           "role": "tool",
           "options": {
             "name": "Weather",
-            "tool_call_id": "call_kIqRf0PiYEa6uIQzI3wYZJkR"
+            "tool_call_id": "HIDDEN"
           }
         }
       ]
@@ -45,12 +45,12 @@
   ],
   "llmEventEnd": [
     {
-      "id": "88a7035b-b493-4e95-8902-666ede936fb6",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CMHEE56nQ0oLqGeOeJuky31TIHZY",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732596,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -60,7 +60,7 @@
               "content": null,
               "tool_calls": [
                 {
-                  "id": "call_kIqRf0PiYEa6uIQzI3wYZJkR",
+                  "id": "HIDDEN",
                   "type": "function",
                   "function": {
                     "name": "Weather",
@@ -78,7 +78,7 @@
           "completion_tokens": 15,
           "total_tokens": 64
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "",
@@ -86,7 +86,7 @@
         "options": {
           "toolCalls": [
             {
-              "id": "call_kIqRf0PiYEa6uIQzI3wYZJkR",
+              "id": "HIDDEN",
               "type": "function",
               "function": {
                 "name": "Weather",
@@ -99,12 +99,12 @@
       }
     },
     {
-      "id": "79c7f739-cc24-4cf8-b605-89a460111da1",
+      "id": "HIDDEN",
      "response": {
        "raw": {
-          "id": "chatcmpl-9CMHF8HQai1B6w1BpAd5GAHLKfbkR",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732597,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -122,7 +122,7 @@
           "completion_tokens": 14,
           "total_tokens": 92
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "The weather in San Francisco is currently 35 degrees and sunny.",
@@ -131,5 +131,6 @@
         }
       }
     }
-  ]
+  ],
+  "llmEventStream": []
 }
\ No newline at end of file
diff --git a/packages/core/e2e/node/snapshot/gpt-4-turbo.snap b/packages/core/e2e/node/snapshot/gpt-4-turbo.snap
index 8a93f3d8d19e25bdc50a1af353726246920f17b3..960c40f77176da8f254f268db6fb126033088e4b 100644
--- a/packages/core/e2e/node/snapshot/gpt-4-turbo.snap
+++ b/packages/core/e2e/node/snapshot/gpt-4-turbo.snap
@@ -1,7 +1,7 @@
 {
   "llmEventStart": [
     {
-      "id": "3c5024e0-df1d-4a29-b491-9712324bd520",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "What is the weather in San Jose?",
@@ -10,7 +10,7 @@
       ]
     },
     {
-      "id": "860b61c3-3c3a-4301-8200-9d6c0668cae5",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "What is the weather in San Jose?",
@@ -22,7 +22,7 @@
           "options": {
             "toolCalls": [
               {
-                "id": "call_wlpohl1FXSCU9vV2CsjTPSWE",
+                "id": "HIDDEN",
                 "type": "function",
                 "function": {
                   "name": "Weather",
@@ -37,7 +37,7 @@
           "role": "tool",
           "options": {
             "name": "Weather",
-            "tool_call_id": "call_wlpohl1FXSCU9vV2CsjTPSWE"
+            "tool_call_id": "HIDDEN"
           }
         }
       ]
@@ -45,12 +45,12 @@
   ],
   "llmEventEnd": [
     {
-      "id": "3c5024e0-df1d-4a29-b491-9712324bd520",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CQt20hfgKNlrbsbu47j40GzHzFUJ",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712750316,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -60,7 +60,7 @@
               "content": null,
               "tool_calls": [
                 {
-                  "id": "call_wlpohl1FXSCU9vV2CsjTPSWE",
+                  "id": "HIDDEN",
                   "type": "function",
                   "function": {
                     "name": "Weather",
@@ -78,7 +78,7 @@
           "completion_tokens": 15,
           "total_tokens": 64
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "",
@@ -86,7 +86,7 @@
         "options": {
           "toolCalls": [
             {
-              "id": "call_wlpohl1FXSCU9vV2CsjTPSWE",
+              "id": "HIDDEN",
               "type": "function",
               "function": {
                 "name": "Weather",
@@ -99,12 +99,12 @@
       }
     },
     {
-      "id": "860b61c3-3c3a-4301-8200-9d6c0668cae5",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CQt2PPpt5qL8wl3lipBYJXLZXeQi",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712750316,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -122,7 +122,7 @@
           "completion_tokens": 14,
           "total_tokens": 92
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "The weather in San Jose is currently 45 degrees and sunny.",
@@ -131,5 +131,6 @@
         }
       }
     }
-  ]
+  ],
+  "llmEventStream": []
 }
\ No newline at end of file
diff --git a/packages/core/e2e/node/snapshot/llm.snap b/packages/core/e2e/node/snapshot/llm.snap
index 6074f1524fe5a188f5cb705dbf1af1d21daa5cc2..4fb4a33ba4898ecb9dca2f305c29bd262f339384 100644
--- a/packages/core/e2e/node/snapshot/llm.snap
+++ b/packages/core/e2e/node/snapshot/llm.snap
@@ -1,7 +1,7 @@
 {
   "llmEventStart": [
     {
-      "id": "68846dc5-d099-4ab1-b987-3eb5376c9859",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "Hello",
@@ -10,7 +10,7 @@
       ]
     },
     {
-      "id": "de46b84e-7345-430f-b8fa-423354b630c9",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "hello",
@@ -21,12 +21,12 @@
   ],
   "llmEventEnd": [
     {
-      "id": "68846dc5-d099-4ab1-b987-3eb5376c9859",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CMHDMY3CO1uI6P3JfJMNg94dhrbN",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732595,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -44,7 +44,7 @@
           "completion_tokens": 9,
           "total_tokens": 17
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "Hello! How can I assist you today?",
@@ -54,9 +54,199 @@
       }
     },
     {
-      "id": "de46b84e-7345-430f-b8fa-423354b630c9",
+      "id": "HIDDEN",
       "response": {
-        "raw": null,
+        "raw": [
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": "Hello"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": "Hello"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": "!"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": "!"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " How"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " How"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " can"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " can"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " I"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " I"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " assist"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " assist"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " you"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " you"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": " today"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": " today"
+          },
+          {
+            "raw": {
+              "id": "HIDDEN",
+              "object": "chat.completion.chunk",
+              "created": 114514,
+              "model": "gpt-3.5-turbo-0125",
+              "system_fingerprint": "HIDDEN",
+              "choices": [
+                {
+                  "index": 0,
+                  "delta": {
+                    "content": "?"
+                  },
+                  "logprobs": null,
+                  "finish_reason": null
+                }
+              ]
+            },
+            "options": {},
+            "delta": "?"
+          }
+        ],
         "message": {
           "content": "Hello! How can I assist you today?",
           "role": "assistant",
@@ -64,5 +254,223 @@
         }
       }
     }
+  ],
+  "llmEventStream": [
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": "Hello"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": "Hello"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": "!"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": "!"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " How"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " How"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " can"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " can"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " I"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " I"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " assist"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " assist"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " you"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " you"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": " today"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": " today"
+      }
+    },
+    {
+      "id": "HIDDEN",
+      "chunk": {
+        "raw": {
+          "id": "HIDDEN",
+          "object": "chat.completion.chunk",
+          "created": 114514,
+          "model": "gpt-3.5-turbo-0125",
+          "system_fingerprint": "HIDDEN",
+          "choices": [
+            {
+              "index": 0,
+              "delta": {
+                "content": "?"
+              },
+              "logprobs": null,
+              "finish_reason": null
+            }
+          ]
+        },
+        "options": {},
+        "delta": "?"
+      }
+    }
   ]
 }
\ No newline at end of file
diff --git a/packages/core/e2e/node/snapshot/queryEngine_subquestion.snap b/packages/core/e2e/node/snapshot/queryEngine_subquestion.snap
index 9f4549f407310a8ace1588a765d8bdf4908b3846..4b86f5604a8df4f052f1498202a8af76d0807b3a 100644
--- a/packages/core/e2e/node/snapshot/queryEngine_subquestion.snap
+++ b/packages/core/e2e/node/snapshot/queryEngine_subquestion.snap
@@ -1,7 +1,7 @@
 {
   "llmEventStart": [
     {
-      "id": "6542b9be-50e9-4bd6-9e8b-94088c0d0a43",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "Given a user question, and a list of tools, output a list of relevant sub-questions that when composed can help answer the full user question:\n\n# Example 1\n<Tools>\n```json\n{\n \"uber_10k\": \"Provides information about Uber financials for year 2021\",\n \"lyft_10k\": \"Provides information about Lyft financials for year 2021\"\n}\n```\n\n<User Question>\nCompare and contrast the revenue growth and EBITDA of Uber and Lyft for year 2021\n\n<Output>\n```json\n[\n {\n \"subQuestion\": \"What is the revenue growth of Uber\",\n \"toolName\": \"uber_10k\"\n },\n {\n \"subQuestion\": \"What is the EBITDA of Uber\",\n \"toolName\": \"uber_10k\"\n },\n {\n \"subQuestion\": \"What is the revenue growth of Lyft\",\n \"toolName\": \"lyft_10k\"\n },\n {\n \"subQuestion\": \"What is the EBITDA of Lyft\",\n \"toolName\": \"lyft_10k\"\n }\n]\n```\n\n# Example 2\n<Tools>\n```json\n{\n \"bill_gates_idea\": \"Get what Bill Gates idea from.\"\n}\n```\n\n<User Question>\nWhat did Bill Gates steal from?\n\n<Output>\n",
@@ -10,7 +10,7 @@
       ]
     },
     {
-      "id": "8e61829a-f816-47d5-95d5-fc93ff479085",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "Context information is below.\n---------------------\nBill Gates stole from Apple. Steve Jobs stole from Xerox.\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: What is Bill Gates' idea\nAnswer:",
@@ -19,7 +19,7 @@
       ]
     },
     {
-      "id": "c4b73220-3108-4951-8280-2c6429247f7c",
+      "id": "HIDDEN",
       "messages": [
         {
           "content": "Context information is below.\n---------------------\nSub question: What is Bill Gates' idea\nResponse: Bill Gates' idea was to steal from Apple.\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: What did Bill Gates steal from?\nAnswer:",
@@ -30,12 +30,12 @@
   ],
   "llmEventEnd": [
     {
-      "id": "6542b9be-50e9-4bd6-9e8b-94088c0d0a43",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CMHF9kjM55jAPKnPfZ7Yj6nOZJNf",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732597,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -53,7 +53,7 @@
           "completion_tokens": 35,
           "total_tokens": 325
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "```json\n[\n {\n \"subQuestion\": \"What is Bill Gates' idea\",\n \"toolName\": \"bill_gates_idea\"\n }\n]\n```",
@@ -63,12 +63,12 @@
       }
     },
     {
-      "id": "8e61829a-f816-47d5-95d5-fc93ff479085",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CMHGHagthzFopnyqU5uPFDNx1fnw",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732598,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -86,7 +86,7 @@
           "completion_tokens": 10,
           "total_tokens": 63
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
      },
       "message": {
         "content": "Bill Gates' idea was to steal from Apple.",
@@ -96,12 +96,12 @@
       }
     },
     {
-      "id": "c4b73220-3108-4951-8280-2c6429247f7c",
+      "id": "HIDDEN",
       "response": {
         "raw": {
-          "id": "chatcmpl-9CMHHL7EUvMjm1jTKZIThu76XNAiN",
+          "id": "HIDDEN",
           "object": "chat.completion",
-          "created": 1712732599,
+          "created": 114514,
           "model": "gpt-3.5-turbo-0125",
           "choices": [
             {
@@ -119,7 +119,7 @@
           "completion_tokens": 6,
           "total_tokens": 68
         },
-        "system_fingerprint": "fp_b28b39ffa8"
+        "system_fingerprint": "HIDDEN"
       },
       "message": {
         "content": "Bill Gates stole from Apple.",
@@ -128,5 +128,6 @@
         }
       }
     }
-  ]
+  ],
+  "llmEventStream": []
 }
\ No newline at end of file
"response": { "raw": { - "id": "chatcmpl-9CMHHL7EUvMjm1jTKZIThu76XNAiN", + "id": "HIDDEN", "object": "chat.completion", - "created": 1712732599, + "created": 114514, "model": "gpt-3.5-turbo-0125", "choices": [ { @@ -119,7 +119,7 @@ "completion_tokens": 6, "total_tokens": 68 }, - "system_fingerprint": "fp_b28b39ffa8" + "system_fingerprint": "HIDDEN" }, "message": { "content": "Bill Gates stole from Apple.", @@ -128,5 +128,6 @@ } } } - ] + ], + "llmEventStream": [] } \ No newline at end of file diff --git a/packages/core/e2e/node/utils.ts b/packages/core/e2e/node/utils.ts index 260e86e6b5ef381e641a5c39e92da163cf58e5ba..352b8bdd619da7390ccac65cf2b6df3831506c14 100644 --- a/packages/core/e2e/node/utils.ts +++ b/packages/core/e2e/node/utils.ts @@ -1,4 +1,9 @@ -import { Settings, type LLMEndEvent, type LLMStartEvent } from "llamaindex"; +import { + Settings, + type LLMEndEvent, + type LLMStartEvent, + type LLMStreamEvent, +} from "llamaindex"; import { readFile, writeFile } from "node:fs/promises"; import { join } from "node:path"; import { type test } from "node:test"; @@ -7,11 +12,13 @@ import { fileURLToPath } from "node:url"; type MockStorage = { llmEventStart: LLMStartEvent["detail"]["payload"][]; llmEventEnd: LLMEndEvent["detail"]["payload"][]; + llmEventStream: LLMStreamEvent["detail"]["payload"][]; }; export const llmCompleteMockStorage: MockStorage = { llmEventStart: [], llmEventEnd: [], + llmEventStream: [], }; export const testRootDir = fileURLToPath(new URL(".", import.meta.url)); @@ -23,6 +30,7 @@ export async function mockLLMEvent( const newLLMCompleteMockStorage: MockStorage = { llmEventStart: [], llmEventEnd: [], + llmEventStream: [], }; function captureLLMStart(event: LLMStartEvent) { @@ -33,6 +41,10 @@ export async function mockLLMEvent( newLLMCompleteMockStorage.llmEventEnd.push(event.detail.payload); } + function captureLLMStream(event: LLMStreamEvent) { + newLLMCompleteMockStorage.llmEventStream.push(event.detail.payload); + } + await readFile(join(testRootDir, "snapshot", `${snapshotName}.snap`), { encoding: "utf-8", }) @@ -44,6 +56,9 @@ export async function mockLLMEvent( result["llmEventStart"].forEach((event) => { llmCompleteMockStorage.llmEventStart.push(event); }); + result["llmEventStream"].forEach((event) => { + llmCompleteMockStorage.llmEventStream.push(event); + }); }) .catch((error) => { if (error.code === "ENOENT") { @@ -53,15 +68,25 @@ export async function mockLLMEvent( }); Settings.callbackManager.on("llm-start", captureLLMStart); Settings.callbackManager.on("llm-end", captureLLMEnd); + Settings.callbackManager.on("llm-stream", captureLLMStream); t.after(async () => { + Settings.callbackManager.off("llm-stream", captureLLMStream); Settings.callbackManager.off("llm-end", captureLLMEnd); Settings.callbackManager.off("llm-start", captureLLMStart); // eslint-disable-next-line turbo/no-undeclared-env-vars if (process.env.UPDATE_SNAPSHOT === "1") { + const data = JSON.stringify(newLLMCompleteMockStorage, null, 2) + .replace(/"id": ".*"/g, `"id": "HIDDEN"`) + .replace(/"created": \d+/g, `"created": 114514`) + .replace( + /"system_fingerprint": ".*"/g, + '"system_fingerprint": "HIDDEN"', + ) + .replace(/"tool_call_id": ".*"/g, '"tool_call_id": "HIDDEN"'); await writeFile( join(testRootDir, "snapshot", `${snapshotName}.snap`), - JSON.stringify(newLLMCompleteMockStorage, null, 2), + data, ); return; } @@ -79,10 +104,20 @@ export async function mockLLMEvent( "New LLMStartEvent does not match, please update snapshot", ); } + + if ( + 
diff --git a/packages/core/src/callbacks/CallbackManager.ts b/packages/core/src/callbacks/CallbackManager.ts
index f79f972fba4c36dc40f7e2d09688276adae5edba..d596615acb39ab54831885d33a00627fa65ea272 100644
--- a/packages/core/src/callbacks/CallbackManager.ts
+++ b/packages/core/src/callbacks/CallbackManager.ts
@@ -5,7 +5,11 @@ import {
   EventCaller,
   getEventCaller,
 } from "../internal/context/EventCaller.js";
-import type { LLMEndEvent, LLMStartEvent } from "../llm/types.js";
+import type {
+  LLMEndEvent,
+  LLMStartEvent,
+  LLMStreamEvent,
+} from "../llm/types.js";
 
 export class LlamaIndexCustomEvent<T = any> extends CustomEvent<T> {
   reason: EventCaller | null;
@@ -44,6 +48,7 @@ export interface LlamaIndexEventMaps {
   stream: CustomEvent<StreamCallbackResponse>;
   "llm-start": LLMStartEvent;
   "llm-end": LLMEndEvent;
+  "llm-stream": LLMStreamEvent;
 }
 
 //#region @deprecated remove in the next major version
diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts
index 7c8ff98d7238879e184964cb7652f92595278227..dc37d3718a436986cfb0336127ee6801ca2e3dd9 100644
--- a/packages/core/src/llm/LLM.ts
+++ b/packages/core/src/llm/LLM.ts
@@ -366,7 +366,7 @@ export class Portkey extends BaseLLM {
 
         idx_counter++;
 
-        yield { delta: part.choices[0].delta?.content ?? "" };
+        yield { raw: part, delta: part.choices[0].delta?.content ?? "" };
       }
       return;
     }
diff --git a/packages/core/src/llm/anthropic.ts b/packages/core/src/llm/anthropic.ts
index 9c471eebe24170e51a9aa7d6c593500cc4db58f5..364525d7a4d62b8c0cea5009fc158256aa44d1a6 100644
--- a/packages/core/src/llm/anthropic.ts
+++ b/packages/core/src/llm/anthropic.ts
@@ -212,7 +212,10 @@ export class Anthropic extends BaseLLM {
       if (typeof content !== "string") continue;
 
       idx_counter++;
-      yield { delta: content };
+      yield {
+        raw: part,
+        delta: content,
+      };
     }
     return;
   }
diff --git a/packages/core/src/llm/mistral.ts b/packages/core/src/llm/mistral.ts
index 28a11ab41c202859b9029333285ca04e87f47975..d933327b5ae6cabb02a5aaa991e38c2d432623ea 100644
--- a/packages/core/src/llm/mistral.ts
+++ b/packages/core/src/llm/mistral.ts
@@ -137,6 +137,7 @@ export class MistralAI extends BaseLLM {
       idx_counter++;
 
       yield {
+        raw: part,
         delta: part.choices[0].delta.content ?? "",
       };
     }
diff --git a/packages/core/src/llm/ollama.ts b/packages/core/src/llm/ollama.ts
index 85e475cf27f90112f93108fa9303cbffea5d3582..c7b132bc80ffe865850637162346363ae11f3c1e 100644
--- a/packages/core/src/llm/ollama.ts
+++ b/packages/core/src/llm/ollama.ts
@@ -14,6 +14,7 @@ import type {
 
 const messageAccessor = (data: any): ChatResponseChunk => {
   return {
+    raw: data,
     delta: data.message.content,
   };
 };
diff --git a/packages/core/src/llm/open_ai.ts b/packages/core/src/llm/open_ai.ts
index 6d6db4eb58f1f43c178fdc3f166fa3b3749b9bb3..6279f924ec33757bf513a5d6e77e0348d144a3ad 100644
--- a/packages/core/src/llm/open_ai.ts
+++ b/packages/core/src/llm/open_ai.ts
@@ -404,6 +404,7 @@ export class OpenAI extends BaseLLM<
       });
 
       yield {
+        raw: part,
        // add tool calls to final chunk
        options: toolCalls.length > 0 ? { toolCalls: toolCalls } : {},
        delta: choice.delta.content ?? "",
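With every adapter now forwarding its provider payload, a listener can pair each text delta with the raw chunk it came from. A small sketch of subscribing to the new event (the payload shape follows the `LLMStreamEvent` type introduced below; the shape of `raw` varies by provider):

```ts
import { Settings } from "llamaindex";

Settings.callbackManager.on("llm-stream", (event) => {
  const { id, chunk } = event.detail.payload;
  // `chunk.raw` is whatever the provider sent for this chunk: an OpenAI
  // ChatCompletionChunk, an Anthropic stream event, an Ollama message, ...
  console.log(id, chunk.delta, chunk.raw);
});
```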
"", diff --git a/packages/core/src/llm/types.ts b/packages/core/src/llm/types.ts index 1b27b130921e0778a96184a4b83ca7a4ea4616d6..bbe8aa8e061b222e0b88e071a11367d7d11cb641 100644 --- a/packages/core/src/llm/types.ts +++ b/packages/core/src/llm/types.ts @@ -22,6 +22,13 @@ export type LLMEndEvent = LLMBaseEvent< response: ChatResponse; } >; +export type LLMStreamEvent = LLMBaseEvent< + "llm-stream", + { + id: UUID; + chunk: ChatResponseChunk; + } +>; /** * @internal @@ -127,7 +134,7 @@ export interface ChatResponse< /** * Raw response from the LLM * - * It's possible that this is `null` if the LLM response an iterable of chunks + * If LLM response an iterable of chunks, this will be an array of those chunks */ raw: object | null; } @@ -140,10 +147,12 @@ export type ChatResponseChunk< > = AdditionalMessageOptions extends Record<string, unknown> ? { + raw: object | null; delta: string; options?: AdditionalMessageOptions; } : { + raw: object | null; delta: string; options: AdditionalMessageOptions; }; diff --git a/packages/core/src/llm/utils.ts b/packages/core/src/llm/utils.ts index b37cc05daebde88b20b4f60994b0f5afedf85b6a..44f256cbc6bfa84a5ab2714b539f7c73b8fc565a 100644 --- a/packages/core/src/llm/utils.ts +++ b/packages/core/src/llm/utils.ts @@ -2,6 +2,7 @@ import { AsyncLocalStorage, randomUUID } from "@llamaindex/env"; import { getCallbackManager } from "../internal/settings/CallbackManager.js"; import type { ChatResponse, + ChatResponseChunk, LLM, LLMChat, MessageContent, @@ -83,14 +84,14 @@ export function wrapLLMEvent( [Symbol.asyncIterator]: response[Symbol.asyncIterator].bind(response), }; response[Symbol.asyncIterator] = async function* () { - const finalResponse: ChatResponse = { - raw: null, + const finalResponse = { + raw: [] as ChatResponseChunk[], message: { content: "", role: "assistant", options: {}, }, - }; + } satisfies ChatResponse; let firstOne = false; for await (const chunk of originalAsyncIterator) { if (!firstOne) { @@ -105,6 +106,13 @@ export function wrapLLMEvent( ...chunk.options, }; } + getCallbackManager().dispatchEvent("llm-stream", { + payload: { + id, + chunk, + }, + }); + finalResponse.raw.push(chunk); yield chunk; } snapshot(() => { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d64e6b9d41e3247008e2d1d4ca3ed30a63211787..1a020a65c17db6deb517c5079761a754c0eac71f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -149,6 +149,9 @@ importers: ts-node: specifier: ^10.9.2 version: 10.9.2(@types/node@18.19.31)(typescript@5.4.4) + tsx: + specifier: ^4.7.2 + version: 4.7.2 typescript: specifier: ^5.4.4 version: 5.4.4