diff --git a/server/endpoints/embed/index.js b/server/endpoints/embed/index.js
index 25e7cb48e974a7d53b9eda51289926ccd3dcf004..7db2539f811242a9d6a4fa24aabefd7c4f53cb81 100644
--- a/server/endpoints/embed/index.js
+++ b/server/endpoints/embed/index.js
@@ -56,6 +56,7 @@ function embeddedEndpoints(app) {
         writeResponseChunk(response, {
           id: uuidv4(),
           type: "abort",
+          sources: [],
           textResponse: null,
           close: true,
           error: e.message,
@@ -72,11 +73,15 @@ function embeddedEndpoints(app) {
       try {
         const { sessionId } = request.params;
         const embed = response.locals.embedConfig;
+        const history = await EmbedChats.forEmbedByUser(
+          embed.id,
+          sessionId,
+          null,
+          null,
+          true
+        );
-        const history = await EmbedChats.forEmbedByUser(embed.id, sessionId);
 
-        response.status(200).json({
-          history: convertToChatHistory(history),
-        });
+        response.status(200).json({ history: convertToChatHistory(history) });
       } catch (e) {
         console.error(e.message, e);
         response.sendStatus(500).end();
diff --git a/server/models/embedChats.js b/server/models/embedChats.js
index 1c46f6d4a3d9c1c37cdf4045ebc2020198da5440..9f11b1c6e56e1fbcd1d335b329cfc3189b00d119 100644
--- a/server/models/embedChats.js
+++ b/server/models/embedChats.js
@@ -1,5 +1,17 @@
+const { safeJsonParse } = require("../utils/http");
 const prisma = require("../utils/prisma");
 
+/**
+ * @typedef {Object} EmbedChat
+ * @property {number} id
+ * @property {number} embed_id
+ * @property {string} prompt
+ * @property {string} response
+ * @property {string} connection_information
+ * @property {string} session_id
+ * @property {boolean} include
+ */
+
 const EmbedChats = {
   new: async function ({
     embedId,
@@ -25,11 +37,36 @@ const EmbedChats = {
     }
   },
 
+  /**
+   * Loops through each chat and filters out the sources from the response object.
+   * We do this when returning /history of an embed to the frontend to prevent inadvertent leaking
+   * of private sources the user may not have intended to share with users.
+   * @param {EmbedChat[]} chats
+   * @returns {EmbedChat[]} Returns a new array of chats with the sources filtered out of responses
+   */
+  filterSources: function (chats) {
+    return chats.map((chat) => {
+      const { response, ...rest } = chat;
+      const { sources, ...responseRest } = safeJsonParse(response);
+      return { ...rest, response: JSON.stringify(responseRest) };
+    });
+  },
+
+  /**
+   * Fetches chats for a given embed and session id.
+   * @param {number} embedId the id of the embed to fetch chats for
+   * @param {string} sessionId the id of the session to fetch chats for
+   * @param {number|null} limit the maximum number of chats to fetch
+   * @param {string|null} orderBy the order to fetch chats in
+   * @param {boolean} filterSources whether to filter out the sources from the response (default: false)
+   * @returns {Promise<EmbedChat[]>} Returns an array of chats for the given embed and session
+   */
   forEmbedByUser: async function (
     embedId = null,
     sessionId = null,
     limit = null,
-    orderBy = null
+    orderBy = null,
+    filterSources = false
   ) {
     if (!embedId || !sessionId) return [];
 
@@ -43,7 +80,7 @@ const EmbedChats = {
         ...(limit !== null ? { take: limit } : {}),
         ...(orderBy !== null ? { orderBy } : { orderBy: { id: "asc" } }),
       });
-      return chats;
+      return filterSources ? this.filterSources(chats) : chats;
     } catch (error) {
       console.error(error.message);
       return [];
diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js
index 8108060590baeeea816dcf4e5b8eae78a4113234..b4d1a03fbc7b98d4d7192d5f053f02d57bd55dd2 100644
--- a/server/utils/chats/embed.js
+++ b/server/utils/chats/embed.js
@@ -60,8 +60,7 @@ async function streamChatWithForEmbed(
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
-    messageLimit,
-    chatMode
+    messageLimit
   );
 
   // See stream.js comment for more information on this implementation.
@@ -113,16 +112,27 @@
     return;
   }
 
-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: embed.workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get the current search?
+  // This is to give the LLM the ability to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question - that is visible in the history, so it logically makes sense
+  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];
 
-  // If in query mode and no sources are found, do not
+  // If in query mode and no sources are found in the current search or backfilled from history, do not
   // let the LLM try to hallucinate a response or use general knowledge
-  if (
-    chatMode === "query" &&
-    sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     writeResponseChunk(response, {
       id: uuid,
       type: "textResponse",
@@ -178,7 +188,7 @@
   await EmbedChats.new({
     embedId: embed.id,
     prompt: message,
-    response: { text: completeText, type: chatMode },
+    response: { text: completeText, type: chatMode, sources },
     connection_information: response.locals.connection
       ? {
           ...response.locals.connection,
@@ -190,15 +200,13 @@
   return;
 }
 
-// On query we don't return message history. All other chat modes and when chatting
-// with no embeddings we return history.
-async function recentEmbedChatHistory(
-  sessionId,
-  embed,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
+/**
+ * @param {string} sessionId the session id of the user from the embed widget
+ * @param {Object} embed the embed config object
+ * @param {Number} messageLimit the number of messages to return
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>}
+ */
+async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",
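
Reviewer note (not part of the patch): a minimal standalone sketch of what the new filterSources pass does to a stored chat row before GET /embed/:embedId/:sessionId returns it. The inline safeJsonParse stand-in and the sample row are assumptions for the sake of a runnable example; the real helper lives in server/utils/http.

// Sketch: strip `sources` from a persisted embed-chat response.
// The mapping body is copied from the patch; everything else here
// (safeJsonParse stand-in, sample row) is illustrative only.
const safeJsonParse = (jsonString, fallback = {}) => {
  try {
    return JSON.parse(jsonString);
  } catch {
    return fallback;
  }
};

const filterSources = (chats) =>
  chats.map((chat) => {
    const { response, ...rest } = chat;
    // Pull `sources` off the parsed response and re-serialize the remainder,
    // so text/type survive but document citations never reach the widget.
    const { sources: _sources, ...responseRest } = safeJsonParse(response);
    return { ...rest, response: JSON.stringify(responseRest) };
  });

// Hypothetical row shaped like an embed_chats record written by this patch:
const row = {
  id: 1,
  session_id: "abc-123",
  prompt: "What does the handbook say about PTO?",
  response: JSON.stringify({
    text: "PTO accrues monthly.",
    type: "query",
    sources: [{ title: "internal-handbook.pdf" }], // persisted, never sent to /history
  }),
};

console.log(filterSources([row])[0].response);
// -> {"text":"PTO accrues monthly.","type":"query"}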