Unverified Commit c3723ce2 authored by Timothy Carambat, committed by GitHub

Add backfilling on `query` for chat widget to improve UX (#2482)

parent be6289d1
@@ -56,6 +56,7 @@ function embeddedEndpoints(app) {
         writeResponseChunk(response, {
           id: uuidv4(),
           type: "abort",
+          sources: [],
           textResponse: null,
           close: true,
           error: e.message,
@@ -72,11 +73,15 @@ function embeddedEndpoints(app) {
       try {
         const { sessionId } = request.params;
         const embed = response.locals.embedConfig;
-        const history = await EmbedChats.forEmbedByUser(embed.id, sessionId);
-        response.status(200).json({
-          history: convertToChatHistory(history),
-        });
+        const history = await EmbedChats.forEmbedByUser(
+          embed.id,
+          sessionId,
+          null,
+          null,
+          true
+        );
+        response.status(200).json({ history: convertToChatHistory(history) });
       } catch (e) {
         console.error(e.message, e);
         response.sendStatus(500).end();
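The history endpoint now opts into source filtering via the trailing positional argument. As a readability aid, here is the same call with each argument annotated; this is an illustrative sketch, not code from the commit, and the defaults noted in the comments come from the model shown in the next section:

```js
// Sketch only: the call above with its positional parameters labeled.
const history = await EmbedChats.forEmbedByUser(
  embed.id,  // embedId: which embed widget's chats to load
  sessionId, // sessionId: the visitor's widget session
  null,      // limit: null means no cap on rows returned
  null,      // orderBy: null falls back to { orderBy: { id: "asc" } } in the model
  true       // filterSources: strip private sources before returning history
);
```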
+const { safeJsonParse } = require("../utils/http");
 const prisma = require("../utils/prisma");

+/**
+ * @typedef {Object} EmbedChat
+ * @property {number} id
+ * @property {number} embed_id
+ * @property {string} prompt
+ * @property {string} response
+ * @property {string} connection_information
+ * @property {string} session_id
+ * @property {boolean} include
+ */
 const EmbedChats = {
   new: async function ({
     embedId,
@@ -25,11 +37,36 @@ const EmbedChats = {
     }
   },

+  /**
+   * Loops through each chat and filters the sources out of the response object.
+   * We do this when returning the /history of an embed to the frontend to prevent
+   * inadvertently leaking private sources the embed owner may not have intended
+   * to share with end users.
+   * @param {EmbedChat[]} chats
+   * @returns {EmbedChat[]} Returns a new array of chats with the sources filtered out of responses
+   */
+  filterSources: function (chats) {
+    return chats.map((chat) => {
+      const { response, ...rest } = chat;
+      const { sources, ...responseRest } = safeJsonParse(response);
+      return { ...rest, response: JSON.stringify(responseRest) };
+    });
+  },
+
+  /**
+   * Fetches chats for a given embed and session id.
+   * @param {number} embedId the id of the embed to fetch chats for
+   * @param {string} sessionId the id of the session to fetch chats for
+   * @param {number|null} limit the maximum number of chats to fetch
+   * @param {object|null} orderBy the Prisma ordering to fetch chats in
+   * @param {boolean} filterSources whether to filter the sources out of each response (default: false)
+   * @returns {Promise<EmbedChat[]>} Returns an array of chats for the given embed and session
+   */
   forEmbedByUser: async function (
     embedId = null,
     sessionId = null,
     limit = null,
-    orderBy = null
+    orderBy = null,
+    filterSources = false
   ) {
     if (!embedId || !sessionId) return [];
@@ -43,7 +80,7 @@ const EmbedChats = {
       ...(limit !== null ? { take: limit } : {}),
       ...(orderBy !== null ? { orderBy } : { orderBy: { id: "asc" } }),
     });
-    return chats;
+    return filterSources ? this.filterSources(chats) : chats;
   } catch (error) {
     console.error(error.message);
     return [];
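To make the model change concrete, here is a self-contained sketch of what `filterSources` does to one stored row. The `safeJsonParse` stand-in and the row contents are assumptions for illustration; the real helper is required from `../utils/http`:

```js
// Stand-in for the safeJsonParse helper (assumption: JSON.parse with a fallback).
const safeJsonParse = (jsonString, fallback = {}) => {
  try {
    return JSON.parse(jsonString);
  } catch {
    return fallback;
  }
};

// Hypothetical stored chat row, shaped like the EmbedChat typedef above.
const chat = {
  id: 1,
  embed_id: 7,
  session_id: "session-abc",
  prompt: "What is the refund policy?",
  response: JSON.stringify({
    text: "Refunds are issued within 30 days.",
    type: "query",
    sources: [{ title: "policy.pdf" }], // private retrieval metadata
  }),
  include: true,
};

// The same destructuring filterSources performs, applied to one row:
const { response, ...rest } = chat;
const { sources, ...responseRest } = safeJsonParse(response);
console.log({ ...rest, response: JSON.stringify(responseRest) });
// response is now '{"text":"Refunds are issued within 30 days.","type":"query"}'
```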
@@ -60,8 +60,7 @@ async function streamChatWithForEmbed(
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
-    messageLimit,
-    chatMode
+    messageLimit
   );
   // See stream.js comment for more information on this implementation.
@@ -113,16 +112,27 @@ async function streamChatWithForEmbed(
       return;
     }

-    contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+    const { fillSourceWindow } = require("../helpers/chat");
+    const filledSources = fillSourceWindow({
+      nDocs: embed.workspace?.topN || 4,
+      searchResults: vectorSearchResults.sources,
+      history: rawHistory,
+      filterIdentifiers: pinnedDocIdentifiers,
+    });
+
+    // Why does contextTexts get all the info, but sources only get the current search?
+    // This lets the LLM "comprehend" a contextual response without populating the
+    // citations under a response with documents the user "thinks" are irrelevant,
+    // given how we backfill context to keep chats with the LLM more accurate.
+    // If a past citation was used to answer the question, it is already visible in
+    // the history, so a new response never appears to cite information that is
+    // irrelevant to the given prompt.
+    // TLDR: reduces GitHub issues for "LLM citing a document that has no answer in it"
+    // while keeping answers highly accurate.
+    contextTexts = [...contextTexts, ...filledSources.contextTexts];
     sources = [...sources, ...vectorSearchResults.sources];

-    // If in query mode and no sources are found, do not
+    // If in query mode and no sources are found in the current search or backfilled
+    // from history, do not
     // let the LLM try to hallucinate a response or use general knowledge
-    if (
-      chatMode === "query" &&
-      sources.length === 0 &&
-      pinnedDocIdentifiers.length === 0
-    ) {
+    if (chatMode === "query" && contextTexts.length === 0) {
       writeResponseChunk(response, {
         id: uuid,
         type: "textResponse",
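`fillSourceWindow` itself lives in `../helpers/chat` and is not part of this diff. Below is a minimal sketch of the backfilling idea it appears to implement, under the assumption that it tops up the current search results with sources recovered from prior chats until the window holds `nDocs` entries, skipping pinned documents; field names like `source.id` are hypothetical:

```js
// Assumed approximation of fillSourceWindow; the real helper may differ.
function fillSourceWindowSketch({
  nDocs = 4,              // target size of the context window
  searchResults = [],     // sources from the current vector search
  history = [],           // raw prior chats for this session
  filterIdentifiers = [], // identifiers of pinned docs already in context
} = {}) {
  const sources = [...searchResults];
  for (const chat of history) {
    if (sources.length >= nDocs) break;
    // Each stored response may carry the sources used for that past answer.
    const { sources: pastSources = [] } = JSON.parse(chat.response || "{}");
    for (const source of pastSources) {
      if (sources.length >= nDocs) break;
      if (filterIdentifiers.includes(source.id)) continue; // skip pinned docs
      sources.push(source);
    }
  }
  // Shape matches the caller's usage: filledSources.contextTexts.
  return { sources, contextTexts: sources.map((source) => source.text) };
}
```

Under this reading, `contextTexts` can carry backfilled history while `sources` (the citations shown to the user) stays limited to the current search, which is exactly the trade-off the comment in the hunk above describes.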
@@ -178,7 +188,7 @@ async function streamChatWithForEmbed(
   await EmbedChats.new({
     embedId: embed.id,
     prompt: message,
-    response: { text: completeText, type: chatMode },
+    response: { text: completeText, type: chatMode, sources },
     connection_information: response.locals.connection
       ? {
           ...response.locals.connection,
@@ -190,15 +200,13 @@
   return;
 }

-// On query we don't return message history. All other chat modes, and when chatting
-// with no embeddings, we return history.
-async function recentEmbedChatHistory(
-  sessionId,
-  embed,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
+/**
+ * @param {string} sessionId the session id of the user from the embed widget
+ * @param {Object} embed the embed config object
+ * @param {Number} messageLimit the number of messages to return
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>}
+ */
+async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",
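With the `chatMode === "query"` early return removed, history is now fetched for every chat mode so that query sessions can backfill context. The tail of `recentEmbedChatHistory` is truncated in this diff; below is a plausible sketch of the full flow, assuming the rows are reversed into chronological order and mapped into prompt history (the real code uses conversion helpers not shown here):

```js
// Plausible sketch of the full function; the conversion details are assumptions.
async function recentEmbedChatHistorySketch(sessionId, embed, messageLimit = 20) {
  // Fetch the newest `messageLimit` chats, then reverse to oldest-first.
  const rawHistory = (
    await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
      id: "desc",
    })
  ).reverse();

  // Expand each stored chat into a user/assistant message pair.
  const chatHistory = rawHistory.flatMap((chat) => {
    const { text = "" } = JSON.parse(chat.response || "{}");
    return [
      { role: "user", content: chat.prompt },
      { role: "assistant", content: text },
    ];
  });

  return { rawHistory, chatHistory };
}
```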