Commit c3723ce2 (Unverified)
Authored 5 months ago by Timothy Carambat; committed by GitHub 5 months ago

Add backfilling on `query` for chat widget to improve UX (#2482)

Parent: be6289d1
Showing 3 changed files with 75 additions and 25 deletions:

- server/endpoints/embed/index.js (+9, -4)
- server/models/embedChats.js (+39, -2)
- server/utils/chats/embed.js (+27, -19)
server/endpoints/embed/index.js (+9, -4)

@@ -56,6 +56,7 @@ function embeddedEndpoints(app) {
         writeResponseChunk(response, {
           id: uuidv4(),
           type: "abort",
+          sources: [],
           textResponse: null,
           close: true,
           error: e.message,
@@ -72,11 +73,15 @@ function embeddedEndpoints(app) {
       try {
         const { sessionId } = request.params;
         const embed = response.locals.embedConfig;

-        const history = await EmbedChats.forEmbedByUser(embed.id, sessionId);
-        response.status(200).json({
-          history: convertToChatHistory(history),
-        });
+        const history = await EmbedChats.forEmbedByUser(
+          embed.id,
+          sessionId,
+          null,
+          null,
+          true
+        );
+        response.status(200).json({ history: convertToChatHistory(history) });
       } catch (e) {
         console.error(e.message, e);
         response.sendStatus(500).end();
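For readers who only look at this file, the bare `null, null, true` arguments are opaque until the model change below. The following is an annotated sketch only, not code from the commit; the wrapper function and its parameters exist purely so the snippet stands alone, while `EmbedChats`, `convertToChatHistory`, `embed`, and `sessionId` refer to the names used in the real handler above.

```js
// Annotated sketch of the updated /history handler body (dependencies passed in
// because the surrounding Express route is not part of this hunk).
async function fetchWidgetHistory(EmbedChats, convertToChatHistory, embed, sessionId) {
  const history = await EmbedChats.forEmbedByUser(
    embed.id,
    sessionId,
    null, // limit         -> no cap on the number of chats returned
    null, // orderBy       -> fall back to the model default ({ id: "asc" })
    true  // filterSources -> strip `sources` from each stored response before returning
  );
  return { history: convertToChatHistory(history) };
}
```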
server/models/embedChats.js (+39, -2)

+const { safeJsonParse } = require("../utils/http");
 const prisma = require("../utils/prisma");

+/**
+ * @typedef {Object} EmbedChat
+ * @property {number} id
+ * @property {number} embed_id
+ * @property {string} prompt
+ * @property {string} response
+ * @property {string} connection_information
+ * @property {string} session_id
+ * @property {boolean} include
+ */
+
 const EmbedChats = {
   new: async function ({
     embedId,
@@ -25,11 +37,36 @@ const EmbedChats = {
     }
   },

+  /**
+   * Loops through each chat and filters out the sources from the response object.
+   * We do this when returning /history of an embed to the frontend to prevent inadvertent leaking
+   * of private sources the user may not have intended to share with users.
+   * @param {EmbedChat[]} chats
+   * @returns {EmbedChat[]} Returns a new array of chats with the sources filtered out of responses
+   */
+  filterSources: function (chats) {
+    return chats.map((chat) => {
+      const { response, ...rest } = chat;
+      const { sources, ...responseRest } = safeJsonParse(response);
+      return { ...rest, response: JSON.stringify(responseRest) };
+    });
+  },
+
+  /**
+   * Fetches chats for a given embed and session id.
+   * @param {number} embedId the id of the embed to fetch chats for
+   * @param {string} sessionId the id of the session to fetch chats for
+   * @param {number|null} limit the maximum number of chats to fetch
+   * @param {string|null} orderBy the order to fetch chats in
+   * @param {boolean} filterSources whether to filter out the sources from the response (default: false)
+   * @returns {Promise<EmbedChat[]>} Returns an array of chats for the given embed and session
+   */
   forEmbedByUser: async function (
     embedId = null,
     sessionId = null,
     limit = null,
-    orderBy = null
+    orderBy = null,
+    filterSources = false
   ) {
     if (!embedId || !sessionId) return [];
@@ -43,7 +80,7 @@ const EmbedChats = {
         ...(limit !== null ? { take: limit } : {}),
         ...(orderBy !== null ? { orderBy } : { orderBy: { id: "asc" } }),
       });
-      return chats;
+      return filterSources ? this.filterSources(chats) : chats;
     } catch (error) {
       console.error(error.message);
       return [];
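To make the new `filterSources` path concrete, here is a standalone before/after on a single invented chat row. Only the field names follow the `EmbedChat` typedef above; the values are made up, and plain `JSON.parse` stands in for the project's `safeJsonParse` helper.

```js
// A stored embed chat as it might look after this commit, with `sources` persisted
// inside the serialized `response` (all values invented for illustration).
const sampleChat = {
  id: 1,
  embed_id: 7,
  session_id: "d3f1c2a0",
  prompt: "What is your refund policy?",
  include: true,
  connection_information: "{}",
  response: JSON.stringify({
    text: "Refunds are available within 30 days.",
    type: "query",
    sources: [{ title: "refund-policy.pdf", score: 0.82 }],
  }),
};

// The same transformation filterSources() applies per chat: pull `sources` out of the
// parsed response and re-serialize the rest before it is sent to the widget.
const { response, ...rest } = sampleChat;
const { sources, ...responseRest } = JSON.parse(response);
const filtered = { ...rest, response: JSON.stringify(responseRest) };

console.log(filtered.response);
// -> {"text":"Refunds are available within 30 days.","type":"query"}
```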
server/utils/chats/embed.js (+27, -19)

@@ -60,8 +60,7 @@ async function streamChatWithForEmbed(
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
-    messageLimit,
-    chatMode
+    messageLimit
   );

   // See stream.js comment for more information on this implementation.
@@ -113,16 +112,27 @@ async function streamChatWithForEmbed(
       return;
     }

-    contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+    const { fillSourceWindow } = require("../helpers/chat");
+    const filledSources = fillSourceWindow({
+      nDocs: embed.workspace?.topN || 4,
+      searchResults: vectorSearchResults.sources,
+      history: rawHistory,
+      filterIdentifiers: pinnedDocIdentifiers,
+    });
+
+    // Why does contextTexts get all the info, but sources only get current search?
+    // This is to give the ability of the LLM to "comprehend" a contextual response without
+    // populating the Citations under a response with documents the user "thinks" are irrelevant
+    // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+    // If a past citation was used to answer the question - that is visible in the history so it logically makes sense
+    // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+    // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+    contextTexts = [...contextTexts, ...filledSources.contextTexts];
     sources = [...sources, ...vectorSearchResults.sources];

-    // If in query mode and no sources are found, do not
+    // If in query mode and no sources are found in current search or backfilled from history, do not
     // let the LLM try to hallucinate a response or use general knowledge
-    if (
-      chatMode === "query" &&
-      sources.length === 0 &&
-      pinnedDocIdentifiers.length === 0
-    ) {
+    if (chatMode === "query" && contextTexts.length === 0) {
       writeResponseChunk(response, {
         id: uuid,
         type: "textResponse",
@@ -178,7 +188,7 @@ async function streamChatWithForEmbed(
   await EmbedChats.new({
     embedId: embed.id,
     prompt: message,
-    response: { text: completeText, type: chatMode },
+    response: { text: completeText, type: chatMode, sources },
     connection_information: response.locals.connection
       ? {
           ...response.locals.connection,
@@ -190,15 +200,13 @@ async function streamChatWithForEmbed(
   return;
 }

-// On query we don't return message history. All other chat modes and when chatting
-// with no embeddings we return history.
-async function recentEmbedChatHistory(
-  sessionId,
-  embed,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
+/**
+ * @param {string} sessionId the session id of the user from embed widget
+ * @param {Object} embed the embed config object
+ * @param {Number} messageLimit the number of messages to return
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>}
+ */
+async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",
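The `fillSourceWindow` helper itself lives under `server/utils/helpers/chat` and is not part of this diff, so the sketch below only illustrates the backfilling idea implied by its call site and the comments above: top up the current vector-search results with sources saved on earlier chats in the session (which this commit now persists), skipping pinned documents, until roughly `nDocs` context snippets are available. The field names read from each source object are assumptions, not taken from the repository.

```js
// Conceptual sketch only; the real helper may differ in ordering, deduplication,
// and the exact source fields it reads.
function fillSourceWindowSketch({
  nDocs = 4, // target number of context snippets (embed.workspace?.topN || 4 at the call site)
  searchResults = [], // sources returned by the current vector search
  history = [], // rawHistory rows, i.e. prior embed_chats for this session
  filterIdentifiers = [], // pinned document identifiers, injected through a separate path
}) {
  const sources = [...searchResults];
  for (const chat of history) {
    if (sources.length >= nDocs) break;
    let saved;
    try {
      saved = JSON.parse(chat.response); // stored responses now carry `sources`
    } catch {
      continue;
    }
    for (const source of saved?.sources || []) {
      if (sources.length >= nDocs) break;
      if (filterIdentifiers.includes(source.identifier)) continue;
      sources.push(source);
    }
  }
  // The call site only consumes `contextTexts`, so backfilled documents feed the
  // LLM prompt while user-facing citations stay limited to the current search.
  return { contextTexts: sources.map((s) => s.text), sources };
}
```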