diff --git a/server/endpoints/embed/index.js b/server/endpoints/embed/index.js
index 25e7cb48e974a7d53b9eda51289926ccd3dcf004..7db2539f811242a9d6a4fa24aabefd7c4f53cb81 100644
--- a/server/endpoints/embed/index.js
+++ b/server/endpoints/embed/index.js
@@ -56,6 +56,7 @@ function embeddedEndpoints(app) {
         writeResponseChunk(response, {
           id: uuidv4(),
           type: "abort",
+          sources: [],
           textResponse: null,
           close: true,
           error: e.message,
@@ -72,11 +73,15 @@ function embeddedEndpoints(app) {
       try {
         const { sessionId } = request.params;
         const embed = response.locals.embedConfig;
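+        // Fetch the full session history; the trailing `true` enables filterSources so the
+        // stored citation sources are stripped from each response before it is returned to
+        // the embed widget (see EmbedChats.filterSources).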
+        const history = await EmbedChats.forEmbedByUser(
+          embed.id,
+          sessionId,
+          null,
+          null,
+          true
+        );
 
-        const history = await EmbedChats.forEmbedByUser(embed.id, sessionId);
-        response.status(200).json({
-          history: convertToChatHistory(history),
-        });
+        response.status(200).json({ history: convertToChatHistory(history) });
       } catch (e) {
         console.error(e.message, e);
         response.sendStatus(500).end();
diff --git a/server/models/embedChats.js b/server/models/embedChats.js
index 1c46f6d4a3d9c1c37cdf4045ebc2020198da5440..9f11b1c6e56e1fbcd1d335b329cfc3189b00d119 100644
--- a/server/models/embedChats.js
+++ b/server/models/embedChats.js
@@ -1,5 +1,17 @@
+const { safeJsonParse } = require("../utils/http");
 const prisma = require("../utils/prisma");
 
+/**
+ * @typedef {Object} EmbedChat
+ * @property {number} id
+ * @property {number} embed_id
+ * @property {string} prompt
+ * @property {string} response
+ * @property {string} connection_information
+ * @property {string} session_id
+ * @property {boolean} include
+ */
+
 const EmbedChats = {
   new: async function ({
     embedId,
@@ -25,11 +37,36 @@ const EmbedChats = {
     }
   },
 
+  /**
+   * Loops through each chat and filters out the sources from the response object.
+   * We do this when returning the /history of an embed to the frontend to prevent inadvertently
+   * leaking private sources the embed creator may not have intended to share with end users.
+   * @param {EmbedChat[]} chats
+   * @returns {EmbedChat[]} Returns a new array of chats with the sources filtered out of responses
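+   * @example
+   * // Illustrative only - a hypothetical stored chat record:
+   * // filterSources([{ id: 1, response: '{"text":"Hi!","type":"chat","sources":[{"title":"doc.pdf"}]}' }])
+   * // => [{ id: 1, response: '{"text":"Hi!","type":"chat"}' }]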
+   */
+  filterSources: function (chats) {
+    return chats.map((chat) => {
+      const { response, ...rest } = chat;
+      const { sources, ...responseRest } = safeJsonParse(response);
+      return { ...rest, response: JSON.stringify(responseRest) };
+    });
+  },
+
+  /**
+   * Fetches chats for a given embed and session id.
+   * @param {number} embedId the id of the embed to fetch chats for
+   * @param {string} sessionId the id of the session to fetch chats for
+   * @param {number|null} limit the maximum number of chats to fetch
+   * @param {object|null} orderBy a Prisma orderBy clause to apply (defaults to { id: "asc" })
+   * @param {boolean} filterSources whether to filter out the sources from the response (default: false)
+   * @returns {Promise<EmbedChat[]>} Returns an array of chats for the given embed and session
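+   * @example
+   * // As used by the embed /:sessionId/history endpoint - fetch every chat in the session
+   * // with sources stripped from each stored response:
+   * // await EmbedChats.forEmbedByUser(embed.id, sessionId, null, null, true);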
+   */
   forEmbedByUser: async function (
     embedId = null,
     sessionId = null,
     limit = null,
-    orderBy = null
+    orderBy = null,
+    filterSources = false
   ) {
     if (!embedId || !sessionId) return [];
 
@@ -43,7 +80,7 @@ const EmbedChats = {
         ...(limit !== null ? { take: limit } : {}),
         ...(orderBy !== null ? { orderBy } : { orderBy: { id: "asc" } }),
       });
-      return chats;
+      return filterSources ? this.filterSources(chats) : chats;
     } catch (error) {
       console.error(error.message);
       return [];
diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js
index 8108060590baeeea816dcf4e5b8eae78a4113234..b4d1a03fbc7b98d4d7192d5f053f02d57bd55dd2 100644
--- a/server/utils/chats/embed.js
+++ b/server/utils/chats/embed.js
@@ -60,8 +60,7 @@ async function streamChatWithForEmbed(
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
-    messageLimit,
-    chatMode
+    messageLimit
   );
 
   // See stream.js comment for more information on this implementation.
@@ -113,16 +112,27 @@ async function streamChatWithForEmbed(
     return;
   }
 
-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
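+  // Backfill the source window: merge the current vector search results with sources stored in
+  // this session's prior chats (up to the workspace's topN, default 4) so the LLM keeps context
+  // it has already relied on in earlier answers.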
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: embed.workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get the current search results?
+  // This lets the LLM "comprehend" a contextual response without populating the Citations under
+  // a response with documents the user may believe are irrelevant, given how we backfill context
+  // to keep chats with the LLM more accurate. If a past citation was used to answer an earlier
+  // question, it is already visible in the history, so a new response never appears to rely on
+  // information that is irrelevant to the given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];
 
-  // If in query mode and no sources are found, do not
+  // If in query mode and no sources are found in the current search or backfilled from history, do not
   // let the LLM try to hallucinate a response or use general knowledge
-  if (
-    chatMode === "query" &&
-    sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     writeResponseChunk(response, {
       id: uuid,
       type: "textResponse",
@@ -178,7 +188,7 @@ async function streamChatWithForEmbed(
   await EmbedChats.new({
     embedId: embed.id,
     prompt: message,
-    response: { text: completeText, type: chatMode },
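+    // Persist the citation sources with the stored response; EmbedChats.filterSources strips
+    // them back out whenever the widget requests /history for this session.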
+    response: { text: completeText, type: chatMode, sources },
     connection_information: response.locals.connection
       ? {
           ...response.locals.connection,
@@ -190,15 +200,13 @@ async function streamChatWithForEmbed(
   return;
 }
 
-// On query we don't return message history. All other chat modes and when chatting
-// with no embeddings we return history.
-async function recentEmbedChatHistory(
-  sessionId,
-  embed,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
+/**
+ * @param {string} sessionId the session id of the user from embed widget
+ * @param {Object} embed the embed config object
+ * @param {number} messageLimit the number of messages to return
+ * @returns {Promise<{rawHistory: import("@prisma/client").embed_chats[], chatHistory: {role: string, content: string}[]}>}
+ */
+async function recentEmbedChatHistory(sessionId, embed, messageLimit = 20) {
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",