diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx index bd656a7f79426406f4164aa54b34b7a463ee7382..0ecf3238f1857df315810633f8b0c53ddcbc13ee 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx @@ -1,6 +1,6 @@ -import React, { useState, useRef } from "react"; +import React, { useState, useRef, memo, useEffect } from "react"; import { isMobile } from "react-device-detect"; -import { Loader, Menu, Send, X } from "react-feather"; +import { Loader, Menu, X } from "react-feather"; export default function PromptInput({ workspace, @@ -35,6 +35,17 @@ export default function PromptInput({ }; const setTextCommand = (command = "") => { + const storageKey = `workspace_chat_mode_${workspace.slug}`; + if (command === "/query") { + window.localStorage.setItem(storageKey, "query"); + window.dispatchEvent(new Event("workspace_chat_mode_update")); + return; + } else if (command === "/conversation") { + window.localStorage.setItem(storageKey, "chat"); + window.dispatchEvent(new Event("workspace_chat_mode_update")); + return; + } + onChange({ target: { value: `${command} ${message}` } }); }; @@ -45,13 +56,19 @@ export default function PromptInput({ className="flex flex-col gap-y-1 bg-white dark:bg-black-900 md:bg-transparent rounded-t-lg md:w-3/4 w-full mx-auto" > <div className="flex items-center py-2 px-4 rounded-lg"> - {/* Toggle selector? */} - {/* <button + <CommandMenu + workspace={workspace} + show={showMenu} + handleClick={setTextCommand} + hide={() => setShowMenu(false)} + /> + <button onClick={() => setShowMenu(!showMenu)} type="button" - className="p-2 text-slate-200 bg-transparent rounded-md hover:bg-gray-50 dark:hover:bg-stone-500"> + className="p-2 text-slate-200 bg-transparent rounded-md hover:bg-gray-50 dark:hover:bg-stone-500" + > <Menu className="w-4 h-4 md:h-6 md:w-6" /> - </button> */} + </button> <textarea onKeyUp={adjustTextArea} onKeyDown={captureEnter} @@ -94,19 +111,92 @@ export default function PromptInput({ <span className="sr-only">Send message</span> </button> </div> - <Tracking /> + <Tracking workspaceSlug={workspace.slug} /> </form> </div> ); } -const Tracking = () => { +const Tracking = memo(({ workspaceSlug }) => { + const storageKey = `workspace_chat_mode_${workspaceSlug}`; + const [chatMode, setChatMode] = useState( + window.localStorage.getItem(storageKey) ?? "chat" + ); + + useEffect(() => { + function watchForChatModeChange() { + if (!workspaceSlug) return; + window.addEventListener(`workspace_chat_mode_update`, () => { + try { + const chatMode = window.localStorage.getItem(storageKey); + setChatMode(chatMode); + } catch {} + }); + } + watchForChatModeChange(); + }, [workspaceSlug]); + return ( - <div className="flex flex-col w-full justify-center items-center gap-y-2 mb-2 px-4 mx:px-0"> - <p className="text-slate-400 text-xs"> + <div className="flex flex-col md:flex-row w-full justify-center items-center gap-2 mb-2 px-4 mx:px-0"> + <p className="bg-stone-600 text-slate-400 text-xs px-2 rounded-lg font-mono text-center"> + Chat mode: {chatMode} + </p> + <p className="text-slate-400 text-xs text-center"> Responses from system may produce inaccurate or invalid responses - use with caution. 
</p> </div> ); -}; +}); + +function CommandMenu({ workspace, show, handleClick, hide }) { + if (!show) return null; + const COMMANDS = [ + { + cmd: "/conversation", + description: "- switch to chat mode (remembers recent chat history) .", + }, + { + cmd: "/query", + description: "- switch to query mode (does not remember previous chats).", + }, + { cmd: "/reset", description: "- clear current chat history." }, + ]; + + return ( + <div className="absolute top-[-25vh] md:top-[-23vh] min-h-[200px] flex flex-col rounded-lg border border-slate-400 p-2 pt-4 bg-stone-600"> + <div className="flex justify-between items-center border-b border-slate-400 px-2 py-1 "> + <p className="text-slate-200">Available Commands</p> + <button + type="button" + onClick={hide} + className="p-2 rounded-lg hover:bg-slate-500 rounded-full text-slate-400" + > + <X className="h-4 w-4" /> + </button> + </div> + + <div className="flex flex-col"> + {COMMANDS.map((item, i) => { + const { cmd, description } = item; + return ( + <div className="border-b border-slate-400 p-1"> + <button + key={i} + type="button" + onClick={() => { + handleClick(cmd); + hide(); + }} + className="w-full px-4 py-2 flex items-center rounded-lg hover:bg-slate-500 gap-x-1 disabled:cursor-not-allowed" + > + <p className="text-slate-200 font-semibold">{cmd}</p> + <p className="text-slate-400 text-sm">{description}</p> + </button> + </div> + ); + })} + </div> + </div> + ); +} diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx index e26f0f09a8438ddcece20c98d2141bb5bdf0b580..f8b1fc9ba858170fbd1d74ebee3877663fbd1923 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx @@ -50,7 +50,9 @@ export default function ChatContainer({ workspace, knownHistory = [] }) { const chatResult = await Workspace.sendChat( workspace, - promptMessage.userMessage + promptMessage.userMessage, + window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ?? + "chat" ); if (!chatResult) { alert("Could not send chat."); diff --git a/server/models/workspaceChats.js b/server/models/workspaceChats.js index 3b90cc6143dac14df396a8737b1bd02f9664d538..7a2aafb8555cb672b027c77e6662ceba7fc161b9 100644 --- a/server/models/workspaceChats.js +++ b/server/models/workspaceChats.js @@ -64,11 +64,11 @@ const WorkspaceChats = { return { chat, message: null }; }, - forWorkspace: async function (workspaceId = null) { + forWorkspace: async function (workspaceId = null, limit = null) { if (!workspaceId) return []; return await this.where( `workspaceId = ${workspaceId} AND include = true`, - null, + limit, "ORDER BY id ASC" ); }, @@ -104,8 +104,8 @@ const WorkspaceChats = { const db = await this.db(); const results = await db.all( `SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${ - !!limit ? `LIMIT ${limit}` : "" - } ${!!order ? order : ""}` + !!order ? order : "" + } ${!!limit ? 
`LIMIT ${limit}` : ""} ` ); db.close(); diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index 9be40b695a3103d1c3ebe231512524b71861b310..7b2cc484750dd9262c7cecedc2a34345e093ffcf 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -59,7 +59,7 @@ function grepCommand(message) { return null; } -async function chatWithWorkspace(workspace, message, chatMode = "query") { +async function chatWithWorkspace(workspace, message, chatMode = "chat") { const uuid = uuidv4(); const openai = new OpenAi(); const VectorDb = getVectorDbClass(); @@ -104,6 +104,8 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") { error: null, }; } else { + const rawHistory = await WorkspaceChats.forWorkspace(workspace.id, 20); + const chatHistory = convertToPromptHistory(rawHistory); const { response, sources, @@ -112,6 +114,7 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") { namespace: workspace.slug, input: message, workspace, + chatHistory, }); if (!response) { return { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 6c9ea2cbf18e092a640627049777384367877be2..43cc63767c41120048e87b395ff81ce1915b855c 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -25,6 +25,7 @@ function toChunks(arr, size) { function curateSources(sources = []) { const knownDocs = []; const documents = []; + for (const source of sources) { const { metadata = {} } = source; if ( diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index bd1c6058ee599c0e4d8a4234efea2b0f54e1a4ff..532f629cd1130fc424f36e03792c51ef7ea7bcdf 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -56,6 +56,21 @@ const Chroma = { const openai = new OpenAIApi(config); return openai; }, + getChatCompletion: async function ( + openai, + messages = [], + { temperature = 0.7 } + ) { + const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo"; + const { data } = await openai.createChatCompletion({ + model, + messages, + temperature, + }); + + if (!data.hasOwnProperty("choices")) return null; + return data.choices[0].message.content; + }, llm: function ({ temperature = 0.7 }) { const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo"; return new OpenAI({ @@ -75,6 +90,24 @@ const Chroma = { ? 
data[0].embedding : null; }, + similarityResponse: async function (client, namespace, queryVector) { + const collection = await client.getCollection({ name: namespace }); + const result = { + contextTexts: [], + sourceDocuments: [], + }; + + const response = await collection.query({ + queryEmbeddings: queryVector, + nResults: 4, + }); + response.ids[0].forEach((_, i) => { + result.contextTexts.push(response.documents[0][i]); + result.sourceDocuments.push(response.metadatas[0][i]); + }); + + return result; + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client @@ -284,6 +317,55 @@ const Chroma = { message: false, }; }, + // This implementation of chat uses the chat history and modifies the system prompt at execution + // this is improved over the regular langchain implementation so that chats do not directly modify embeddings + // because then multi-user support will have all conversations mutating the base vector collection to which then + // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs + chat: async function (reqBody = {}) { + const { + namespace = null, + input, + workspace = {}, + chatHistory = [], + } = reqBody; + if (!namespace || !input) throw new Error("Invalid request body"); + + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) { + return { + response: null, + sources: [], + message: "Invalid query - no documents found for workspace!", + }; + } + + const queryVector = await this.embedChunk(this.openai(), input); + const { contextTexts, sourceDocuments } = await this.similarityResponse( + client, + namespace, + queryVector + ); + const prompt = { + role: "system", + content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. + Context: + ${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, ...chatHistory, { role: "user", content: input }]; + const responseText = await this.getChatCompletion(this.openai(), memory, { + temperature: workspace?.openAiTemp ?? 
0.7, + }); + + return { + response: responseText, + sources: curateSources(sourceDocuments), + message: false, + }; + }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; if (!namespace) throw new Error("namespace required"); diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index d6aced156f3da0513c9a27752062e24e692df3a5..293e835a18b326ccfbb4be02866c0b74a303c4e2 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -84,6 +84,27 @@ const LanceDb = { if (!data.hasOwnProperty("choices")) return null; return data.choices[0].message.content; }, + similarityResponse: async function (client, namespace, queryVector) { + const collection = await client.openTable(namespace); + const result = { + contextTexts: [], + sourceDocuments: [], + }; + + const response = await collection + .search(queryVector) + .metricType("cosine") + .limit(5) + .execute(); + + response.forEach((item) => { + const { vector: _, ...rest } = item; + result.contextTexts.push(rest.text); + result.sourceDocuments.push(rest); + }); + + return result; + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client.openTable(namespace).catch(() => false); @@ -232,28 +253,78 @@ const LanceDb = { // LanceDB does not have langchainJS support so we roll our own here. const queryVector = await this.embedChunk(this.openai(), input); - const collection = await client.openTable(namespace); - const relevantResults = await collection - .search(queryVector) - .metricType("cosine") - .limit(2) - .execute(); - const messages = [ - { - role: "system", - content: `The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know. - Relevant pieces of information for context of the current query: - ${relevantResults.map((result) => result.text).join("\n\n")}`, - }, - { role: "user", content: input }, - ]; - const responseText = await this.getChatCompletion(this.openai(), messages, { - temperature: workspace?.openAiTemp, + const { contextTexts, sourceDocuments } = await this.similarityResponse( + client, + namespace, + queryVector + ); + const prompt = { + role: "system", + content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. + Context: + ${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, { role: "user", content: input }]; + const responseText = await this.getChatCompletion(this.openai(), memory, { + temperature: workspace?.openAiTemp ?? 
0.7, + }); + + return { + response: responseText, + sources: curateLanceSources(sourceDocuments), + message: false, + }; + }, + // This implementation of chat uses the chat history and modifies the system prompt at execution + // this is improved over the regular langchain implementation so that chats do not directly modify embeddings + // because then multi-user support will have all conversations mutating the base vector collection to which then + // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs + chat: async function (reqBody = {}) { + const { + namespace = null, + input, + workspace = {}, + chatHistory = [], + } = reqBody; + if (!namespace || !input) throw new Error("Invalid request body"); + + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) { + return { + response: null, + sources: [], + message: "Invalid query - no documents found for workspace!", + }; + } + + const queryVector = await this.embedChunk(this.openai(), input); + const { contextTexts, sourceDocuments } = await this.similarityResponse( + client, + namespace, + queryVector + ); + const prompt = { + role: "system", + content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. + Context: + ${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, ...chatHistory, { role: "user", content: input }]; + const responseText = await this.getChatCompletion(this.openai(), memory, { + temperature: workspace?.openAiTemp ?? 
0.7, }); return { response: responseText, - sources: curateLanceSources(relevantResults), + sources: curateLanceSources(sourceDocuments), message: false, }; }, diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 2dcf2b526bb75b8747258283e7ba201a7ab7d1d5..37198ca493dc1d06dd97d777f9f9a0314e3e663e 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -38,6 +38,21 @@ const Pinecone = { const openai = new OpenAIApi(config); return openai; }, + getChatCompletion: async function ( + openai, + messages = [], + { temperature = 0.7 } + ) { + const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo"; + const { data } = await openai.createChatCompletion({ + model, + messages, + temperature, + }); + + if (!data.hasOwnProperty("choices")) return null; + return data.choices[0].message.content; + }, embedChunk: async function (openai, textChunk) { const { data: { data }, @@ -65,6 +80,27 @@ const Pinecone = { 0 ); }, + similarityResponse: async function (index, namespace, queryVector) { + const result = { + contextTexts: [], + sourceDocuments: [], + }; + const response = await index.query({ + queryRequest: { + namespace, + vector: queryVector, + topK: 4, + includeMetadata: true, + }, + }); + + response.matches.forEach((match) => { + result.contextTexts.push(match.metadata.text); + result.sourceDocuments.push(match); + }); + + return result; + }, namespace: async function (index, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const { namespaces } = await index.describeIndexStats1(); @@ -255,10 +291,17 @@ const Pinecone = { message: false, }; }, - // This implementation of chat also expands the memory of the chat itself - // and adds more tokens to the PineconeDB instance namespace + // This implementation of chat uses the chat history and modifies the system prompt at execution + // this is improved over the regular langchain implementation so that chats do not directly modify embeddings + // because then multi-user support will have all conversations mutating the base vector collection to which then + // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs chat: async function (reqBody = {}) { - const { namespace = null, input, workspace = {} } = reqBody; + const { + namespace = null, + input, + workspace = {}, + chatHistory = [], + } = reqBody; if (!namespace || !input) throw new Error("Invalid request body"); const { pineconeIndex } = await this.connect(); @@ -267,31 +310,33 @@ const Pinecone = { "Invalid namespace - has it been collected and seeded yet?" ); - const vectorStore = await PineconeStore.fromExistingIndex(this.embedder(), { + const queryVector = await this.embedChunk(this.openai(), input); + const { contextTexts, sourceDocuments } = await this.similarityResponse( pineconeIndex, namespace, - }); - - const memory = new VectorStoreRetrieverMemory({ - vectorStoreRetriever: vectorStore.asRetriever(1), - memoryKey: "history", - }); + queryVector + ); + const prompt = { + role: "system", + content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. 
+Context: +${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, ...chatHistory, { role: "user", content: input }]; - const model = this.llm({ - temperature: workspace?.openAiTemp, + const responseText = await this.getChatCompletion(this.openai(), memory, { + temperature: workspace?.openAiTemp ?? 0.7, }); - const prompt = - PromptTemplate.fromTemplate(`The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know. - Relevant pieces of previous conversation: - {history} - - Current conversation: - Human: {input} - AI:`); - const chain = new LLMChain({ llm: model, prompt, memory }); - const response = await chain.call({ input }); - return { response: response.text, sources: [], message: false }; + return { + response: responseText, + sources: curateSources(sourceDocuments), + message: false, + }; }, };
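
Review note on the frontend changes: the /query and /conversation commands now persist the active mode in localStorage under workspace_chat_mode_<slug> and broadcast a window-level workspace_chat_mode_update event so the Tracking badge and ChatContainer can read it without shared React state. A minimal sketch of that wiring as a reusable hook follows; the hook name useChatMode and the keyFor helper are hypothetical, while the storage key format and event name are the ones used in the diff. Unlike the Tracking effect above, the sketch also removes its listener on unmount.

import { useState, useEffect, useCallback } from "react";

const MODE_EVENT = "workspace_chat_mode_update"; // event name dispatched in setTextCommand
const keyFor = (slug) => `workspace_chat_mode_${slug}`; // storage key used across the diff

export function useChatMode(workspaceSlug) {
  const [chatMode, setChatMode] = useState(
    () => window.localStorage.getItem(keyFor(workspaceSlug)) ?? "chat"
  );

  useEffect(() => {
    if (!workspaceSlug) return;
    const onUpdate = () =>
      setChatMode(window.localStorage.getItem(keyFor(workspaceSlug)) ?? "chat");
    window.addEventListener(MODE_EVENT, onUpdate);
    // Cleanup step the Tracking effect above omits.
    return () => window.removeEventListener(MODE_EVENT, onUpdate);
  }, [workspaceSlug]);

  const updateChatMode = useCallback(
    (mode) => {
      window.localStorage.setItem(keyFor(workspaceSlug), mode);
      window.dispatchEvent(new Event(MODE_EVENT));
    },
    [workspaceSlug]
  );

  return { chatMode, updateChatMode };
}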
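
Review note on server/models/workspaceChats.js: swapping the LIMIT and ORDER BY interpolation in where() is a correctness fix, not a style change. SQLite only accepts LIMIT after ORDER BY, so the previous order produced invalid SQL whenever both a limit and an order clause were supplied. With the new forWorkspace(workspace.id, 20) call in chatWithWorkspace, the generated statement reads roughly as below (the table name is an assumption standing in for this.tablename):

SELECT * FROM workspace_chats
WHERE workspaceId = 1 AND include = true
ORDER BY id ASC LIMIT 20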
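
Review note on the new chat() implementations: all three providers (Chroma, LanceDB, Pinecone) now assemble the same OpenAI messages array by hand: a system prompt stuffed with the top similarity hits as [CONTEXT i] blocks, followed by the prior turns, followed by the new user message. The sketch below pulls that assembly out of the diff for illustration; buildChatMemory is a hypothetical name, and chatHistory is assumed to be an array of { role, content } turns, which is the shape convertToPromptHistory would need to produce for the spread into the messages array to be valid (its implementation is not part of this diff).

// Illustrative only: mirrors the prompt construction repeated in each provider's chat().
function buildChatMemory(contextTexts = [], chatHistory = [], input = "") {
  const prompt = {
    role: "system",
    content:
      "Given the following conversation, relevant context, and a follow up question, " +
      "reply with an answer to the current question the user is asking. Return only your " +
      "response to the question given the above information following the users instructions as needed.\n" +
      "Context:\n" +
      contextTexts
        .map((text, i) => `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`)
        .join(""),
  };
  // System prompt first, prior turns next, the current question last.
  return [prompt, ...chatHistory, { role: "user", content: input }];
}

// The resulting array is what getChatCompletion() forwards to
// openai.createChatCompletion({ model, messages, temperature }).
// Keeping history in the prompt this way avoids writing conversation turns back into the
// vector collection, which is what the removed VectorStoreRetrieverMemory path did and
// which the updated comments call out as a problem for multi-user workspaces.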