From d90571fa33071cf639bcab2b3d961a51e9375c12 Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Thu, 6 Feb 2025 09:23:05 -0800
Subject: [PATCH] Add improved logging and introspection to web-browsing
 (#3140)

---
 .../agents/aibitat/plugins/web-browsing.js    | 167 +++++++++++++-----
 1 file changed, 127 insertions(+), 40 deletions(-)

diff --git a/server/utils/agents/aibitat/plugins/web-browsing.js b/server/utils/agents/aibitat/plugins/web-browsing.js
index 2d3701d1a..8a0357c4c 100644
--- a/server/utils/agents/aibitat/plugins/web-browsing.js
+++ b/server/utils/agents/aibitat/plugins/web-browsing.js
@@ -1,4 +1,6 @@
 const { SystemSettings } = require("../../../../models/systemSettings");
+const { TokenManager } = require("../../../helpers/tiktoken");
+const tiktoken = new TokenManager();
 
 const webBrowsing = {
   name: "web-browsing",
@@ -12,6 +14,11 @@ const webBrowsing = {
         aibitat.function({
           super: aibitat,
           name: this.name,
+          countTokens: (string) =>
+            tiktoken
+              .countFromString(string)
+              .toString()
+              .replace(/\B(?=(\d{3})+(?!\d))/g, ","),
           description:
             "Searches for a given query using a search engine to get better results for the user query.",
           examples: [
@@ -89,6 +96,18 @@ const webBrowsing = {
             return await this[engine](query);
           },
 
+          /**
+           * Utility function to truncate a string to a given length for debugging
+           * calls to the API while keeping the actual values mostly intact
+           * @param {string} str - The string to truncate
+           * @param {number} length - The length to truncate the string to
+           * @returns {string} The truncated string
+           */
+          middleTruncate(str, length = 5) {
+            if (str.length <= length) return str;
+            return `${str.slice(0, length)}...${str.slice(-length)}`;
+          },
+
           /**
            * Use Google Custom Search Engines
            * Free to set up, easy to use, 100 calls/day
@@ -115,7 +134,12 @@ const webBrowsing = {
               }"`
             );
             const data = await fetch(searchURL)
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ key: this.middleTruncate(process.env.AGENT_GSE_KEY, 5), cx: this.middleTruncate(process.env.AGENT_GSE_CTX, 5), q: query })}`
+                );
+              })
               .then((searchResult) => searchResult?.items || [])
               .then((items) => {
                 return items.map((item) => {
@@ -127,16 +151,20 @@ const webBrowsing = {
                 });
               })
               .catch((e) => {
-                console.log(e);
+                this.super.handlerProps.log(
+                  `${this.name}: Google Search Error: ${e.message}`
+                );
                 return [];
               });
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
 
           /**
@@ -173,11 +201,17 @@ const webBrowsing = {
                 "X-SearchApi-Source": "AnythingLLM",
               },
             })
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SEARCHAPI_API_KEY, 5), q: query })}`
+                );
+              })
               .then((data) => {
                 return { response: data, error: null };
               })
               .catch((e) => {
+                this.super.handlerProps.log(`SearchApi Error: ${e.message}`);
                 return { response: null, error: e.message };
               });
             if (error)
@@ -199,10 +233,12 @@ const webBrowsing = {
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
 
           /**
@@ -235,11 +271,17 @@ const webBrowsing = {
                 redirect: "follow",
               }
             )
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SERPER_DEV_KEY, 5), q: query })}`
+                );
+              })
               .then((data) => {
                 return { response: data, error: null };
               })
               .catch((e) => {
+                this.super.handlerProps.log(`Serper.dev Error: ${e.message}`);
                 return { response: null, error: e.message };
               });
             if (error)
@@ -259,10 +301,12 @@ const webBrowsing = {
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
           _bingWebSearch: async function (query) {
             if (!process.env.AGENT_BING_SEARCH_API_KEY) {
@@ -289,7 +333,12 @@ const webBrowsing = {
                   process.env.AGENT_BING_SEARCH_API_KEY,
               },
             })
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_BING_SEARCH_API_KEY, 5), q: query })}`
+                );
+              })
               .then((data) => {
                 const searchResults = data.webPages?.value || [];
                 return searchResults.map((result) => ({
@@ -299,16 +348,20 @@ const webBrowsing = {
                 }));
               })
               .catch((e) => {
-                console.log(e);
+                this.super.handlerProps.log(
+                  `Bing Web Search Error: ${e.message}`
+                );
                 return [];
               });
 
             if (searchResponse.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(searchResponse);
             this.super.introspect(
-              `${this.caller}: I found ${searchResponse.length} results - looking over them now.`
+              `${this.caller}: I found ${searchResponse.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(searchResponse);
+            return result;
           },
           _serplyEngine: async function (
             query,
@@ -353,20 +406,24 @@ const webBrowsing = {
                 "X-User-Agent": device_type,
               },
             })
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SERPLY_API_KEY, 5), q: query })}`
+                );
+              })
               .then((data) => {
-                if (data?.message === "Unauthorized") {
-                  return {
-                    response: null,
-                    error:
-                      "Unauthorized. Please double check your AGENT_SERPLY_API_KEY",
-                  };
-                }
+                if (data?.message === "Unauthorized")
+                  throw new Error(
+                    "Unauthorized. Please double check your AGENT_SERPLY_API_KEY"
+                  );
                 return { response: data, error: null };
               })
               .catch((e) => {
+                this.super.handlerProps.log(`Serply Error: ${e.message}`);
                 return { response: null, error: e.message };
               });
+
             if (error)
               return `There was an error searching for content. ${error}`;
 
@@ -382,10 +439,12 @@ const webBrowsing = {
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
           _searXNGEngine: async function (query) {
             let searchURL;
@@ -421,11 +480,19 @@ const webBrowsing = {
                 "User-Agent": "anything-llm",
               },
             })
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ url: searchURL.toString() })}`
+                );
+              })
               .then((data) => {
                 return { response: data, error: null };
               })
               .catch((e) => {
+                this.super.handlerProps.log(
+                  `SearXNG Search Error: ${e.message}`
+                );
                 return { response: null, error: e.message };
               });
             if (error)
@@ -444,10 +511,12 @@ const webBrowsing = {
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
           _tavilySearch: async function (query) {
             if (!process.env.AGENT_TAVILY_API_KEY) {
@@ -474,11 +543,19 @@ const webBrowsing = {
                 query: query,
               }),
             })
-              .then((res) => res.json())
+              .then((res) => {
+                if (res.ok) return res.json();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_TAVILY_API_KEY, 5), q: query })}`
+                );
+              })
               .then((data) => {
                 return { response: data, error: null };
               })
               .catch((e) => {
+                this.super.handlerProps.log(
+                  `Tavily Search Error: ${e.message}`
+                );
                 return { response: null, error: e.message };
               });
 
@@ -497,10 +574,12 @@ const webBrowsing = {
 
             if (data.length === 0)
               return `No information was found online for the search query.`;
+
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-            return JSON.stringify(data);
+            return result;
           },
           _duckDuckGoEngine: async function (query) {
             this.super.introspect(
@@ -512,15 +591,23 @@ const webBrowsing = {
             const searchURL = new URL("https://html.duckduckgo.com/html");
             searchURL.searchParams.append("q", query);
 
-            const response = await fetch(searchURL.toString());
-
-            if (!response.ok) {
-              return `There was an error searching DuckDuckGo. Status: ${response.status}`;
-            }
+            const response = await fetch(searchURL.toString())
+              .then((res) => {
+                if (res.ok) return res.text();
+                throw new Error(
+                  `${res.status} - ${res.statusText}. params: ${JSON.stringify({ url: searchURL.toString() })}`
+                );
+              })
+              .catch((e) => {
+                this.super.handlerProps.log(
+                  `DuckDuckGo Search Error: ${e.message}`
+                );
+                return null;
+              });
 
-            const html = await response.text();
+            if (!response) return `There was an error searching DuckDuckGo.`;
+            const html = response;
             const data = [];
-
             const results = html.split('<div class="result results_links');
 
             // Skip first element since it's before the first result
@@ -556,11 +643,11 @@ const webBrowsing = {
               return `No information was found online for the search query.`;
             }
 
+            const result = JSON.stringify(data);
             this.super.introspect(
-              `${this.caller}: I found ${data.length} results - looking over them now.`
+              `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
             );
-
-            return JSON.stringify(data);
+            return result;
           },
         });
       },
-- 
GitLab