From 0b665bd1ca059bd9ad41897ce4a4b18d5a8ffab1 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Tue, 19 Mar 2024 11:31:08 +0700
Subject: [PATCH] feat: add wikipedia tool (#648)

Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
---
 .changeset/violet-bags-hide.md           |  5 +++
 examples/agent/wikipedia-tool.ts         | 23 ++++++++++
 packages/core/package.json               |  3 +-
 packages/core/src/tools/WikipediaTool.ts | 54 ++++++++++++++++++++++++
 packages/core/src/tools/index.ts         |  1 +
 packages/edge/package.json               |  7 +--
 packages/edge/scripts/compare-deps.js    | 36 ----------------
 packages/edge/scripts/update-deps.js     | 20 +++++++++
 pnpm-lock.yaml                           | 27 ++++++++++++
 9 files changed, 136 insertions(+), 40 deletions(-)
 create mode 100644 .changeset/violet-bags-hide.md
 create mode 100644 examples/agent/wikipedia-tool.ts
 create mode 100644 packages/core/src/tools/WikipediaTool.ts
 delete mode 100644 packages/edge/scripts/compare-deps.js
 create mode 100644 packages/edge/scripts/update-deps.js

diff --git a/.changeset/violet-bags-hide.md b/.changeset/violet-bags-hide.md
new file mode 100644
index 000000000..11a4ff20f
--- /dev/null
+++ b/.changeset/violet-bags-hide.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+feat: add wikipedia tool
diff --git a/examples/agent/wikipedia-tool.ts b/examples/agent/wikipedia-tool.ts
new file mode 100644
index 000000000..3cb961733
--- /dev/null
+++ b/examples/agent/wikipedia-tool.ts
@@ -0,0 +1,34 @@
+import { OpenAIAgent, WikipediaTool } from "llamaindex";
+
+/**
+ * Asks an OpenAIAgent a question, giving it the WikipediaTool as its only
+ * tool, and prints the response.
+ */
+async function main() {
+  const wikipediaTool = new WikipediaTool();
+
+  // Create an OpenAIAgent that can call the Wikipedia tool
+  const agent = new OpenAIAgent({
+    tools: [wikipediaTool],
+    verbose: true,
+  });
+
+  // Chat with the agent
+  const response = await agent.chat({
+    message: "Where is Ho Chi Minh City?",
+  });
+
+  // Print the response
+  console.log(response);
+}
+
+main()
+  .then(() => {
+    console.log("Done");
+  })
+  .catch((error: unknown) => {
+    // Handle the rejection explicitly instead of leaving it unhandled, and
+    // keep a non-zero exit status so the failure stays visible to scripts.
+    console.error(error);
+    process.exitCode = 1;
+  });
diff --git a/packages/core/package.json b/packages/core/package.json
index a41821927..388fc1dd9 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -41,7 +41,8 @@
     "rake-modified": "^1.0.8",
     "replicate": "^0.25.2",
     "string-strip-html": "^13.4.6",
-    "wink-nlp": "^1.14.3"
+    "wink-nlp": "^1.14.3",
+    "wikipedia": "^2.1.2"
   },
   "devDependencies": {
     "@swc/cli": "^0.3.9",
diff --git a/packages/core/src/tools/WikipediaTool.ts b/packages/core/src/tools/WikipediaTool.ts
new file mode 100644
index 000000000..2764f2726
--- /dev/null
+++ b/packages/core/src/tools/WikipediaTool.ts
@@ -0,0 +1,79 @@
+import { default as wiki } from "wikipedia";
+import type { BaseTool, ToolMetadata } from "../types.js";
+
+/** Constructor options for {@link WikipediaTool}. */
+export type WikipediaToolParams = {
+  /** Replaces the default tool metadata when provided. */
+  metadata?: ToolMetadata;
+};
+
+/** Arguments accepted by {@link WikipediaTool.call}. */
+type WikipediaCallParams = {
+  /** Free-text search query. */
+  query: string;
+  /** Wikipedia language code; defaults to "en" when omitted. */
+  lang?: string;
+};
+
+// Metadata used when the caller does not supply its own.
+const DEFAULT_META_DATA: ToolMetadata = {
+  name: "wikipedia_tool",
+  description: "A tool that uses a query engine to search Wikipedia.",
+  parameters: {
+    type: "object",
+    properties: {
+      query: {
+        type: "string",
+        description: "The query to search for",
+      },
+    },
+    required: ["query"],
+  },
+};
+
+/**
+ * Tool that searches Wikipedia and returns the content of the top result.
+ */
+export class WikipediaTool implements BaseTool {
+  private readonly DEFAULT_LANG = "en";
+  metadata: ToolMetadata;
+
+  constructor(params?: WikipediaToolParams) {
+    this.metadata = params?.metadata ?? DEFAULT_META_DATA;
+  }
+
+  /**
+   * Fetches the content of the page with the given title.
+   *
+   * @param page - Page title; looked up with auto-suggest disabled.
+   * @param lang - Wikipedia language code to read from.
+   * @returns The value produced by the page's `content()` call.
+   */
+  async loadData(
+    page: string,
+    lang: string = this.DEFAULT_LANG,
+  ): Promise<string> {
+    // The language is set on the imported `wikipedia` module object rather
+    // than passed per request, so it is (re)applied before the lookup.
+    wiki.default.setLang(lang);
+    const pageResult = await wiki.default.page(page, { autoSuggest: false });
+    return await pageResult.content();
+  }
+
+  /**
+   * Searches Wikipedia for `query` and returns the content of the first hit.
+   *
+   * @returns The page content, or "No search results." when nothing matches.
+   */
+  async call({
+    query,
+    lang = this.DEFAULT_LANG,
+  }: WikipediaCallParams): Promise<string> {
+    // Select the language before searching; otherwise the search runs against
+    // whichever language was set last while the page is loaded from `lang`.
+    wiki.default.setLang(lang);
+    const searchResult = await wiki.default.search(query);
+    if (searchResult.results.length === 0) return "No search results.";
+    return await this.loadData(searchResult.results[0].title, lang);
+  }
+}
diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts
index 32e1bbb9d..2b52c32ed 100644
--- a/packages/core/src/tools/index.ts
+++ b/packages/core/src/tools/index.ts
@@ -1,4 +1,5 @@
 export * from "./QueryEngineTool.js";
+export * from "./WikipediaTool.js";
 export * from "./functionTool.js";
 export * from "./types.js";
 export * from "./utils.js";
diff --git a/packages/edge/package.json b/packages/edge/package.json
index f9e26122f..8f9918caf 100644
--- a/packages/edge/package.json
+++ b/packages/edge/package.json
@@ -40,7 +40,8 @@
     "rake-modified": "^1.0.8",
     "replicate": "^0.25.2",
     "string-strip-html": "^13.4.6",
-    "wink-nlp": "^1.14.3"
+    "wink-nlp": "^1.14.3",
+    "wikipedia": "^2.1.2"
   },
   "engines": {
     "node": ">=18.0.0"
@@ -77,8 +78,8 @@
   },
   "scripts": {
     "copy": "cp -r ../../README.md ../../LICENSE .",
-    "compare": "node scripts/compare-deps.js",
+    "update:deps": "node scripts/update-deps.js",
     "build:core": "pnpm --filter llamaindex build && cp -r ../core/dist . && rm -fr dist/cjs",
-    "build": "pnpm run compare && pnpm run build:core && pnpm copy"
+    "build": "pnpm run update:deps && pnpm run build:core && pnpm copy"
   }
 }
diff --git a/packages/edge/scripts/compare-deps.js b/packages/edge/scripts/compare-deps.js
deleted file mode 100644
index 738058c8c..000000000
--- a/packages/edge/scripts/compare-deps.js
+++ /dev/null
@@ -1,36 +0,0 @@
-import fs from "node:fs";
-import path from "node:path";
-
-const corePackage = readJson(
-  path.join(process.cwd(), "..", "core", "package.json"),
-);
-const edgePackage = readJson(path.join(process.cwd(), "package.json"));
-
-if (!equalObjs(corePackage.dependencies, edgePackage.dependencies)) {
-  console.log(
-    "Dependencies of '@llamaindex/edge' and 'core' package are not the same. Sync dependencies and run build again",
-  );
-  process.exit(1);
-}
-
-function readJson(filePath) {
-  const content = fs.readFileSync(filePath, "utf8");
-  return JSON.parse(content);
-}
-
-function equalObjs(deps1, deps2) {
-  const keys1 = Object.keys(deps1);
-  const keys2 = Object.keys(deps2);
-
-  if (keys1.length !== keys2.length) {
-    return false;
-  }
-
-  for (const key of keys1) {
-    if (deps1[key] !== deps2[key]) {
-      return false;
-    }
-  }
-
-  return true;
-}
diff --git a/packages/edge/scripts/update-deps.js b/packages/edge/scripts/update-deps.js
new file mode 100644
index 000000000..cb8f6d866
--- /dev/null
+++ b/packages/edge/scripts/update-deps.js
@@ -0,0 +1,25 @@
+// Keeps the edge package's dependencies identical to the core package's by
+// overwriting them on every build.
+
+import fs from "node:fs";
+import path from "node:path";
+
+/** Reads the file at `filePath` as UTF-8 and parses it as JSON. */
+const readJson = (filePath) => JSON.parse(fs.readFileSync(filePath, "utf8"));
+
+/** Writes `json` to `filePath`, pretty-printed and newline-terminated. */
+const writeJson = (filePath, json) => {
+  const serialized = `${JSON.stringify(json, null, 2)}\n`;
+  fs.writeFileSync(filePath, serialized);
+};
+
+// Both paths are resolved from the current working directory, so the script
+// expects to be started from the edge package folder.
+const cwd = process.cwd();
+const edgePackagePath = path.join(cwd, "package.json");
+const corePackagePath = path.join(cwd, "..", "core", "package.json");
+
+const edgePackage = readJson(edgePackagePath);
+const { dependencies } = readJson(corePackagePath);
+edgePackage.dependencies = dependencies;
+writeJson(edgePackagePath, edgePackage);
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 430efc9eb..8dd88f9d1 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -276,6 +276,9 @@ importers:
       string-strip-html:
         specifier: ^13.4.6
         version: 13.4.6
+      wikipedia:
+        specifier: ^2.1.2
+        version: 2.1.2
       wink-nlp:
         specifier: ^1.14.3
         version: 1.14.3
@@ -418,6 +421,9 @@ importers:
       string-strip-html:
         specifier: ^13.4.6
         version: 13.4.6
+      wikipedia:
+        specifier: ^2.1.2
+        version: 2.1.2
       wink-nlp:
         specifier: ^1.14.3
         version: 1.14.3
@@ -5913,6 +5919,11 @@ packages:
       quick-lru: 4.0.1
     dev: true
 
+  /camelcase@4.1.0:
+    resolution: {integrity: sha512-FxAv7HpHrXbh3aPo4o2qxHay2lkLY3x5Mw3KeE4KQE8ysVfziWeRZDwcjauvwBSGEC/nXUPzZy8zeh4HokqOnw==}
+    engines: {node: '>=4'}
+    dev: false
+
   /camelcase@5.3.1:
     resolution: {integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==}
     engines: {node: '>=6'}
@@ -9264,6 +9275,12 @@ packages:
       once: 1.4.0
       wrappy: 1.0.2
 
+  /infobox-parser@3.6.4:
+    resolution: {integrity: sha512-d2lTlxKZX7WsYxk9/UPt51nkmZv5tbC75SSw4hfHqZ3LpRAn6ug0oru9xI2X+S78va3aUAze3xl/UqMuwLmJUw==}
+    dependencies:
+      camelcase: 4.1.0
+    dev: false
+
   /inherits@2.0.3:
     resolution: {integrity: sha512-x00IRNXNy63jwGkJmzPigoySHbaqpNuzKbBOmzK+g2OdZpQ9w+sxCN+VSB3ja7IAge2OP2qpfxTjeNcyjmW1uw==}
 
@@ -15650,6 +15667,16 @@ packages:
     dependencies:
       string-width: 5.1.2
 
+  /wikipedia@2.1.2:
+    resolution: {integrity: sha512-RAYaMpXC9/E873RaSEtlEa8dXK4e0p5k98GKOd210MtkE5emm6fcnwD+N6ZA4cuffjDWagvhaQKtp/mGp2BOVQ==}
+    engines: {node: '>=10'}
+    dependencies:
+      axios: 1.6.7
+      infobox-parser: 3.6.4
+    transitivePeerDependencies:
+      - debug
+    dev: false
+
   /wildcard@2.0.1:
     resolution: {integrity: sha512-CC1bOL87PIWSBhDcTrdeLo6eGT7mCFtrg0uIJtqJUFyK+eJnzl8A1niH56uu7KMa5XFrtiV+AQuHO3n7DsHnLQ==}
 
-- 
GitLab