From 1e3d82e18485b60173362dd5c7c248abe0581f36 Mon Sep 17 00:00:00 2001
From: timothycarambat <rambat1010@gmail.com>
Date: Thu, 16 Nov 2023 10:25:23 -0800
Subject: [PATCH] collector: track totalTokens in link.py for the embedding cost estimate

---
 collector/scripts/link.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/collector/scripts/link.py b/collector/scripts/link.py
index 6ee3f23ea..cc24a5cae 100644
--- a/collector/scripts/link.py
+++ b/collector/scripts/link.py
@@ -9,6 +9,7 @@ from bs4 import BeautifulSoup
     
 # Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
 def link():
+  totalTokens = 0
   print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
   fqdn_link = input("Paste in the URL of an online article or blog: ")
   if(len(fqdn_link) == 0):
@@ -33,6 +34,7 @@ def link():
   
   link = append_meta(req, full_text, True)
   if(len(full_text) > 0):
+    totalTokens += len(tokenize(full_text))
     source = urlparse(req.url)
     output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
     output_path = f"./outputs/website-logs"
@@ -58,7 +60,7 @@ def link():
 
   print(f"\n\n[Success]: article or link content fetched!")
   print(f"////////////////////////////")
-  print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.")
+  print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.")
   print(f"////////////////////////////")
   exit(0)
 
-- 
GitLab