Patch collector script: track totalTokens in link.py for the embedding cost estimate

1e3d82e1 · timothycarambat · c5dc6863 · 1e3d82e1
Commit 1e3d82e1 authored 1 year ago by timothycarambat
--- a/collector/scripts/link.py
+++ b/collector/scripts/link.py
@@ -9,6 +9,7 @@ from bs4 import BeautifulSoup
    
 # Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
 def link():
+  totalTokens = 0
  print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
  fqdn_link = input("Paste in the URL of an online article or blog: ")
  if(len(fqdn_link) == 0):
@@ -33,6 +34,7 @@ def link():
  
  link = append_meta(req, full_text, True)
  if(len(full_text) > 0):
+    totalTokens += len(tokenize(full_text))
    source = urlparse(req.url)
    output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
    output_path = f"./outputs/website-logs"
@@ -58,7 +60,7 @@ def link():

  print(f"\n\n[Success]: article or link content fetched!")
  print(f"////////////////////////////")
-  print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.")
+  print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.")
  print(f"////////////////////////////")
  exit(0)