Skip to content
Snippets Groups Projects
Commit 1e3d82e1 authored by timothycarambat
Browse files

patch collector script

parent c5dc6863
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ from bs4 import BeautifulSoup
# Example article URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
def link():
totalTokens = 0
print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
fqdn_link = input("Paste in the URL of an online article or blog: ")
if(len(fqdn_link) == 0):
......@@ -33,6 +34,7 @@ def link():
link = append_meta(req, full_text, True)
if(len(full_text) > 0):
totalTokens += len(tokenize(full_text))
source = urlparse(req.url)
output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
output_path = f"./outputs/website-logs"
......@@ -58,7 +60,7 @@ def link():
print(f"\n\n[Success]: article or link content fetched!")
print(f"////////////////////////////")
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.")
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.")
print(f"////////////////////////////")
exit(0)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment