diff --git a/collector/extensions/index.js b/collector/extensions/index.js
index 7b131b6469ba2e91eb1bab6f78f9ac8ae87acb73..bcf2229f2de91a7c0ed72b1215c436421e2550d7 100644
--- a/collector/extensions/index.js
+++ b/collector/extensions/index.js
@@ -47,6 +47,25 @@ function extensions(app) {
     }
     return;
   });
+
+  app.post("/ext/youtube-transcript", async function (request, response) {
+    try {
+      const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
+      const { success, reason, data } = await loadYouTubeTranscript(reqBody(request));
+      response.status(200).json({ success, reason, data });
+    } catch (e) {
+      console.error(e);
+      response.status(400).json({
+        success: false,
+        reason: e.message,
+        data: {
+          title: null,
+          author: null
+        }
+      });
+    }
+    return;
+  });
 }
 
 module.exports = extensions;
diff --git a/collector/package.json b/collector/package.json
index fb9bed67ac7165edc2bd862ebc874d9ee6976f7f..0e81b72a533188908c5fbae9393bc64d2a83abb4 100644
--- a/collector/package.json
+++ b/collector/package.json
@@ -38,7 +38,9 @@
     "slugify": "^1.6.6",
     "url-pattern": "^1.0.3",
     "uuid": "^9.0.0",
-    "wavefile": "^11.0.0"
+    "wavefile": "^11.0.0",
+    "youtube-transcript": "^1.0.6",
+    "youtubei.js": "^8.0.0"
   },
   "devDependencies": {
     "nodemon": "^2.0.22",
diff --git a/collector/utils/extensions/YoutubeTranscript/index.js b/collector/utils/extensions/YoutubeTranscript/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..7e88bb7a02695d47d4d801412a84d7079c087801
--- /dev/null
+++ b/collector/utils/extensions/YoutubeTranscript/index.js
@@ -0,0 +1,114 @@
+const { YoutubeLoader } = require("langchain/document_loaders/web/youtube");
+const fs = require("fs");
+const path = require("path");
+const { default: slugify } = require("slugify");
+const { v4 } = require("uuid");
+const { writeToServerDocuments } = require("../../files");
+const { tokenizeString } = require("../../tokenizer");
+
+/**
+ * Check whether a URL is a YouTube video link this loader accepts.
+ * Only `https://youtu.be/<id>` and `https://(www.)youtube.com/watch?v=<id>`
+ * are matched.
+ * @param {string} url - URL submitted by the caller.
+ * @returns {boolean} true when a video id could be extracted from the URL.
+ */
+function validYoutubeVideoUrl(url) {
+  const UrlPattern = require("url-pattern");
+
+  const shortPatternMatch = new UrlPattern(
+    "https\\://youtu.be/(:videoId)"
+  ).match(url);
+  const fullPatternMatch = new UrlPattern(
+    "https\\://(www.)youtube.com/watch?v=(:videoId)"
+  ).match(url);
+  const videoId =
+    shortPatternMatch?.videoId || fullPatternMatch?.videoId || null;
+  if (!!videoId) return true;
+
+  return false;
+}
+
+/**
+ * Load the transcript of a YouTube video and save it as a document in a
+ * per-author folder under `server/storage/documents`.
+ * @param {{url: string}} params - `url` is the link of the video to transcribe.
+ * @returns {Promise<{success: boolean, reason: (string|null), data?: object}>}
+ *   On failure `success` is false and `reason` explains why (no `data`). On
+ *   success `data` holds the video `title`, `author` and the `destination`
+ *   folder name.
+ */
+async function loadYouTubeTranscript({ url }) {
+  if (!validYoutubeVideoUrl(url)) {
+    return {
+      success: false,
+      reason: "Invalid URL. Should be youtu.be or youtube.com/watch.",
+    };
+  }
+
+  console.log(`-- Working YouTube ${url} --`);
+  const loader = YoutubeLoader.createFromUrl(url, { addVideoInfo: true });
+  const docs = await loader.load();
+
+  if (!docs.length) {
+    return {
+      success: false,
+      reason: "No transcript found for that YouTube video.",
+    };
+  }
+
+  const metadata = docs[0].metadata;
+  let content = "";
+  docs.forEach((doc) => (content = content.concat(doc.pageContent)));
+
+  if (!content.length) {
+    return {
+      success: false,
+      reason: "No transcript could be parsed for that YouTube video.",
+    };
+  }
+
+  const outFolder = slugify(
+    `${metadata.author} YouTube transcripts`
+  ).toLowerCase();
+  const outFolderPath = path.resolve(
+    __dirname,
+    `../../../../server/storage/documents/${outFolder}`
+  );
+  // recursive also makes this a no-op when the folder already exists.
+  fs.mkdirSync(outFolderPath, { recursive: true });
+
+  const data = {
+    id: v4(),
+    url: url + ".youtube",
+    title: metadata.title || url,
+    docAuthor: metadata.author,
+    description: metadata.description,
+    docSource: url,
+    chunkSource: url,
+    published: new Date().toLocaleString(),
+    wordCount: content.split(" ").length,
+    pageContent: content,
+    token_count_estimate: tokenizeString(content).length,
+  };
+
+  console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
+  writeToServerDocuments(
+    data,
+    `${slugify(metadata.title)}-${data.id}`,
+    outFolderPath
+  );
+
+  return {
+    success: true,
+    reason: null,
+    data: {
+      title: metadata.title,
+      author: metadata.author,
+      // Read by the frontend success toast ("Output folder is ...").
+      destination: outFolder,
+    },
+  };
+}
+
+module.exports = loadYouTubeTranscript;
diff --git a/collector/yarn.lock b/collector/yarn.lock
index 28c610926801dfa0a4d9ad888eb9acd0131a22c9..6501aac95794a40f5af6bd5194407c48a5bd3958 100644
--- a/collector/yarn.lock
+++ b/collector/yarn.lock
@@ -39,6 +39,11 @@
     chalk "^2.4.2"
     js-tokens "^4.0.0"
 
+"@fastify/busboy@^2.0.0":
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/@fastify/busboy/-/busboy-2.1.0.tgz#0709e9f4cb252351c609c6e6d8d6779a8d25edff"
+  integrity sha512-+KpH+QxZU7O4675t3mnkQKcZZg56u+K/Ct2K+N2AZYNVK8kyeo/bI18tI8aPm3tvNNRyTWfj6s5tnGNlcbQRsA==
+
 "@googleapis/youtube@^9.0.0":
   version "9.0.0"
   resolved "https://registry.yarnpkg.com/@googleapis/youtube/-/youtube-9.0.0.tgz#e45f6f5f7eac198c6391782b94b3ca54bacf0b63"
@@ -252,6 +257,11 @@ accepts@~1.3.8:
     mime-types "~2.1.34"
     negotiator "0.6.3"
 
+acorn@^8.8.0:
+  version "8.11.2"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.2.tgz#ca0d78b51895be5390a5903c5b3bdcdaf78ae40b"
+  integrity sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==
+
 agent-base@6:
   version "6.0.2"
   resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
@@ -554,6 +564,11 @@ camelcase@6:
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.3.0.tgz#5685b95eb209ac9c0c177467778c9c84df58ba9a"
   integrity sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==
 
+centra@^2.6.0:
+  version "2.6.0"
+  resolved "https://registry.yarnpkg.com/centra/-/centra-2.6.0.tgz#79117998ee6908642258db263871381aa5d1204a"
+  integrity sha512-dgh+YleemrT8u85QL11Z6tYhegAs3MMxsaWAq/oXeAmYJ7VxL3SI9TZtnfaEvNDMAPolj25FXIb3S+HCI4wQaQ==
+
 chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
@@ -1655,6 +1670,13 @@ isexe@^2.0.0:
   resolved
"https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== +jintr@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/jintr/-/jintr-1.1.0.tgz#223a3b07f5e03d410cec6e715c537c8ad1e714c3" + integrity sha512-Tu9wk3BpN2v+kb8yT6YBtue+/nbjeLFv4vvVC4PJ7oCidHKbifWhvORrAbQfxVIQZG+67am/mDagpiGSVtvrZg== + dependencies: + acorn "^8.8.0" + js-tiktoken@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.7.tgz#56933fcd2093e8304060dfde3071bda91812e6f5" @@ -2431,6 +2453,13 @@ pend@~1.2.0: resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg== +phin@^3.5.0: + version "3.7.0" + resolved "https://registry.yarnpkg.com/phin/-/phin-3.7.0.tgz#eeeff7660408515d8cf0c6252901012d4ab7153b" + integrity sha512-DqnVNrpYhKGBZppNKprD+UJylMeEKOZxHgPB+ZP6mGzf3uA2uox4Ep9tUm+rUc8WLIdHT3HcAE4X8fhwQA9JKg== + dependencies: + centra "^2.6.0" + picomatch@^2.0.4, picomatch@^2.2.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" @@ -3069,7 +3098,7 @@ tr46@~0.0.3: resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== -tslib@^2.0.1: +tslib@^2.0.1, tslib@^2.5.0: version "2.6.2" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== @@ -3122,6 +3151,13 @@ undici-types@~5.26.4: resolved 
"https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617" integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA== +undici@^5.19.1: + version "5.28.2" + resolved "https://registry.yarnpkg.com/undici/-/undici-5.28.2.tgz#fea200eac65fc7ecaff80a023d1a0543423b4c91" + integrity sha512-wh1pHJHnUeQV5Xa8/kyQhO7WFa8M34l026L5P/+2TYiakvGy5Rdc8jWZVyG7ieht/0WgJLEd3kcU5gKx+6GC8w== + dependencies: + "@fastify/busboy" "^2.0.0" + universalify@^0.1.0: version "0.1.2" resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" @@ -3279,6 +3315,22 @@ yauzl@^2.10.0, yauzl@^2.4.2: buffer-crc32 "~0.2.3" fd-slicer "~1.1.0" +youtube-transcript@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/youtube-transcript/-/youtube-transcript-1.0.6.tgz#8414c04380d3ef1102bd00ca3729e94c46ae7a14" + integrity sha512-k/6uxB9voj/5astl6+q+VArX/aWHhnmle8BucvUCTYTQQEOSVlBiXkrI0KD3o8A0b44MV6q0bmVNiJFIpTlcZA== + dependencies: + phin "^3.5.0" + +youtubei.js@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/youtubei.js/-/youtubei.js-8.0.0.tgz#0fcbe332e263d9be6afe4e3d1917e9ddc1ffbed3" + integrity sha512-kUwHvqoB5vfaGaY1quAGcX5JPIyjr5fjj9Zj/ZwUDCrermz/r5uIkNiJ5cNHkmAJbZP9fdygzNMvGHd7fM445g== + dependencies: + jintr "^1.1.0" + tslib "^2.5.0" + undici "^5.19.1" + zod-to-json-schema@3.20.3: version "3.20.3" resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.20.3.tgz#8c95d8c20f20455ffa0b4b526c29703f35f6d787" diff --git a/frontend/src/components/DataConnectorOption/index.jsx b/frontend/src/components/DataConnectorOption/index.jsx index 84af0ff1e1c93df4cef287492820c17159b5fbef..df7fad0f60124e772dde052d725d3e463752c605 100644 --- a/frontend/src/components/DataConnectorOption/index.jsx +++ b/frontend/src/components/DataConnectorOption/index.jsx @@ -36,4 +36,12 @@ export const DATA_CONNECTORS = { "Import 
an entire public or private Github repository in a single click.", link: "https://github.com", }, + "youtube-transcript": { + name: "YouTube Transcript", + path: paths.settings.dataConnectors.youtubeTranscript(), + image: ConnectorImages.youtube, + description: + "Import the transcription of an entire YouTube video from a link.", + link: "https://youtube.com", + }, }; diff --git a/frontend/src/components/DataConnectorOption/media/index.js b/frontend/src/components/DataConnectorOption/media/index.js index a339328ef45ce9aa24cbbaa1a75dfcd2a0798bf3..b3bacc1de878a54188b89e9609b6f4f4dad5940c 100644 --- a/frontend/src/components/DataConnectorOption/media/index.js +++ b/frontend/src/components/DataConnectorOption/media/index.js @@ -1,5 +1,9 @@ import Github from "./github.png"; +import YouTube from "./youtube.png"; + const ConnectorImages = { github: Github, + youtube: YouTube, }; + export default ConnectorImages; diff --git a/frontend/src/components/DataConnectorOption/media/youtube.png b/frontend/src/components/DataConnectorOption/media/youtube.png new file mode 100644 index 0000000000000000000000000000000000000000..aed2b0475cafadf03769b671baeeeaa58581a68f Binary files /dev/null and b/frontend/src/components/DataConnectorOption/media/youtube.png differ diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FileRow/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FileRow/index.jsx index f83a9e34c4f2ad043c175e1aab3161aa5752e090..cd695dfcfc4e897324cbd7cb43cb2a840d6ff171 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FileRow/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FileRow/index.jsx @@ -60,16 +60,19 @@ export default function FileRow({ selected ? "bg-sky-500/20" : "" } ${expanded ? 
"bg-sky-500/10" : ""}`}`} > - <div className="pl-4 col-span-4 flex gap-x-[4px] items-center"> + <div className="pl-2 col-span-6 flex gap-x-[4px] items-center"> <div - className="w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer" + className="shrink-0 w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer" role="checkbox" aria-checked={selected} tabIndex={0} > {selected && <div className="w-2 h-2 bg-white rounded-[2px]" />} </div> - <File className="text-base font-bold w-4 h-4 mr-[3px]" weight="fill" /> + <File + className="shrink-0 text-base font-bold w-4 h-4 mr-[3px]" + weight="fill" + /> <div className="relative" onMouseEnter={handleMouseEnter} @@ -88,7 +91,6 @@ export default function FileRow({ <p className="col-span-2 pl-3.5 whitespace-nowrap"> {formatDate(item?.published)} </p> - <p className="col-span-2 pl-3">{item?.size || "---"}</p> <p className="col-span-2 pl-2 uppercase">{getFileExtension(item.url)}</p> <div className="col-span-2 flex justify-end items-center"> {item?.cached && ( diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FolderRow/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FolderRow/index.jsx index c93a45cd32ffbed6412d0cb40a5e6f1d1dc74bbc..5b7f1be393b63e40300f01043604b8b83a890cb7 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FolderRow/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/FolderRow/index.jsx @@ -53,7 +53,7 @@ export default function FolderRow({ selected ? 
"bg-sky-500/20" : "" }`} > - <div className="col-span-4 flex gap-x-[4px] items-center"> + <div className="col-span-6 flex gap-x-[4px] items-center"> <div className="shrink-0 w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer" role="checkbox" @@ -79,7 +79,6 @@ export default function FolderRow({ </p> </div> <p className="col-span-2 pl-3.5" /> - <p className="col-span-2 pl-3" /> <p className="col-span-2 pl-2" /> <div className="col-span-2 flex justify-end items-center"> {item.name !== "custom-documents" && ( diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx index dcf625c5e0321e48bfcaaaebe2f8d01b50f6f739..1dd83de9aaefd4453db77be9970348865cec2bb0 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx @@ -71,9 +71,8 @@ export default function Directory({ <div className="relative w-[560px] h-[310px] bg-zinc-900 rounded-2xl"> <div className="rounded-t-2xl text-white/80 text-xs grid grid-cols-12 py-2 px-8 border-b border-white/20 shadow-lg bg-zinc-900 sticky top-0 z-10"> - <p className="col-span-4">Name</p> + <p className="col-span-6">Name</p> <p className="col-span-2">Date</p> - <p className="col-span-2">Size</p> <p className="col-span-2">Kind</p> <p className="col-span-2">Cached</p> </div> diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx index da75ec02fe93731d875bf58aebcf84d33e679c71..ceb75155880f3c7278f5ac0e8c97aa4be5bd34a8 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx +++ 
b/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx @@ -54,7 +54,7 @@ export default function WorkspaceFileRow({ className={`items-center transition-all duration-200 text-white/80 text-xs grid grid-cols-12 py-2 pl-3.5 pr-8 border-b border-white/20 hover:bg-sky-500/20 cursor-pointer ${isMovedItem ? "bg-green-800/40" : ""}`} > - <div className="col-span-4 flex gap-x-[4px] items-center"> + <div className="col-span-6 flex gap-x-[4px] items-center"> <File className="text-base font-bold w-4 h-4 ml-3 mr-[3px]" weight="fill" @@ -77,7 +77,6 @@ export default function WorkspaceFileRow({ <p className="col-span-2 pl-3.5 whitespace-nowrap"> {formatDate(item?.published)} </p> - <p className="col-span-2 pl-3">{item?.size || "---"}</p> <p className="col-span-2 pl-2 uppercase">{getFileExtension(item.url)}</p> <div className="col-span-2 flex justify-end items-center"> {item?.cached && ( diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/index.jsx index 9969e844e268bded928bc4b68ae9d937e4ec07f2..e1ec21dd4d393d7c4bc30c8da953bc4930110ec2 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/WorkspaceDirectory/index.jsx @@ -26,9 +26,8 @@ export default function WorkspaceDirectory({ </div> <div className="relative w-[560px] h-[445px] bg-zinc-900 rounded-2xl mt-5"> <div className="text-white/80 text-xs grid grid-cols-12 py-2 px-8 border-b border-white/20"> - <p className="col-span-4">Name</p> + <p className="col-span-6">Name</p> <p className="col-span-2">Date</p> - <p className="col-span-2">Size</p> <p className="col-span-2">Kind</p> <p className="col-span-2">Cached</p> </div> @@ -56,9 +55,8 @@ export default function WorkspaceDirectory({ }`} > <div className="text-white/80 text-xs grid grid-cols-12 py-2 px-8 
border-b border-white/20 bg-zinc-900 sticky top-0 z-10">
-          <p className="col-span-4">Name</p>
+          <p className="col-span-6">Name</p>
           <p className="col-span-2">Date</p>
-          <p className="col-span-2">Size</p>
           <p className="col-span-2">Kind</p>
           <p className="col-span-2">Cached</p>
         </div>
diff --git a/frontend/src/models/dataConnector.js b/frontend/src/models/dataConnector.js
index 45d5750243c3fa720968f8bff66bc897d59e0704..e0b3c0c3e36c7a807d85f3a6184b435931b50125 100644
--- a/frontend/src/models/dataConnector.js
+++ b/frontend/src/models/dataConnector.js
@@ -42,6 +42,24 @@ const DataConnector = {
         });
     },
   },
+  youtube: {
+    transcribe: async ({ url }) => {
+      return await fetch(`${API_BASE}/ext/youtube/transcript`, {
+        method: "POST",
+        headers: baseHeaders(),
+        body: JSON.stringify({ url }),
+      })
+        .then((res) => res.json())
+        .then((res) => {
+          if (!res.success) throw new Error(res.reason);
+          return { data: res.data, error: null };
+        })
+        .catch((e) => {
+          console.error(e);
+          return { data: null, error: e.message };
+        });
+    },
+  },
 };
 
 export default DataConnector;
diff --git a/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/Youtube/index.jsx b/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/Youtube/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..5252e3fd20e1cc11ca757dc75b1e557c2bb139b9
--- /dev/null
+++ b/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/Youtube/index.jsx
@@ -0,0 +1,123 @@
+import React, { useState } from "react";
+import Sidebar, { SidebarMobileHeader } from "@/components/SettingsSidebar";
+import { isMobile } from "react-device-detect";
+import { DATA_CONNECTORS } from "@/components/DataConnectorOption";
+import System from "@/models/system";
+import showToast from "@/utils/toast";
+
+/**
+ * Settings page for the YouTube transcript data connector: a single-field
+ * form that submits a video URL and reports the outcome with toasts.
+ */
+export default function YouTubeTranscriptConnectorSetup() {
+  const { image } = DATA_CONNECTORS["youtube-transcript"];
+  const [loading, setLoading] = useState(false);
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    const formEl = e.target;
+    const form = new FormData(formEl);
+
+    try {
+      setLoading(true);
+      showToast("Fetching transcript for YouTube video.", "info", {
+        clear: true,
+        autoClose: false,
+      });
+      const { data, error } = await System.dataConnectors.youtube.transcribe({
+        url: form.get("url"),
+      });
+
+      if (!!error) {
+        showToast(error, "error", { clear: true });
+        return;
+      }
+
+      // Only mention the output folder when the backend reports one, so the
+      // toast never reads "Output folder is undefined."
+      const folderNote = data.destination
+        ? ` Output folder is ${data.destination}.`
+        : "";
+      showToast(
+        `${data.title} by ${data.author} transcription completed.${folderNote}`,
+        "success",
+        { clear: true }
+      );
+      formEl.reset();
+    } catch (err) {
+      console.error(err);
+      showToast(err.message, "error", { clear: true });
+    } finally {
+      // Single exit point for the busy state, whichever path ran above.
+      setLoading(false);
+    }
+  };
+
+  return (
+    <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
+      {!isMobile && <Sidebar />}
+      <div
+        style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+        className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[26px] bg-main-gradient w-full h-full overflow-y-scroll border-4 border-accent"
+      >
+        {isMobile && <SidebarMobileHeader />}
+        <div className="flex w-full">
+          <div className="flex flex-col w-full px-1 md:px-20 md:py-12 py-16">
+            <div className="flex w-full gap-x-4 items-center pb-6 border-white border-b-2 border-opacity-10">
+              <img src={image} alt="YouTube" className="rounded-lg h-16 w-16" />
+              <div className="w-full flex flex-col gap-y-1">
+                <div className="items-center flex gap-x-4">
+                  <p className="text-2xl font-semibold text-white">
+                    Import YouTube transcription
+                  </p>
+                </div>
+                <p className="text-sm font-base text-white text-opacity-60">
+                  From a youtube link, import the entire transcript of that
+                  video for embedding.
+                </p>
+              </div>
+            </div>
+
+            <form className="w-full" onSubmit={handleSubmit}>
+              <div className="w-full flex flex-col py-2">
+                <div className="w-full flex items-center gap-4">
+                  <div className="flex flex-col w-60">
+                    <div className="flex flex-col gap-y-1 mb-4">
+                      <label className="text-white text-sm font-semibold block">
+                        YouTube video URL
+                      </label>
+                    </div>
+                    <input
+                      type="url"
+                      name="url"
+                      className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                      placeholder="https://youtube.com/watch?v=abc123"
+                      required={true}
+                      autoComplete="off"
+                      spellCheck={false}
+                    />
+                  </div>
+                </div>
+              </div>
+
+              <div className="flex flex-col gap-y-2 w-fit">
+                <button
+                  type="submit"
+                  disabled={loading}
+                  className="mt-2 text-lg w-fit border border-slate-200 px-4 py-1 rounded-lg text-slate-200 items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 disabled:bg-slate-200 disabled:text-slate-800"
+                >
+                  {loading ? "Collecting transcript..." : "Collect transcript"}
+                </button>
+                {loading && (
+                  <p className="text-xs text-zinc-300">
+                    Once complete, the transcription will be available for
+                    embedding into workspaces in the document picker.
+                  </p>
+                )}
+              </div>
+            </form>
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/index.jsx b/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/index.jsx
index cbd66f08a7222709b3a02c3ad45968682a59a313..edb6aae0700473bcc9447788d6bc8d529d18b27a 100644
--- a/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/index.jsx
+++ b/frontend/src/pages/GeneralSettings/DataConnectors/Connectors/index.jsx
@@ -2,9 +2,11 @@ import paths from "@/utils/paths";
 import { lazy } from "react";
 import { useParams } from "react-router-dom";
 const Github = lazy(() => import("./Github"));
+const YouTubeTranscript = lazy(() => import("./Youtube"));
 
 const CONNECTORS = {
   github: Github,
+  "youtube-transcript": YouTubeTranscript,
 };
 
 export default function DataConnectorSetup() {
diff --git a/frontend/src/pages/GeneralSettings/DataConnectors/index.jsx b/frontend/src/pages/GeneralSettings/DataConnectors/index.jsx
index 76dc13d0a1fd3ae24871dd61421cba894d05f2b1..50f4298ed48b33b1f546b75246a39fedec906021 100644
--- a/frontend/src/pages/GeneralSettings/DataConnectors/index.jsx
+++ b/frontend/src/pages/GeneralSettings/DataConnectors/index.jsx
@@ -29,6 +29,7 @@ export default function DataConnectors() {
             </div>
             <div className="py-4 w-full flex md:flex-wrap overflow-x-scroll gap-4 max-w-full">
               <DataConnectorOption slug="github" />
+              <DataConnectorOption slug="youtube-transcript" />
             </div>
           </div>
         </div>
diff --git a/frontend/src/utils/directories.js b/frontend/src/utils/directories.js
index b2a1d493fb3de866acbb6fb8ac4a2662e4ad333e..5a65b5336794fc62ffbcfc4e0f7458c802bc79c2 100644
--- a/frontend/src/utils/directories.js
+++ b/frontend/src/utils/directories.js
@@ -13,7 +13,7 @@ export function getFileExtension(path) {
 
 export function middleTruncate(str, n) {
   const fileExtensionPattern = /([^.]*)$/;
-  const extensionMatch = str.match(fileExtensionPattern);
+  const extensionMatch = str.includes(".") &&
str.match(fileExtensionPattern); if (str.length <= n) return str; diff --git a/frontend/src/utils/paths.js b/frontend/src/utils/paths.js index c21c1500b7d6905a46245730c9c2d1a1d0ace8c9..2812878fb47480b0ae7743cae3e5947106c28055 100644 --- a/frontend/src/utils/paths.js +++ b/frontend/src/utils/paths.js @@ -83,6 +83,9 @@ export default { github: () => { return "/settings/data-connectors/github"; }, + youtubeTranscript: () => { + return "/settings/data-connectors/youtube-transcript"; + }, }, }, }; diff --git a/server/endpoints/extensions/index.js b/server/endpoints/extensions/index.js index fc545ce3c97134e9703e6d933d3b495af64be29e..1b3770374b02edf1acb49d6dea54572d8ed98025 100644 --- a/server/endpoints/extensions/index.js +++ b/server/endpoints/extensions/index.js @@ -48,6 +48,27 @@ function extensionEndpoints(app) { } } ); + + app.post( + "/ext/youtube/transcript", + [validatedRequest, flexUserRoleValid], + async (request, response) => { + try { + const responseFromProcessor = await forwardExtensionRequest({ + endpoint: "/ext/youtube-transcript", + method: "POST", + body: request.body, + }); + await Telemetry.sendTelemetry("extension_invoked", { + type: "youtube_transcript", + }); + response.status(200).json(responseFromProcessor); + } catch (e) { + console.error(e); + response.sendStatus(500).end(); + } + } + ); } module.exports = { extensionEndpoints };