From be3b0b491676da816f38daa007060837eab08333 Mon Sep 17 00:00:00 2001 From: Sean Hatfield <seanhatfield5@gmail.com> Date: Tue, 6 Aug 2024 10:16:17 -0700 Subject: [PATCH] Youtube loader whitespace fix (#2051) youtube loader whitespace fix --- .../YoutubeTranscript/YoutubeLoader/youtube-transcript.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js index c81c0ec56..f868875b2 100644 --- a/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js +++ b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js @@ -47,10 +47,12 @@ class YoutubeTranscript { let transcript = ""; const chunks = transcriptXML.getElementsByTagName("text"); for (const chunk of chunks) { - transcript += chunk.textContent; + // Add space after each text chunk + transcript += chunk.textContent + " "; } - return transcript; + // Trim extra whitespace + return transcript.trim().replace(/\s+/g, " "); } catch (e) { throw new YoutubeTranscriptError(e); } -- GitLab