Skip to content
Snippets Groups Projects
Unverified Commit c6dbd6ea authored by Diicell's avatar Diicell Committed by GitHub
Browse files

TelegramReader fixes not transferred from llama-hub (#10625)

fixes from llama-hub
parent 4220407a
Branches
Tags
No related merge requests found
......@@ -31,7 +31,7 @@ If the `.session` file already existed, it will not login again, so be aware of
To use this loader, you simply need to pass in an entity name.
```python
from llama_index import download_loader
from llama_index.core import download_loader
TelegramReader = download_loader("TelegramReader")
loader = TelegramReader(
......
"""Telegram reader that reads posts/chats and comments to post from Telegram channel or chat."""
import asyncio
import re
from typing import List, Union
from llama_index.core.readers.base import BaseReader
......@@ -47,7 +48,8 @@ class TelegramReader(BaseReader):
self.api_id = api_id
self.api_hash = api_hash
self.phone_number = phone_number
self.loop = asyncio.get_event_loop()
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
def load_data(
self,
......@@ -101,5 +103,15 @@ class TelegramReader(BaseReader):
entity_name, reply_to=post_id, limit=limit
):
if isinstance(message.text, str) and message.text != "":
results.append(Document(text=message.text))
results.append(Document(text=self._remove_links(message.text)))
return results
def _remove_links(self, string) -> str:
    """Replace every HTTP(S) URL in ``string`` with its bare host name.

    The scheme, an optional leading ``www.``, and everything after the host
    (port, path, query, fragment) are dropped, so
    ``"https://www.example.com/a?b=1"`` becomes ``"example.com"``.

    Args:
        string: Text that may contain ``http://`` or ``https://`` URLs.

    Returns:
        The text with each URL collapsed to its host; text without URLs is
        returned unchanged.
    """
    # The host ends at the first port/path/query/fragment delimiter. Brackets
    # and quotes are excluded from both the host and the discarded tail so a
    # URL wrapped in markup such as ``[label](https://host/path)`` keeps its
    # closing delimiter.
    url_pattern = (
        r"https?://(?:www\.)?"
        r"([^\s/?#:()\[\]<>\"']+)"
        r"[^\s()\[\]<>\"']*"
    )
    return re.sub(url_pattern, r"\1", string)
......@@ -14,7 +14,7 @@ ignore_missing_imports = true
python_version = "3.8"
[tool.poetry]
authors = ["Your Name <you@example.com>"]
authors = ["Dias Kalkamanov <diicellman@gmail.com>"]
description = "llama-index readers telegram integration"
license = "MIT"
name = "llama-index-readers-telegram"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment