Skip to content
Snippets Groups Projects
Commit 73258344 authored by “Daniel Griffiths”
Browse files

removed none types for mypy

parent a8c64a67
No related branches found
No related tags found
No related merge requests found
...@@ -7,16 +7,16 @@ import string ...@@ -7,16 +7,16 @@ import string
class TfidfEncoder(BaseEncoder): class TfidfEncoder(BaseEncoder):
idf: dict | None = None idf: np.ndarray
word_index: dict | None = None word_index: dict
def __init__(self, name: str = "tfidf"): def __init__(self, name: str = "tfidf"):
super().__init__(name=name) super().__init__(name=name)
self.word_index = None self.word_index = {}
self.idf = None self.idf = np.array([])
def __call__(self, docs: list[str]) -> list[list[float]]: def __call__(self, docs: list[str]) -> list[list[float]]:
if self.word_index is None or self.idf is None: if len(self.word_index) == 0 or self.idf.size == 0:
raise ValueError("Vectorizer is not initialized.") raise ValueError("Vectorizer is not initialized.")
if len(docs) == 0: if len(docs) == 0:
raise ValueError("No documents to encode.") raise ValueError("No documents to encode.")
...@@ -43,6 +43,8 @@ class TfidfEncoder(BaseEncoder): ...@@ -43,6 +43,8 @@ class TfidfEncoder(BaseEncoder):
return word_index return word_index
def _compute_tf(self, docs: list[str]) -> np.ndarray: def _compute_tf(self, docs: list[str]) -> np.ndarray:
if len(self.word_index) == 0:
raise ValueError("Word index is not initialized.")
tf = np.zeros((len(docs), len(self.word_index))) tf = np.zeros((len(docs), len(self.word_index)))
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
word_counts = Counter(doc.split()) word_counts = Counter(doc.split())
...@@ -54,6 +56,8 @@ class TfidfEncoder(BaseEncoder): ...@@ -54,6 +56,8 @@ class TfidfEncoder(BaseEncoder):
return tf return tf
def _compute_idf(self, docs: list[str]) -> np.ndarray: def _compute_idf(self, docs: list[str]) -> np.ndarray:
if len(self.word_index) == 0:
raise ValueError("Word index is not initialized.")
idf = np.zeros(len(self.word_index)) idf = np.zeros(len(self.word_index))
for doc in docs: for doc in docs:
words = set(doc.split()) words = set(doc.split())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment