Skip to content
Snippets Groups Projects
Commit f94529e0 authored by “Daniel Griffiths”
Browse files

fixed: removed none types for mypy

parent c340e089
No related branches found
No related tags found
No related merge requests found
...@@ -7,16 +7,16 @@ import string ...@@ -7,16 +7,16 @@ import string
class TfidfEncoder(BaseEncoder): class TfidfEncoder(BaseEncoder):
idf: dict | None = None idf: np.ndarray
word_index: dict | None = None word_index: dict
def __init__(self, name: str = "tfidf"): def __init__(self, name: str = "tfidf"):
super().__init__(name=name) super().__init__(name=name)
self.word_index = None self.word_index = {}
self.idf = None self.idf = np.array([])
def __call__(self, docs: list[str]) -> list[list[float]]: def __call__(self, docs: list[str]) -> list[list[float]]:
if self.word_index is None or self.idf is None: if len(self.word_index) == 0 or self.idf.size == 0:
raise ValueError("Vectorizer is not initialized.") raise ValueError("Vectorizer is not initialized.")
if len(docs) == 0: if len(docs) == 0:
raise ValueError("No documents to encode.") raise ValueError("No documents to encode.")
...@@ -43,6 +43,8 @@ class TfidfEncoder(BaseEncoder): ...@@ -43,6 +43,8 @@ class TfidfEncoder(BaseEncoder):
return word_index return word_index
def _compute_tf(self, docs: list[str]) -> np.ndarray: def _compute_tf(self, docs: list[str]) -> np.ndarray:
if len(self.word_index) == 0:
raise ValueError("Word index is not initialized.")
tf = np.zeros((len(docs), len(self.word_index))) tf = np.zeros((len(docs), len(self.word_index)))
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
word_counts = Counter(doc.split()) word_counts = Counter(doc.split())
...@@ -54,6 +56,8 @@ class TfidfEncoder(BaseEncoder): ...@@ -54,6 +56,8 @@ class TfidfEncoder(BaseEncoder):
return tf return tf
def _compute_idf(self, docs: list[str]) -> np.ndarray: def _compute_idf(self, docs: list[str]) -> np.ndarray:
if len(self.word_index) == 0:
raise ValueError("Word index is not initialized.")
idf = np.zeros(len(self.word_index)) idf = np.zeros(len(self.word_index))
for doc in docs: for doc in docs:
words = set(doc.split()) words = set(doc.split())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment