From 138d540b9adc3f141851a76cbdcda176d8567496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDaniel=20Griffiths=E2=80=9D?= <Danielgriffiths1790@gmail.com> Date: Mon, 8 Jan 2024 15:34:30 +0000 Subject: [PATCH] feat: added more tfidf tests --- tests/unit/encoders/test_tfidf.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/unit/encoders/test_tfidf.py b/tests/unit/encoders/test_tfidf.py index 5aa8dfc8..21524c91 100644 --- a/tests/unit/encoders/test_tfidf.py +++ b/tests/unit/encoders/test_tfidf.py @@ -39,7 +39,7 @@ class TestTfidfEncoder: isinstance(sublist, list) for sublist in result ), "Each item in result should be a list" - def test_call_method_no_docs(self, tfidf_encoder): + def test_call_method_no_docs_tfidf(self, tfidf_encoder): with pytest.raises(ValueError): tfidf_encoder([]) @@ -60,3 +60,26 @@ class TestTfidfEncoder: def test_call_method_with_uninitialized_model(self, tfidf_encoder): with pytest.raises(ValueError): tfidf_encoder(["test"]) + + def test_call_method_no_docs(self, tfidf_encoder): + with pytest.raises(ValueError, match="No documents to encode."): + tfidf_encoder([]) + + def test_compute_tf_no_word_index(self, tfidf_encoder): + with pytest.raises(ValueError, match="Word index is not initialized."): + tfidf_encoder._compute_tf(["some docs"]) + + def test_compute_tf_with_word_in_word_index(self, tfidf_encoder): + routes = [ + Route( + name="test_route", + utterances=["some docs", "and more docs", "and even more docs"], + ) + ] + tfidf_encoder.fit(routes) + tf = tfidf_encoder._compute_tf(["some docs"]) + assert tf.shape == (1, len(tfidf_encoder.word_index)) + + def test_compute_idf_no_word_index(self, tfidf_encoder): + with pytest.raises(ValueError, match="Word index is not initialized."): + tfidf_encoder._compute_idf(["some docs"]) -- GitLab