Skip to content
Snippets Groups Projects
Unverified Commit b23d85a6 authored by Siraj R Aizlewood's avatar Siraj R Aizlewood
Browse files

Added max_score_in_top_class and new test cases

Test cases now go beyond the original Decision utterances and include semantically similar utterances.

Also added a new max_score_in_top_class method, which takes the score of the highest-scoring vector in the top class and compares it to the threshold value.
parent efb8a409
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
...@@ -16,9 +16,9 @@ class DecisionLayer: ...@@ -16,9 +16,9 @@ class DecisionLayer:
self._add_decision(decision=decision) self._add_decision(decision=decision)
def __call__(self, text: str, _tan: bool=True, _threshold: float=0.5): def __call__(self, text: str, _method: str='raw', _threshold: float=0.5):
results = self._query(text) results = self._query(text)
decision = self._semantic_classify(results, _tan=_tan, _threshold=_threshold) decision = self._semantic_classify(results, _method=_method, _threshold=_threshold)
# return decision # return decision
return decision return decision
...@@ -65,29 +65,49 @@ class DecisionLayer: ...@@ -65,29 +65,49 @@ class DecisionLayer:
return [ return [
{"decision": d, "score": s.item()} for d, s in zip(decisions, scores) {"decision": d, "score": s.item()} for d, s in zip(decisions, scores)
] ]
def _semantic_classify(self, query_results: dict, _tan: bool=True, _threshold: float=0.5): def _semantic_classify(self, query_results: dict, _method: str='raw', _threshold: float=0.5):
"""Given some text, categorizes.""" """Given some text, categorizes."""
# apply the scoring system to the results and group by category # Initialize score dictionaries
scores_by_class = {} scores_by_class = {}
highest_score_by_class = {}
# Define valid methods
valid_methods = ['raw', 'tan', 'max_score_in_top_class']
# Check if method is valid
if _method not in valid_methods:
raise ValueError(f"Invalid method: {_method}")
# Apply the scoring system to the results and group by category
for result in query_results: for result in query_results:
score = np.tan(result['score'] * (np.pi / 2)) if _tan else result['score'] decision = result['decision']
if result['decision'] in scores_by_class: score = result['score']
scores_by_class[result['decision']] += score
else: # Apply tan transformation if method is 'tan'
scores_by_class[result['decision']] = score if _method == 'tan':
score = np.tan(score * (np.pi / 2))
# sort the categories by score in descending order
sorted_categories = sorted(scores_by_class.items(), key=lambda x: x[1], reverse=True) # Update scores_by_class
scores_by_class[decision] = scores_by_class.get(decision, 0) + score
# Determine if the score is sufficiently high.
if sorted_categories and sorted_categories[0][1] > _threshold: # TODO: This seems arbitrary. # Update highest_score_by_class for 'max_score_in_top_class' method
predicted_class = sorted_categories[0][0] if _method == 'max_score_in_top_class':
else: highest_score_by_class[decision] = max(score, highest_score_by_class.get(decision, 0))
predicted_class = None
# Sort the categories by score in descending order
# return the category with the highest total score sorted_classes = sorted(scores_by_class.items(), key=lambda x: x[1], reverse=True)
return predicted_class, scores_by_class
# Determine if the score is sufficiently high
predicted_class = None
if sorted_classes:
top_class, top_score = sorted_classes[0]
if _method == 'max_score_in_top_class':
top_score = highest_score_by_class[top_class]
if top_score > _threshold:
predicted_class = top_class
# Return the category with the highest total score
return predicted_class, scores_by_class
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment