Skip to content
Snippets Groups Projects
Unverified Commit 705f4355 authored by Siraj R Aizlewood's avatar Siraj R Aizlewood
Browse files

Initial Code

Added simple_categorise, which sums the cosine similarity scores per category to determine the category. Optionally applies a tan function to boost the scores of the closest points and reduce the scores of points further away.
parent 1acdf201
No related branches found
No related tags found
No related merge requests found
...@@ -2,14 +2,13 @@ from decision_layer.encoders import BaseEncoder ...@@ -2,14 +2,13 @@ from decision_layer.encoders import BaseEncoder
from decision_layer.schema import Decision from decision_layer.schema import Decision
import numpy as np import numpy as np
from numpy.linalg import norm from numpy.linalg import norm
class DecisionLayer: class DecisionLayer:
index = None index = None
categories = None categories = None
def __init__(self, encoder: BaseEncoder, decisions: list[Decision] = []): def __init__(self, encoder: BaseEncoder, decisions: list[Decision] = []):
self.encoder = encoder self.encoder = encoder
self.embeddings_classified = False
# if decisions list has been passed, we initialize index now # if decisions list has been passed, we initialize index now
if decisions: if decisions:
# initialize index now # initialize index now
...@@ -17,16 +16,20 @@ class DecisionLayer: ...@@ -17,16 +16,20 @@ class DecisionLayer:
self._add_decision(decision=decision) self._add_decision(decision=decision)
def __call__(self, text: str):
    """Categorise ``text`` and return the winning decision name.

    The text is handed to ``simple_categorise``, which runs the similarity
    query itself; passing it pre-computed query results (as before) would
    make it query on a list of result dicts instead of on the text.

    Returns:
        Whatever ``simple_categorise`` returns for ``text`` (a category
        name, or ``None`` when the query yields no results).
    """
    return self.simple_categorise(text)
def add(self, decision: "Decision", dimensiona=None):
    """Register one more decision with the layer.

    Args:
        decision: The decision to add; it is forwarded unchanged to
            ``_add_decision``.
        dimensiona: Unused. Accepted (and now optional) only so that calls
            written against either the one-argument or the two-argument
            form of this method keep working.
    """
    # The keyword must match ``_add_decision``'s parameter name; the
    # previous spelling ``devision=`` raised TypeError on every call.
    self._add_decision(decision=decision)
def _add_decision(self, decision: Decision): def _add_decision(self, decision: Decision):
# create embeddings # create embeddings
embeds = self.encoder(decision.utterances) embeds = self.encoder(decision.utterances)
# create decision array # create decision array
if self.categories is None: if self.categories is None:
self.categories = np.array([decision.name]*len(embeds)) self.categories = np.array([decision.name]*len(embeds))
...@@ -56,3 +59,25 @@ class DecisionLayer: ...@@ -56,3 +59,25 @@ class DecisionLayer:
return [ return [
{"decision": d, "score": s.item()} for d, s in zip(decisions, scores) {"decision": d, "score": s.item()} for d, s in zip(decisions, scores)
] ]
def simple_categorise(self, text: str, top_k: int = 5, apply_tan: bool = True):
    """Pick the category with the highest summed score among the top matches.

    Queries the index via ``self._query(text, top_k)``, which yields dicts
    with ``'decision'`` and ``'score'`` keys, adds up the (optionally
    transformed) scores per decision, and returns the decision with the
    largest total.

    Args:
        text: The text to categorise.
        top_k: Number of nearest matches requested from ``_query``.
        apply_tan: When true, each score ``s`` is replaced by
            ``tan(s * pi / 2)`` before summing, which inflates scores near
            1 and shrinks scores near 0.

    Returns:
        The decision name with the highest total score, or ``None`` when
        the query returns no results. On a tie, the decision seen first in
        the query results wins.
    """
    results = self._query(text, top_k)

    scores_by_category = {}
    for result in results:
        score = result['score']
        if apply_tan:
            # tan has a pole at pi/2: a similarity that float rounding has
            # pushed just past 1.0 would land on the far side of the pole
            # and turn the best match into a huge negative score. Clamp to
            # [-1, 1] first so the transform stays monotonic.
            score = np.tan(np.clip(score, -1.0, 1.0) * (np.pi / 2))
        category = result['decision']
        scores_by_category[category] = scores_by_category.get(category, 0) + score

    if not scores_by_category:
        return None
    # max() keeps the first of equal totals, matching a stable descending sort.
    return max(scores_by_category, key=scores_by_category.get)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment