from decision_layer.encoders import BaseEncoder
from decision_layer.schema import Decision

import numpy as np
from numpy.linalg import norm


class DecisionLayer:
    """Routes free text to the most similar `Decision` categories by
    cosine similarity over an in-memory embedding index."""

    # (n, dim) array of utterance embeddings; None until the first decision is added
    index = None
    # (n,) array of decision names, aligned row-for-row with `index`
    categories = None

    def __init__(self, encoder: BaseEncoder, decisions: list[Decision] | None = None):
        """Create a layer around `encoder`, optionally pre-populating the
        index from `decisions`.

        NOTE: the default is None (not a shared mutable `[]`) to avoid the
        mutable-default-argument pitfall; passing a list behaves as before.
        """
        self.encoder = encoder
        # if a decisions list has been passed, we initialize the index now
        for decision in decisions or []:
            self._add_decision(decision=decision)

    def __call__(self, text: str):
        results = self._query(text)
        raise NotImplementedError("To implement decision logic based on scores")

    def add(self, decision: Decision):
        """Add a single decision (and its utterance embeddings) to the index."""
        # BUG FIX: was `devision=decision` (typo), which raised TypeError on every call.
        self._add_decision(decision=decision)

    def _add_decision(self, decision: Decision):
        """Embed `decision.utterances` and append the rows to the index,
        recording one category label per embedding."""
        embeds = self.encoder(decision.utterances)
        # category array: one decision-name entry per utterance embedding
        labels = np.array([decision.name] * len(embeds))
        if self.categories is None:
            self.categories = labels
        else:
            self.categories = np.concatenate([self.categories, labels])
        # utterance array (the index itself)
        rows = np.array(embeds)
        if self.index is None:
            self.index = rows
        else:
            self.index = np.concatenate([self.index, rows])

    def _query(self, text: str, top_k: int = 5):
        """Given some text, encode it and search the index vector space to
        retrieve the `top_k` most similar records.

        Returns a list of {"decision": name, "score": cosine_similarity}
        dicts (unordered, as with np.argpartition). Raises ValueError if
        the index is empty.
        """
        if self.index is None:
            raise ValueError("Index is empty; add at least one Decision before querying.")
        # create query vector (encoder returns a batch; take the single row)
        xq = np.array(self.encoder([text]))[0]
        # calculate cosine similarities
        # BUG FIX: normalise by each ROW's norm (axis=1), not the Frobenius
        # norm of the whole index matrix, which skewed scores by index size.
        sim = np.dot(self.index, xq) / (norm(self.index, axis=1) * norm(xq))
        # clamp so argpartition cannot fail when the index holds < top_k rows
        top_k = min(top_k, sim.shape[0])
        # get indices of top_k records
        idx = np.argpartition(sim, -top_k)[-top_k:]
        scores = sim[idx]
        # get the utterance categories (decision names)
        decisions = self.categories[idx]
        return [
            {"decision": d, "score": s.item()} for d, s in zip(decisions, scores)
        ]
+ ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from decision_layer.schema import Decision\n", "\n", "politics = Decision(\n", " name=\"politics\",\n", " utterances=[\n", " \"isn't politics the best thing ever\",\n", " \"why don't you tell me about your political opinions\",\n", " \"don't you just love the president\",\n", " \"don't you just hate the president\",\n", " \"they're going to destroy this country!\",\n", " \"they will save the country!\"\n", " ]\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's define another for good measure:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "chitchat = Decision(\n", " name=\"chitchat\",\n", " utterances=[\n", " \"how's the weather today?\",\n", " \"how are things going?\",\n", " \"lovely weather today\",\n", " \"the weather is horrendous\",\n", " \"let's go to the chippy\"\n", " ]\n", ")\n", "\n", "decisions = [politics, chitchat]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we initialize our embedding model (we will add support for Hugging Face):" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from decision_layer.encoders import OpenAIEncoder\n", "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"\n", "encoder = OpenAIEncoder(name=\"text-embedding-ada-002\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we define the `DecisionLayer`. When called, the decision layer will consume text (a query) and output the category (`Decision`) it belongs to — for now we can only `_query` and get the most similar `Decision` `utterances`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from decision_layer import DecisionLayer\n", + "\n", + "dl = DecisionLayer(encoder=encoder, decisions=decisions)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'decision': 'politics', 'score': 0.24968127755063652},\n", + " {'decision': 'politics', 'score': 0.2536216026530966},\n", + " {'decision': 'politics', 'score': 0.27568433588684954},\n", + " {'decision': 'politics', 'score': 0.27732789989574913},\n", + " {'decision': 'politics', 'score': 0.28110307885950714}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = dl._query(\"don't you love politics?\")\n", + "out" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "decision-layer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}