diff --git a/docs/examples/function_calling.ipynb b/docs/examples/function_calling.ipynb index 98beb735134ee28a6c70f796f4f3df1cb4cc5b63..29ca749b96d7a23858d0a785d608c17e2dc2bbdb 100644 --- a/docs/examples/function_calling.ipynb +++ b/docs/examples/function_calling.ipynb @@ -516,6 +516,130 @@ "call(query=\"What is the tech news in the Lithuania?\", functions=tools, router=router)\n", "call(query=\"Hi!\", functions=tools, router=router)" ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.23461914, 0.50146484, -0.048828125, 0.13989258, -0.18029785]\n" + ] + } + ], + "source": [ + "import cohere\n", + "import os\n", + "\n", + "co = cohere.Client(api_key=os.environ[\"COHERE_API_KEY\"])\n", + "texts = [\n", + " \"Hello from Cohere!\",\n", + " \"مرحبًا من كوهير!\",\n", + " \"Hallo von Cohere!\",\n", + " \"Bonjour de Cohere!\",\n", + " \"¡Hola desde Cohere!\",\n", + " \"Olá do Cohere!\",\n", + " \"Ciao da Cohere!\",\n", + " \"您好,来自 Cohere!\",\n", + " \"कोहेरे से नमस्ते!\",\n", + "]\n", + "response = co.embed(texts=texts, model=\"embed-multilingual-v2.0\")\n", + "embeddings = response.embeddings # All text embeddings\n", + "print(embeddings[0][:5]) # Print embeddings for the first text" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "response = co.detect_language(\n", + " texts=[\n", + " \"\"\"Lenkijos įstatymų leidėjai pavedė buvusiam Europos Vadovų Tarybos vadovui Donaldui Tuskui suformuoti Vyriausybę po aštuonerius metus trukusio dešiniųjų valdymo. Ar tai reiškia, kad baigsis Lenkijos pykčiai su Ukraina ir Europos Sąjunga? Ar tai gera žinia ir Lietuvai? Kokių pokyčių apskritai reikėtų tikėtis? 
Apie tai buvo diskutuojama Žinių radijo laidoje „Aktualusis interviu\"\"\"\n", + " ]\n", + ")\n", + "\n", + "response\n", + "texts = [\n", + " \"\"\"Lenkijos įstatymų leidėjai pavedė buvusiam Europos Vadovų Tarybos vadovui Donaldui Tuskui suformuoti Vyriausybę po aštuonerius metus trukusio dešiniųjų valdymo. Ar tai reiškia, kad baigsis Lenkijos pykčiai su Ukraina ir Europos Sąjunga? Ar tai gera žinia ir Lietuvai? Kokių pokyčių apskritai reikėtų tikėtis? Apie tai buvo diskutuojama Žinių radijo laidoje „Aktualusis interviu\"\"\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "embeds = co.embed(\n", + " texts=texts, model=\"embed-multilingual-v2.0\", input_type=\"search_document\"\n", + ").embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from annoy import AnnoyIndex\n", + "import numpy as np\n", + "\n", + "# Create the search index, pass the size of embedding\n", + "search_index = AnnoyIndex(np.array(embeds).shape[1], \"angular\")\n", + "\n", + "# Add all the vectors to the search index\n", + "for i in range(len(embeds)):\n", + " search_index.add_item(i, embeds[i])\n", + "search_index.build(10) # 10 trees\n", + "search_index.save(\"test.ann\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"What is the tallest mountain in the world?\"\n", + "input_type_query = \"search_query\"\n", + "\n", + "# Get the query's embedding\n", + "query_embed = co.embed(\n", + " texts=[query], model=\"embed-multilingual-v2.0\", input_type=input_type_query\n", + ").embeddings\n", + "\n", + "# Retrieve the nearest neighbors\n", + "similar_item_ids = search_index.get_nns_by_vector(\n", + " query_embed[0], 10, 
include_distances=True\n", + ")\n", + "# Format the results\n", + "query_results = pd.DataFrame(\n", + " data={\n", + " \"texts\": df.iloc[similar_item_ids[0]][\"text\"],\n", + " \"distance\": similar_item_ids[1],\n", + " }\n", + ")\n", + "\n", + "\n", + "print(f\"Query:'{query}'\\nNearest neighbors:\")\n", + "print(query_results) # NOTE: Your results might look slightly different to ours" + ] } ], "metadata": { diff --git a/docs/examples/test.ann b/docs/examples/test.ann new file mode 100644 index 0000000000000000000000000000000000000000..85db20d54995bcd7f1199d1816a069e14558aa40 Binary files /dev/null and b/docs/examples/test.ann differ diff --git a/poetry.lock b/poetry.lock index f5d58647509f00de61c09fd94a65fefc965b4453..6cd649bb3a862454e10e17ba66eadeaf900b9871 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -110,6 +110,17 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "annoy" +version = "1.17.3" +description = "Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk." 
+optional = false +python-versions = "*" +files = [ + {file = "annoy-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c33a5d4d344c136c84976bfb2825760142a8bb25335165e24e11c9afbfa8c2e9"}, + {file = "annoy-1.17.3.tar.gz", hash = "sha256:9cbfebefe0a5f843eba29c6be4c84d601f4f41ad4ded0486f1b88c3b07739c15"}, +] + [[package]] name = "anyio" version = "4.1.0" @@ -2203,4 +2214,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "3300c77d6b6fab3faca403e2f3064a23e9f5ddcd34d63cad42d39b94b1ae5c2b" +content-hash = "8d9e62152a77382c673b01ba8279001795093ae90fc9d67e4073f4c1b5b5c10e" diff --git a/pyproject.toml b/pyproject.toml index b041516c70c5fac4d3adfd7b857e495bc64c372f..dc0b853ec5ac8549dd1c3dfac91323d726d4f3d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ pytest-mock = "^3.12.0" pytest-cov = "^4.1.0" pytest-xdist = "^3.5.0" mypy = "^1.7.1" +annoy = "^1.17.3" [build-system] requires = ["poetry-core"]