From b2ec64c7ce5bbb0f34731ee35f3376e8dbb4b7cc Mon Sep 17 00:00:00 2001 From: Simonas <20096648+simjak@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:26:02 +0200 Subject: [PATCH] wip --- docs/examples/function_calling.ipynb | 124 +++++++++++++++++++++++++++ docs/examples/test.ann | Bin 0 -> 64764 bytes poetry.lock | 15 +++- pyproject.toml | 1 + 4 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 docs/examples/test.ann diff --git a/docs/examples/function_calling.ipynb b/docs/examples/function_calling.ipynb index 98beb735..29ca749b 100644 --- a/docs/examples/function_calling.ipynb +++ b/docs/examples/function_calling.ipynb @@ -516,6 +516,130 @@ "call(query=\"What is the tech news in the Lithuania?\", functions=tools, router=router)\n", "call(query=\"Hi!\", functions=tools, router=router)" ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.23461914, 0.50146484, -0.048828125, 0.13989258, -0.18029785]\n" + ] + } + ], + "source": [ + "import cohere\n", + "import os\n", + "\n", + "co = cohere.Client(api_key=os.environ[\"COHERE_API_KEY\"])\n", + "texts = [\n", + " \"Hello from Cohere!\",\n", + " \"مرØبًا من كوهير!\",\n", + " \"Hallo von Cohere!\",\n", + " \"Bonjour de Cohere!\",\n", + " \"¡Hola desde Cohere!\",\n", + " \"Olá do Cohere!\",\n", + " \"Ciao da Cohere!\",\n", + " \"您好,æ¥è‡ª Cohereï¼\",\n", + " \"कोहेरे से नमसà¥à¤¤à¥‡!\",\n", + "]\n", + "response = co.embed(texts=texts, model=\"embed-multilingual-v2.0\")\n", + "embeddings = response.embeddings # All text embeddings\n", + "print(embeddings[0][:5]) # Print embeddings for the first text" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "response = co.detect_language(\n", + " texts=[\n", + " \"\"\"Lenkijos įstatymų leidÄ—jai pavedÄ— buvusiam Europos Vadovų Tarybos vadovui Donaldui Tuskui suformuoti VyriausybÄ™ po aÅ¡tuonerius metus trukusio deÅ¡iniųjų valdymo. Ar tai reiÅ¡kia, kad baigsis Lenkijos pykÄiai su Ukraina ir Europos SÄ…junga? Ar tai gera žinia ir Lietuvai? Kokių pokyÄių apskritai reikÄ—tų tikÄ—tis? Apie tai buvo diskutuojama Žinių radijo laidoje „Aktualusis interviu\"\"\"\n", + " ]\n", + ")\n", + "\n", + "response\n", + "texts = [\n", + " \"\"\"Lenkijos įstatymų leidÄ—jai pavedÄ— buvusiam Europos Vadovų Tarybos vadovui Donaldui Tuskui suformuoti VyriausybÄ™ po aÅ¡tuonerius metus trukusio deÅ¡iniųjų valdymo. Ar tai reiÅ¡kia, kad baigsis Lenkijos pykÄiai su Ukraina ir Europos SÄ…junga? Ar tai gera žinia ir Lietuvai? Kokių pokyÄių apskritai reikÄ—tų tikÄ—tis? Apie tai buvo diskutuojama Žinių radijo laidoje „Aktualusis interviu\"\"\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "embeds = co.embed(\n", + " texts=texts, model=\"embed-multilingual-v2.0\", input_type=\"search_document\"\n", + ").embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from annoy import AnnoyIndex\n", + "import numpy as np\n", + "\n", + "# Create the search index, pass the size of embedding\n", + "search_index = AnnoyIndex(np.array(embeds).shape[1], \"angular\")\n", + "\n", + "# Add all the vectors to the search index\n", + "for i in range(len(embeds)):\n", + " search_index.add_item(i, embeds[i])\n", + "search_index.build(10) # 10 trees\n", + "search_index.save(\"test.ann\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"What is the tallest mountain in the world?\"\n", + "input_type_query = \"search_query\"\n", + "\n", + "# Get the query's embedding\n", + "query_embed = co.embed(\n", + " texts=[query], model=\"embed-multilingual-v2.0\", input_type=input_type_query\n", + ").embeddings\n", + "\n", + "# Retrieve the nearest neighbors\n", + "similar_item_ids = search_index.get_nns_by_vector(\n", + " query_embed[0], 10, include_distances=True\n", + ")\n", + "# Format the results\n", + "query_results = pd.DataFrame(\n", + " data={\n", + " \"texts\": df.iloc[similar_item_ids[0]][\"text\"],\n", + " \"distance\": similar_item_ids[1],\n", + " }\n", + ")\n", + "\n", + "\n", + "print(f\"Query:'{query}'\\nNearest neighbors:\")\n", + "print(query_results) # NOTE: Your results might look slightly different to ours" + ] } ], "metadata": { diff --git a/docs/examples/test.ann b/docs/examples/test.ann new file mode 100644 index 0000000000000000000000000000000000000000..85db20d54995bcd7f1199d1816a069e14558aa40 GIT binary patch literal 64764 zcmeH~|C3d98ONUmgaO-F1q8R$gRp{#@<lLf_uhMq-Oz9gtyF^Gkj)q{go87rsLc@; zVNnbSaR8w!B1tT)F@V6`d+x`AbeLq+utLl}+BgV=l7{$YkX{G>jPw5C>^bLip3n2V zpV#wzK6gJ=X7<vw?~dVS7R@W0?@}>4oVVceycMT%^-HX{D{t|M#N03P7QVstW@7dV z?^ow6Z037DvEW3`ifzopHr&j8Mc!-y-=5&!mY9Eu=N9hA6N|c5%zqD?5`Hf%SU8e7 zmnIfIoHO5AF!wJ0?<iYwDSjWsW-^+tELhM^ya~L|#K-N}?q%MwoViPxvp;Xevpla# zEIu!9elGWp1`D3#d2PjlfAKvmo12@n;7H!0i8+gF(Y%K5w^b}UzhM3uG+k1#h}^>a zVexF<TpxT)!q>}u>x713*e7L+(eJL{d1-^WNBQ<5`Mu728$RI^G!pL%-h1Nr9=@%? zXMMrqK?RtS&nD)ymo0dY@zL0hz#n<|FUb4n#Cwc6yUFkA#QbEw^DR1^hgZeoBHz~# z_iWj0J~_|g`_8h3+lUME_+ZKWR`|d)wRh8)w+YQBsl`AT{t@2;iPu!Iu$tVvp=)_! z?hs?|qq#rxsEw_{|5Ws@$M0X!LhXZXFdV}C``|MHyLspz&NT|{f54^{A9Xpi7UnO( z=R)ke!rJk?9)IJ>V<|Shh}nbZ)g=qAq#iSfJEvf_`pbF#nZ)dQa$kY(E%^PA9RA1s zKfL#?m>-O;`S^Sb|LM1j&{j(?Jw<PzBRa(uaPjV{sDD@d9>r!kW4AKajXUEpHIJU7 z4gq?|KiHdCG!$L<^5k8dji%lm7B9qZ42&0}=}E4~uuXgABcA^Pe|p2UbXd?G-S0G5 zxT?XdF0tS`G)*S&#n@kk4eQB2!1F(d^C{O2Ws9bxufAfDW4t#uPt!y7Fskn`cRsnH z%hP}ENBCwfMN_%c4{;6g)A2`$^8vNkhTQ<_&YB3OV}r)%AmeMXnbTm!L(H9z)>AO+ z3rG4Yo=MDWiT!V$>A|oM{qig9$1}c|7@NqQH5{VH&p`WfG*#0>X!X;m>u21b;`>DW z4&dGluZt`62R7)AtNGp&pKvSQ&UfOu5or4<T)OjK#rW?St0jji=-?jFXYM$8;3s?> z+utM>?jxr+ut~Y^K|?#^Z{;k4NAZ{F>Ib)3^;V=O{T}L>*0sdE;pm}0;b43ZM&qma zX1#=uP~VaG`ZfIjnKyu&KfoG!8#X&(cADHi$H$bCh4-Q9TWI+@>z}z1IR~TBPcH`3 z__ly}$MOC5#2h^xl8<K{7C&HY8S5m6&I=gpMBm2rMjd|cVqOb%-ozYwzPJNj>{o7P z#SG1U81qBy(B@k4u?M@g=uF4bz3vk9+zsEq!G+ot8_D%+#5l~_>&->(whyh{sKZvi z^(OYsj7_3vzJkUUwBH7YBiP`_zlxt!O9Q#3dqF?mSyOQd?d-#TH}`Z+_n@{zV8cG| z*wdnQ*skRJAoTtWcAfEk47>m0w<)yf06r!V^F?BH=1T1kqPIU<UPHrJe4!;8!}AM_ z4Iy{>DwvC=8m>N!oyS<(SDrYRQRg=1A8jx{9j){5`4Hnp;__SK*#{%|TTRZ~8T>3l z*DJ)l8V#(&Xg~bF&%D=DPV~e|*!E@5y_Nms8zr-o)ILe|@E#GL-jAre;WK(WvG5>z z(Ca@zLlrjdj8{{WA21#eYcsy+0s9o2%h_MBv6Cf>+3!3)T^e%(npvyCv(&$WwtZZ1 z^Kc1ROYWMo1y^8ejM3}y4QTx>vCH%adstjYoJOv*_*qS^^tUbH>W)8Z5NxOB_mgi= z=G^TpxSrZQ1fO1MPxJgZnlD2iePR1L%>NiabI4~GKI@r-X4i<#dbEtHn13IZ<MH<{ zbJBfrI~sbSwT?NPnbU;bEat%^+)=ie{oLP6PHFDMihqwL59jajz6#s^=&0s9zxV0d zjdwGTHF55@XA-%NW6q5gbI0~r(3$x3M%2jM75H67%%w2C5$4n(fPK7%oY<FaBlE7S zSnxya$R~OyXMP=A-h&5rZXt77*KP}W^LynUCWld6iz*g|=xXKrC*;8%6ApwIYcSqk zHXDmA`(kttu`UkH4zd5#gyt*Q&n7PWdTJl7W3Nc-@@K9tu)33ei_!i$b{px})9BrT zu94(45F2{mnpiVKiASs;UEd?%bcVPuV@F-XyI}P^d8Hiq?e=FFr`GWa=J|Syk7Bzd zv~Vx}x{-G+>l5zL1?Wv}Tj6{lv1mBj$D}OCm)|5e08aO!t0%r*<Jy1?Yc0M7KU@L- zw^*LB(d5y=9Q1g4GNL~M;yU)fXlK3I1~|~$&cSH`{vM>(w~*Ijc-}$p9w+7yn3l-v zP4tXoKKoU8Eitc3H4~Hmi|N^7GrjWweurVVgPP4I=dH|7d!-4z&9JD&?{nB+1oIE6 zCG*ee8iegP3+7oz#iqn8{a=mW4Sy3p$=&**?HY8_fAI|dryGmUN%)?C?(X<xJ=$Ju z#$l7PxE>AkxutvcS7G@v`rsQhpsx)UJsQ}1@x^<*pLs5|&#``)KNb5U#O=a;85*WA z)=s>Y*e~M!V`8$8y7d25?1Ck5*vPdIZZFWwdF&3u2~8pEC+vi;&$#~tJrn8u{^XeU zF20HvV*gQc?th}FnZ4~!IPw2O%svu-8y|n-nZ3hy(J!a)QIlAFGjmxNQ8WICpYHF$ z&|KBILxBE>fCz|y2#A0Ph=2%)fCyxbfX)J0tGMzN0TB=Z5fA|p5CIVofiDT@ETB#h z0TB=Z5fA|p5CIVofou}cSs<HrSJomR0wN#+A|L`HAOa$wvw(U)1VlgtL_h>YKm<fU z1hPp$XMt?iU0I8O2#A0Ph=2%)fCz|y&I0NI5fA|p5CIVo0TB=Z5y&P1odvR4cV#UC zA|L`HAOa#F0wN#+It!==L_h>YKm<fU1VlgtL?D|4bQZ{F-IcWnh=2%)fCz|y2#A0P z=q#Wf5CIVo0TB=Z5fA|p5P@tG&{-gxbywCRAOa#F0wN#+A|L`HptFE_Km<fU1Vlgt zL_h>YKm@W$Kxct$)?HbPfCz|y2#A0Ph=2%)fX)Ky0TB=Z5fA|p5CIVo0TIY10i6Z1 zS$AbE0wN#+A|L`HAOa#F0y+z*2Sh*wL_h>YKm<fU1VkX41aub2X5E#w2#A0Ph=2%) yfCz|y2<R-J9uNT$5CIVo0TB=Z5fFiF63|&7n{`*#A|L`HAOa#F0wN#+BJh6`iW`0a literal 0 HcmV?d00001 diff --git a/poetry.lock b/poetry.lock index f5d58647..6cd649bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -110,6 +110,17 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "annoy" +version = "1.17.3" +description = "Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk." +optional = false +python-versions = "*" +files = [ + {file = "annoy-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c33a5d4d344c136c84976bfb2825760142a8bb25335165e24e11c9afbfa8c2e9"}, + {file = "annoy-1.17.3.tar.gz", hash = "sha256:9cbfebefe0a5f843eba29c6be4c84d601f4f41ad4ded0486f1b88c3b07739c15"}, +] + [[package]] name = "anyio" version = "4.1.0" @@ -2203,4 +2214,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "3300c77d6b6fab3faca403e2f3064a23e9f5ddcd34d63cad42d39b94b1ae5c2b" +content-hash = "8d9e62152a77382c673b01ba8279001795093ae90fc9d67e4073f4c1b5b5c10e" diff --git a/pyproject.toml b/pyproject.toml index b041516c..dc0b853e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ pytest-mock = "^3.12.0" pytest-cov = "^4.1.0" pytest-xdist = "^3.5.0" mypy = "^1.7.1" +annoy = "^1.17.3" [build-system] requires = ["poetry-core"] -- GitLab