From ce8a26a08e9b2e7fb2eb668b1a7923de658e9394 Mon Sep 17 00:00:00 2001 From: James Briggs <james.briggs@hotmail.com> Date: Sat, 2 Mar 2024 17:54:27 +0800 Subject: [PATCH] add docstrings --- coverage.xml | 810 +++++++++---------- semantic_router/splitters/consecutive_sim.py | 9 +- semantic_router/splitters/cumulative_sim.py | 9 +- semantic_router/splitters/rolling_window.py | 71 +- 4 files changed, 460 insertions(+), 439 deletions(-) diff --git a/coverage.xml b/coverage.xml index 2144ec09..818c0304 100644 --- a/coverage.xml +++ b/coverage.xml @@ -1,12 +1,12 @@ <?xml version="1.0" ?> -<coverage version="7.4.3" timestamp="1709367522085" lines-valid="2072" lines-covered="1619" line-rate="0.7814" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0"> +<coverage version="7.4.3" timestamp="1709373085574" lines-valid="2072" lines-covered="1331" line-rate="0.6424" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0"> <!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.4.3 --> <!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd --> <sources> <source>/Users/jamesbriggs/Documents/projects/aurelio-labs/semantic-router/semantic_router</source> </sources> <packages> - <package name="." line-rate="0.8981" branch-rate="0" complexity="0"> + <package name="." line-rate="0.7702" branch-rate="0" complexity="0"> <classes> <class name="__init__.py" filename="__init__.py" complexity="0" line-rate="1" branch-rate="0"> <methods/> @@ -18,7 +18,7 @@ <line number="7" hits="1"/> </lines> </class> - <class name="hybrid_layer.py" filename="hybrid_layer.py" complexity="0" line-rate="0.9808" branch-rate="0"> + <class name="hybrid_layer.py" filename="hybrid_layer.py" complexity="0" line-rate="0.2115" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -33,98 +33,98 @@ <line number="18" hits="1"/> <line number="19" hits="1"/> <line number="21" hits="1"/> - <line number="28" hits="1"/> - <line number="29" hits="1"/> - <line number="31" hits="1"/> + <line number="28" hits="0"/> + <line number="29" hits="0"/> + <line number="31" hits="0"/> <line number="32" hits="0"/> <line number="33" hits="0"/> - <line number="35" hits="1"/> - <line number="37" hits="1"/> - <line number="38" hits="1"/> - <line number="39" hits="1"/> - <line number="42" hits="1"/> - <line number="44" hits="1"/> - <line number="48" hits="1"/> + <line number="35" hits="0"/> + <line number="37" hits="0"/> + <line number="38" hits="0"/> + <line number="39" hits="0"/> + <line number="42" hits="0"/> + <line number="44" hits="0"/> + <line number="48" hits="0"/> <line number="50" hits="1"/> - <line number="51" hits="1"/> - <line number="52" hits="1"/> - <line number="53" hits="1"/> - <line number="54" hits="1"/> - <line number="55" hits="1"/> - <line number="57" hits="1"/> + <line number="51" hits="0"/> + <line number="52" hits="0"/> + <line number="53" hits="0"/> + <line number="54" hits="0"/> + <line number="55" hits="0"/> + <line number="57" hits="0"/> <line number="59" hits="1"/> - <line number="60" hits="1"/> + <line number="60" hits="0"/> <line number="62" hits="1"/> - <line number="63" hits="1"/> - <line number="65" hits="1"/> - <line number="67" hits="1"/> - <line number="70" hits="1"/> - <line number="72" hits="1"/> - <line number="73" hits="1"/> - <line number="76" hits="1"/> - <line number="78" hits="1"/> - <line number="81" hits="1"/> - <line number="82" hits="1"/> - <line number="84" hits="1"/> - <line number="85" hits="1"/> - <line number="86" hits="1"/> + <line number="63" hits="0"/> + <line number="65" hits="0"/> + <line number="67" hits="0"/> + <line number="70" hits="0"/> + <line number="72" hits="0"/> + <line number="73" hits="0"/> + <line number="76" hits="0"/> + <line number="78" hits="0"/> + <line number="81" hits="0"/> + <line number="82" hits="0"/> + <line number="84" hits="0"/> + <line number="85" hits="0"/> + <line number="86" hits="0"/> <line number="88" hits="1"/> - <line number="90" hits="1"/> - <line number="91" hits="1"/> - <line number="94" hits="1"/> - <line number="95" hits="1"/> - <line number="98" hits="1"/> - <line number="99" hits="1"/> - <line number="100" hits="1"/> + <line number="90" hits="0"/> + <line number="91" hits="0"/> + <line number="94" hits="0"/> + <line number="95" hits="0"/> + <line number="98" hits="0"/> + <line number="99" hits="0"/> + <line number="100" hits="0"/> <line number="106" hits="1"/> - <line number="107" hits="1"/> - <line number="109" hits="1"/> + <line number="107" hits="0"/> + <line number="109" hits="0"/> <line number="115" hits="1"/> - <line number="116" hits="1"/> - <line number="118" hits="1"/> + <line number="116" hits="0"/> + <line number="118" hits="0"/> <line number="124" hits="1"/> - <line number="129" hits="1"/> - <line number="130" hits="1"/> - <line number="132" hits="1"/> - <line number="133" hits="1"/> - <line number="135" hits="1"/> - <line number="137" hits="1"/> - <line number="139" hits="1"/> - <line number="140" hits="1"/> - <line number="141" hits="1"/> - <line number="143" hits="1"/> - <line number="144" hits="1"/> - <line number="145" hits="1"/> - <line number="146" hits="1"/> - <line number="148" hits="1"/> - <line number="149" hits="1"/> - <line number="150" hits="1"/> - <line number="152" hits="1"/> - <line number="153" hits="1"/> - <line number="155" hits="1"/> - <line number="156" hits="1"/> + <line number="129" hits="0"/> + <line number="130" hits="0"/> + <line number="132" hits="0"/> + <line number="133" hits="0"/> + <line number="135" hits="0"/> + <line number="137" hits="0"/> + <line number="139" hits="0"/> + <line number="140" hits="0"/> + <line number="141" hits="0"/> + <line number="143" hits="0"/> + <line number="144" hits="0"/> + <line number="145" hits="0"/> + <line number="146" hits="0"/> + <line number="148" hits="0"/> + <line number="149" hits="0"/> + <line number="150" hits="0"/> + <line number="152" hits="0"/> + <line number="153" hits="0"/> + <line number="155" hits="0"/> + <line number="156" hits="0"/> <line number="158" hits="1"/> - <line number="160" hits="1"/> - <line number="161" hits="1"/> - <line number="162" hits="1"/> + <line number="160" hits="0"/> + <line number="161" hits="0"/> + <line number="162" hits="0"/> <line number="164" hits="1"/> - <line number="165" hits="1"/> - <line number="166" hits="1"/> - <line number="167" hits="1"/> - <line number="168" hits="1"/> - <line number="169" hits="1"/> - <line number="170" hits="1"/> - <line number="172" hits="1"/> - <line number="175" hits="1"/> - <line number="176" hits="1"/> - <line number="179" hits="1"/> - <line number="180" hits="1"/> - <line number="182" hits="1"/> - <line number="183" hits="1"/> + <line number="165" hits="0"/> + <line number="166" hits="0"/> + <line number="167" hits="0"/> + <line number="168" hits="0"/> + <line number="169" hits="0"/> + <line number="170" hits="0"/> + <line number="172" hits="0"/> + <line number="175" hits="0"/> + <line number="176" hits="0"/> + <line number="179" hits="0"/> + <line number="180" hits="0"/> + <line number="182" hits="0"/> + <line number="183" hits="0"/> <line number="185" hits="1"/> - <line number="186" hits="1"/> - <line number="187" hits="1"/> - <line number="189" hits="1"/> + <line number="186" hits="0"/> + <line number="187" hits="0"/> + <line number="189" hits="0"/> </lines> </class> <class name="layer.py" filename="layer.py" complexity="0" line-rate="0.8949" branch-rate="0"> @@ -426,7 +426,7 @@ <line number="30" hits="1"/> </lines> </class> - <class name="route.py" filename="route.py" complexity="0" line-rate="0.8851" branch-rate="0"> + <class name="route.py" filename="route.py" complexity="0" line-rate="0.908" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -439,8 +439,8 @@ <line number="10" hits="1"/> <line number="12" hits="1"/> <line number="13" hits="1"/> - <line number="14" hits="0"/> - <line number="15" hits="0"/> + <line number="14" hits="1"/> + <line number="15" hits="1"/> <line number="18" hits="1"/> <line number="19" hits="1"/> <line number="20" hits="1"/> @@ -518,7 +518,7 @@ <line number="160" hits="0"/> </lines> </class> - <class name="schema.py" filename="schema.py" complexity="0" line-rate="0.9016" branch-rate="0"> + <class name="schema.py" filename="schema.py" complexity="0" line-rate="0.8852" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -568,7 +568,7 @@ <line number="66" hits="1"/> <line number="67" hits="1"/> <line number="69" hits="1"/> - <line number="70" hits="1"/> + <line number="70" hits="0"/> <line number="72" hits="1"/> <line number="73" hits="1"/> <line number="75" hits="1"/> @@ -663,7 +663,7 @@ </class> </classes> </package> - <package name="encoders" line-rate="0.9574" branch-rate="0" complexity="0"> + <package name="encoders" line-rate="0.6679" branch-rate="0" complexity="0"> <classes> <class name="__init__.py" filename="encoders/__init__.py" complexity="0" line-rate="1" branch-rate="0"> <methods/> @@ -697,7 +697,7 @@ <line number="15" hits="1"/> </lines> </class> - <class name="bm25.py" filename="encoders/bm25.py" complexity="0" line-rate="0.9574" branch-rate="0"> + <class name="bm25.py" filename="encoders/bm25.py" complexity="0" line-rate="0.3404" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -711,45 +711,45 @@ <line number="18" hits="1"/> <line number="19" hits="1"/> <line number="20" hits="1"/> - <line number="21" hits="0"/> - <line number="22" hits="0"/> - <line number="27" hits="1"/> - <line number="29" hits="1"/> - <line number="30" hits="1"/> - <line number="31" hits="1"/> - <line number="32" hits="1"/> + <line number="21" hits="1"/> + <line number="22" hits="1"/> + <line number="27" hits="0"/> + <line number="29" hits="0"/> + <line number="30" hits="0"/> + <line number="31" hits="0"/> + <line number="32" hits="0"/> <line number="34" hits="1"/> - <line number="35" hits="1"/> - <line number="36" hits="1"/> - <line number="37" hits="1"/> - <line number="38" hits="1"/> - <line number="39" hits="1"/> - <line number="41" hits="1"/> + <line number="35" hits="0"/> + <line number="36" hits="0"/> + <line number="37" hits="0"/> + <line number="38" hits="0"/> + <line number="39" hits="0"/> + <line number="41" hits="0"/> <line number="43" hits="1"/> - <line number="44" hits="1"/> - <line number="45" hits="1"/> - <line number="46" hits="1"/> - <line number="47" hits="1"/> - <line number="48" hits="1"/> - <line number="49" hits="1"/> - <line number="51" hits="1"/> - <line number="53" hits="1"/> - <line number="54" hits="1"/> - <line number="55" hits="1"/> - <line number="56" hits="1"/> - <line number="57" hits="1"/> - <line number="58" hits="1"/> - <line number="59" hits="1"/> - <line number="60" hits="1"/> - <line number="61" hits="1"/> + <line number="44" hits="0"/> + <line number="45" hits="0"/> + <line number="46" hits="0"/> + <line number="47" hits="0"/> + <line number="48" hits="0"/> + <line number="49" hits="0"/> + <line number="51" hits="0"/> + <line number="53" hits="0"/> + <line number="54" hits="0"/> + <line number="55" hits="0"/> + <line number="56" hits="0"/> + <line number="57" hits="0"/> + <line number="58" hits="0"/> + <line number="59" hits="0"/> + <line number="60" hits="0"/> + <line number="61" hits="0"/> <line number="63" hits="1"/> - <line number="64" hits="1"/> - <line number="65" hits="1"/> - <line number="66" hits="1"/> - <line number="67" hits="1"/> + <line number="64" hits="0"/> + <line number="65" hits="0"/> + <line number="66" hits="0"/> + <line number="67" hits="0"/> </lines> </class> - <class name="clip.py" filename="encoders/clip.py" complexity="0" line-rate="0.939" branch-rate="0"> + <class name="clip.py" filename="encoders/clip.py" complexity="0" line-rate="0.2927" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -770,70 +770,70 @@ <line number="20" hits="1"/> <line number="21" hits="1"/> <line number="23" hits="1"/> - <line number="24" hits="1"/> - <line number="25" hits="1"/> + <line number="24" hits="0"/> + <line number="25" hits="0"/> <line number="27" hits="1"/> - <line number="33" hits="1"/> - <line number="34" hits="1"/> - <line number="35" hits="1"/> - <line number="37" hits="1"/> - <line number="38" hits="1"/> - <line number="39" hits="1"/> - <line number="40" hits="1"/> - <line number="41" hits="1"/> - <line number="43" hits="1"/> - <line number="44" hits="1"/> - <line number="45" hits="1"/> - <line number="47" hits="1"/> - <line number="48" hits="1"/> - <line number="49" hits="1"/> + <line number="33" hits="0"/> + <line number="34" hits="0"/> + <line number="35" hits="0"/> + <line number="37" hits="0"/> + <line number="38" hits="0"/> + <line number="39" hits="0"/> + <line number="40" hits="0"/> + <line number="41" hits="0"/> + <line number="43" hits="0"/> + <line number="44" hits="0"/> + <line number="45" hits="0"/> + <line number="47" hits="0"/> + <line number="48" hits="0"/> + <line number="49" hits="0"/> <line number="51" hits="1"/> - <line number="52" hits="1"/> - <line number="53" hits="1"/> - <line number="54" hits="1"/> - <line number="55" hits="1"/> - <line number="61" hits="1"/> - <line number="62" hits="1"/> - <line number="63" hits="1"/> - <line number="64" hits="1"/> - <line number="70" hits="1"/> - <line number="71" hits="1"/> + <line number="52" hits="0"/> + <line number="53" hits="0"/> + <line number="54" hits="0"/> + <line number="55" hits="0"/> + <line number="61" hits="0"/> + <line number="62" hits="0"/> + <line number="63" hits="0"/> + <line number="64" hits="0"/> + <line number="70" hits="0"/> + <line number="71" hits="0"/> <line number="72" hits="0"/> <line number="73" hits="0"/> - <line number="79" hits="1"/> - <line number="80" hits="1"/> - <line number="82" hits="1"/> - <line number="86" hits="1"/> - <line number="87" hits="1"/> - <line number="89" hits="1"/> - <line number="90" hits="1"/> - <line number="91" hits="1"/> + <line number="79" hits="0"/> + <line number="80" hits="0"/> + <line number="82" hits="0"/> + <line number="86" hits="0"/> + <line number="87" hits="0"/> + <line number="89" hits="0"/> + <line number="90" hits="0"/> + <line number="91" hits="0"/> <line number="93" hits="1"/> - <line number="94" hits="1"/> + <line number="94" hits="0"/> <line number="95" hits="0"/> - <line number="96" hits="1"/> + <line number="96" hits="0"/> <line number="97" hits="0"/> - <line number="98" hits="1"/> - <line number="99" hits="1"/> + <line number="98" hits="0"/> + <line number="99" hits="0"/> <line number="101" hits="0"/> - <line number="102" hits="1"/> + <line number="102" hits="0"/> <line number="104" hits="1"/> - <line number="105" hits="1"/> - <line number="108" hits="1"/> - <line number="109" hits="1"/> - <line number="110" hits="1"/> - <line number="111" hits="1"/> + <line number="105" hits="0"/> + <line number="108" hits="0"/> + <line number="109" hits="0"/> + <line number="110" hits="0"/> + <line number="111" hits="0"/> <line number="113" hits="1"/> - <line number="114" hits="1"/> - <line number="115" hits="1"/> - <line number="118" hits="1"/> - <line number="119" hits="1"/> - <line number="120" hits="1"/> - <line number="121" hits="1"/> + <line number="114" hits="0"/> + <line number="115" hits="0"/> + <line number="118" hits="0"/> + <line number="119" hits="0"/> + <line number="120" hits="0"/> + <line number="121" hits="0"/> <line number="123" hits="1"/> - <line number="124" hits="1"/> - <line number="125" hits="1"/> - <line number="126" hits="1"/> + <line number="124" hits="0"/> + <line number="125" hits="0"/> + <line number="126" hits="0"/> </lines> </class> <class name="cohere.py" filename="encoders/cohere.py" complexity="0" line-rate="1" branch-rate="0"> @@ -870,7 +870,7 @@ <line number="49" hits="1"/> </lines> </class> - <class name="fastembed.py" filename="encoders/fastembed.py" complexity="0" line-rate="0.8667" branch-rate="0"> + <class name="fastembed.py" filename="encoders/fastembed.py" complexity="0" line-rate="0.6667" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -890,22 +890,22 @@ <line number="23" hits="1"/> <line number="24" hits="1"/> <line number="25" hits="1"/> - <line number="26" hits="0"/> - <line number="27" hits="0"/> - <line number="33" hits="1"/> - <line number="40" hits="1"/> - <line number="42" hits="1"/> - <line number="43" hits="1"/> + <line number="26" hits="1"/> + <line number="27" hits="1"/> + <line number="33" hits="0"/> + <line number="40" hits="0"/> + <line number="42" hits="0"/> + <line number="43" hits="0"/> <line number="45" hits="1"/> - <line number="46" hits="1"/> - <line number="47" hits="1"/> - <line number="48" hits="1"/> - <line number="49" hits="1"/> + <line number="46" hits="0"/> + <line number="47" hits="0"/> + <line number="48" hits="0"/> + <line number="49" hits="0"/> <line number="50" hits="0"/> <line number="51" hits="0"/> </lines> </class> - <class name="huggingface.py" filename="encoders/huggingface.py" complexity="0" line-rate="0.9667" branch-rate="0"> + <class name="huggingface.py" filename="encoders/huggingface.py" complexity="0" line-rate="0.4" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -929,45 +929,45 @@ <line number="25" hits="1"/> <line number="26" hits="1"/> <line number="27" hits="1"/> - <line number="33" hits="1"/> - <line number="34" hits="1"/> - <line number="35" hits="1"/> - <line number="36" hits="1"/> - <line number="42" hits="1"/> - <line number="44" hits="1"/> - <line number="49" hits="1"/> - <line number="51" hits="1"/> + <line number="33" hits="0"/> + <line number="34" hits="0"/> + <line number="35" hits="0"/> + <line number="36" hits="0"/> + <line number="42" hits="0"/> + <line number="44" hits="0"/> + <line number="49" hits="0"/> + <line number="51" hits="0"/> <line number="52" hits="0"/> - <line number="55" hits="1"/> - <line number="56" hits="1"/> - <line number="57" hits="1"/> - <line number="59" hits="1"/> + <line number="55" hits="0"/> + <line number="56" hits="0"/> + <line number="57" hits="0"/> + <line number="59" hits="0"/> <line number="61" hits="1"/> - <line number="68" hits="1"/> - <line number="69" hits="1"/> - <line number="70" hits="1"/> - <line number="72" hits="1"/> - <line number="76" hits="1"/> - <line number="77" hits="1"/> - <line number="79" hits="1"/> - <line number="80" hits="1"/> - <line number="83" hits="1"/> - <line number="84" hits="1"/> + <line number="68" hits="0"/> + <line number="69" hits="0"/> + <line number="70" hits="0"/> + <line number="72" hits="0"/> + <line number="76" hits="0"/> + <line number="77" hits="0"/> + <line number="79" hits="0"/> + <line number="80" hits="0"/> + <line number="83" hits="0"/> + <line number="84" hits="0"/> <line number="88" hits="0"/> - <line number="92" hits="1"/> - <line number="93" hits="1"/> - <line number="95" hits="1"/> - <line number="96" hits="1"/> - <line number="97" hits="1"/> + <line number="92" hits="0"/> + <line number="93" hits="0"/> + <line number="95" hits="0"/> + <line number="96" hits="0"/> + <line number="97" hits="0"/> <line number="99" hits="1"/> - <line number="100" hits="1"/> - <line number="101" hits="1"/> - <line number="104" hits="1"/> + <line number="100" hits="0"/> + <line number="101" hits="0"/> + <line number="104" hits="0"/> <line number="108" hits="1"/> - <line number="109" hits="1"/> - <line number="110" hits="1"/> - <line number="113" hits="1"/> - <line number="114" hits="1"/> + <line number="109" hits="0"/> + <line number="110" hits="0"/> + <line number="113" hits="0"/> + <line number="114" hits="0"/> </lines> </class> <class name="mistral.py" filename="encoders/mistral.py" complexity="0" line-rate="0.9512" branch-rate="0"> @@ -1138,7 +1138,7 @@ <line number="80" hits="1"/> </lines> </class> - <class name="vit.py" filename="encoders/vit.py" complexity="0" line-rate="0.9254" branch-rate="0"> + <class name="vit.py" filename="encoders/vit.py" complexity="0" line-rate="0.3134" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -1157,57 +1157,57 @@ <line number="18" hits="1"/> <line number="19" hits="1"/> <line number="21" hits="1"/> - <line number="22" hits="1"/> - <line number="23" hits="1"/> + <line number="22" hits="0"/> + <line number="23" hits="0"/> <line number="25" hits="1"/> - <line number="26" hits="1"/> - <line number="27" hits="1"/> - <line number="28" hits="1"/> - <line number="29" hits="1"/> - <line number="35" hits="1"/> - <line number="36" hits="1"/> - <line number="37" hits="1"/> - <line number="38" hits="1"/> - <line number="39" hits="1"/> - <line number="45" hits="1"/> - <line number="46" hits="1"/> + <line number="26" hits="0"/> + <line number="27" hits="0"/> + <line number="28" hits="0"/> + <line number="29" hits="0"/> + <line number="35" hits="0"/> + <line number="36" hits="0"/> + <line number="37" hits="0"/> + <line number="38" hits="0"/> + <line number="39" hits="0"/> + <line number="45" hits="0"/> + <line number="46" hits="0"/> <line number="47" hits="0"/> <line number="48" hits="0"/> - <line number="54" hits="1"/> - <line number="55" hits="1"/> - <line number="56" hits="1"/> - <line number="58" hits="1"/> - <line number="62" hits="1"/> - <line number="64" hits="1"/> - <line number="65" hits="1"/> - <line number="67" hits="1"/> + <line number="54" hits="0"/> + <line number="55" hits="0"/> + <line number="56" hits="0"/> + <line number="58" hits="0"/> + <line number="62" hits="0"/> + <line number="64" hits="0"/> + <line number="65" hits="0"/> + <line number="67" hits="0"/> <line number="69" hits="1"/> - <line number="70" hits="1"/> + <line number="70" hits="0"/> <line number="71" hits="0"/> - <line number="72" hits="1"/> + <line number="72" hits="0"/> <line number="73" hits="0"/> - <line number="74" hits="1"/> - <line number="75" hits="1"/> + <line number="74" hits="0"/> + <line number="75" hits="0"/> <line number="77" hits="0"/> - <line number="78" hits="1"/> + <line number="78" hits="0"/> <line number="80" hits="1"/> - <line number="81" hits="1"/> - <line number="82" hits="1"/> - <line number="83" hits="1"/> - <line number="84" hits="1"/> + <line number="81" hits="0"/> + <line number="82" hits="0"/> + <line number="83" hits="0"/> + <line number="84" hits="0"/> <line number="86" hits="1"/> - <line number="87" hits="1"/> - <line number="88" hits="1"/> - <line number="89" hits="1"/> + <line number="87" hits="0"/> + <line number="88" hits="0"/> + <line number="89" hits="0"/> <line number="91" hits="1"/> - <line number="96" hits="1"/> - <line number="97" hits="1"/> - <line number="98" hits="1"/> - <line number="99" hits="1"/> - <line number="100" hits="1"/> - <line number="101" hits="1"/> - <line number="107" hits="1"/> - <line number="108" hits="1"/> + <line number="96" hits="0"/> + <line number="97" hits="0"/> + <line number="98" hits="0"/> + <line number="99" hits="0"/> + <line number="100" hits="0"/> + <line number="101" hits="0"/> + <line number="107" hits="0"/> + <line number="108" hits="0"/> </lines> </class> <class name="zure.py" filename="encoders/zure.py" complexity="0" line-rate="0.9589" branch-rate="0"> @@ -1533,7 +1533,7 @@ </class> </classes> </package> - <package name="llms" line-rate="0.915" branch-rate="0" complexity="0"> + <package name="llms" line-rate="0.7585" branch-rate="0" complexity="0"> <classes> <class name="__init__.py" filename="llms/__init__.py" complexity="0" line-rate="1" branch-rate="0"> <methods/> @@ -1547,7 +1547,7 @@ <line number="8" hits="1"/> </lines> </class> - <class name="base.py" filename="llms/base.py" complexity="0" line-rate="0.9756" branch-rate="0"> + <class name="base.py" filename="llms/base.py" complexity="0" line-rate="0.7317" branch-rate="0"> <methods/> <lines> <line number="1" hits="1"/> @@ -1571,8 +1571,8 @@ <line number="31" hits="1"/> <line number="34" hits="1"/> <line number="35" hits="1"/> - <line number="36" hits="1"/> - <line number="37" hits="1"/> + <line number="36" hits="0"/> + <line number="37" hits="0"/> <line number="38" hits="1"/> <line number="39" hits="1"/> <line number="40" hits="1"/> @@ -1582,15 +1582,15 @@ <line number="48" hits="1"/> <line number="98" hits="1"/> <line number="99" hits="1"/> - <line number="101" hits="1"/> + <line number="101" hits="0"/> <line number="102" hits="0"/> - <line number="104" hits="1"/> - <line number="105" hits="1"/> - <line number="106" hits="1"/> - <line number="107" hits="1"/> - <line number="108" hits="1"/> - <line number="109" hits="1"/> - <line number="110" hits="1"/> + <line number="104" hits="0"/> + <line number="105" hits="0"/> + <line number="106" hits="0"/> + <line number="107" hits="0"/> + <line number="108" hits="0"/> + <line number="109" hits="0"/> + <line number="110" hits="0"/> </lines> </class> <class name="cohere.py" filename="llms/cohere.py" complexity="0" line-rate="0.9655" branch-rate="0"> @@ -1627,49 +1627,49 @@ <line number="48" hits="1"/> </lines> </class> - <class name="llamacpp.py" filename="llms/llamacpp.py" complexity="0" line-rate="0.9" branch-rate="0"> + <class name="llamacpp.py" filename="llms/llamacpp.py" complexity="0" line-rate="0" branch-rate="0"> <methods/> <lines> - <line number="1" hits="1"/> - <line number="2" hits="1"/> - <line number="3" hits="1"/> - <line number="5" hits="1"/> - <line number="7" hits="1"/> - <line number="8" hits="1"/> - <line number="9" hits="1"/> - <line number="12" hits="1"/> - <line number="13" hits="1"/> - <line number="14" hits="1"/> - <line number="15" hits="1"/> - <line number="16" hits="1"/> - <line number="18" hits="1"/> - <line number="26" hits="1"/> - <line number="33" hits="1"/> - <line number="34" hits="1"/> - <line number="35" hits="1"/> - <line number="36" hits="1"/> - <line number="38" hits="1"/> - <line number="42" hits="1"/> - <line number="43" hits="1"/> - <line number="50" hits="1"/> - <line number="51" hits="1"/> - <line number="53" hits="1"/> + <line number="1" hits="0"/> + <line number="2" hits="0"/> + <line number="3" hits="0"/> + <line number="5" hits="0"/> + <line number="7" hits="0"/> + <line number="8" hits="0"/> + <line number="9" hits="0"/> + <line number="12" hits="0"/> + <line number="13" hits="0"/> + <line number="14" hits="0"/> + <line number="15" hits="0"/> + <line number="16" hits="0"/> + <line number="18" hits="0"/> + <line number="26" hits="0"/> + <line number="33" hits="0"/> + <line number="34" hits="0"/> + <line number="35" hits="0"/> + <line number="36" hits="0"/> + <line number="38" hits="0"/> + <line number="42" hits="0"/> + <line number="43" hits="0"/> + <line number="50" hits="0"/> + <line number="51" hits="0"/> + <line number="53" hits="0"/> <line number="54" hits="0"/> - <line number="55" hits="1"/> + <line number="55" hits="0"/> <line number="56" hits="0"/> <line number="57" hits="0"/> <line number="58" hits="0"/> - <line number="60" hits="1"/> - <line number="61" hits="1"/> - <line number="62" hits="1"/> - <line number="63" hits="1"/> - <line number="64" hits="1"/> - <line number="65" hits="1"/> - <line number="66" hits="1"/> - <line number="68" hits="1"/> - <line number="70" hits="1"/> - <line number="73" hits="1"/> - <line number="74" hits="1"/> + <line number="60" hits="0"/> + <line number="61" hits="0"/> + <line number="62" hits="0"/> + <line number="63" hits="0"/> + <line number="64" hits="0"/> + <line number="65" hits="0"/> + <line number="66" hits="0"/> + <line number="68" hits="0"/> + <line number="70" hits="0"/> + <line number="73" hits="0"/> + <line number="74" hits="0"/> </lines> </class> <class name="mistral.py" filename="llms/mistral.py" complexity="0" line-rate="0.8333" branch-rate="0"> @@ -2027,164 +2027,167 @@ <line number="63" hits="1"/> <line number="64" hits="0"/> <line number="65" hits="0"/> - <line number="66" hits="0"/> <line number="67" hits="0"/> <line number="68" hits="0"/> <line number="69" hits="0"/> - <line number="71" hits="1"/> - <line number="72" hits="0"/> - <line number="73" hits="0"/> - <line number="74" hits="0"/> + <line number="70" hits="0"/> <line number="75" hits="0"/> <line number="76" hits="0"/> - <line number="80" hits="0"/> + <line number="77" hits="0"/> + <line number="78" hits="0"/> + <line number="79" hits="0"/> <line number="81" hits="0"/> - <line number="83" hits="1"/> + <line number="82" hits="0"/> + <line number="83" hits="0"/> <line number="84" hits="0"/> <line number="85" hits="0"/> <line number="86" hits="0"/> - <line number="87" hits="0"/> - <line number="88" hits="0"/> + <line number="88" hits="1"/> + <line number="89" hits="0"/> + <line number="90" hits="0"/> + <line number="91" hits="0"/> + <line number="92" hits="0"/> <line number="93" hits="0"/> <line number="94" hits="0"/> <line number="96" hits="1"/> <line number="97" hits="0"/> <line number="98" hits="0"/> + <line number="99" hits="0"/> + <line number="100" hits="0"/> <line number="101" hits="0"/> - <line number="102" hits="0"/> <line number="105" hits="0"/> <line number="106" hits="0"/> - <line number="108" hits="0"/> + <line number="108" hits="1"/> <line number="109" hits="0"/> <line number="110" hits="0"/> <line number="111" hits="0"/> <line number="112" hits="0"/> <line number="113" hits="0"/> <line number="118" hits="0"/> + <line number="119" hits="0"/> + <line number="121" hits="1"/> + <line number="122" hits="0"/> + <line number="123" hits="0"/> <line number="126" hits="0"/> <line number="127" hits="0"/> <line number="130" hits="0"/> + <line number="131" hits="0"/> + <line number="133" hits="0"/> + <line number="134" hits="0"/> <line number="135" hits="0"/> <line number="136" hits="0"/> <line number="137" hits="0"/> <line number="138" hits="0"/> - <line number="139" hits="0"/> - <line number="141" hits="0"/> - <line number="142" hits="0"/> <line number="143" hits="0"/> - <line number="145" hits="0"/> <line number="151" hits="0"/> - <line number="153" hits="1"/> + <line number="152" hits="0"/> + <line number="155" hits="0"/> + <line number="160" hits="0"/> + <line number="161" hits="0"/> + <line number="162" hits="0"/> + <line number="163" hits="0"/> <line number="164" hits="0"/> - <line number="165" hits="0"/> <line number="166" hits="0"/> - <line number="169" hits="0"/> + <line number="167" hits="0"/> + <line number="168" hits="0"/> <line number="170" hits="0"/> - <line number="171" hits="0"/> - <line number="173" hits="0"/> - <line number="174" hits="0"/> - <line number="175" hits="0"/> <line number="176" hits="0"/> - <line number="178" hits="0"/> - <line number="179" hits="0"/> - <line number="182" hits="0"/> - <line number="183" hits="0"/> - <line number="185" hits="0"/> - <line number="188" hits="0"/> + <line number="178" hits="1"/> + <line number="189" hits="0"/> + <line number="190" hits="0"/> + <line number="191" hits="0"/> + <line number="194" hits="0"/> + <line number="195" hits="0"/> <line number="196" hits="0"/> + <line number="198" hits="0"/> + <line number="199" hits="0"/> <line number="200" hits="0"/> <line number="201" hits="0"/> - <line number="202" hits="0"/> - <line number="205" hits="0"/> - <line number="206" hits="0"/> + <line number="203" hits="0"/> + <line number="204" hits="0"/> <line number="207" hits="0"/> - <line number="215" hits="0"/> - <line number="216" hits="0"/> - <line number="220" hits="0"/> - <line number="222" hits="0"/> - <line number="223" hits="0"/> + <line number="208" hits="0"/> + <line number="210" hits="0"/> + <line number="213" hits="0"/> + <line number="221" hits="0"/> + <line number="225" hits="0"/> <line number="226" hits="0"/> <line number="227" hits="0"/> - <line number="235" hits="0"/> - <line number="236" hits="0"/> - <line number="242" hits="0"/> - <line number="243" hits="0"/> - <line number="246" hits="0"/> + <line number="230" hits="0"/> + <line number="231" hits="0"/> + <line number="232" hits="0"/> + <line number="240" hits="0"/> + <line number="241" hits="0"/> + <line number="245" hits="0"/> <line number="247" hits="0"/> - <line number="250" hits="0"/> - <line number="255" hits="0"/> - <line number="256" hits="0"/> - <line number="259" hits="0"/> + <line number="248" hits="0"/> + <line number="251" hits="0"/> + <line number="252" hits="0"/> <line number="260" hits="0"/> <line number="261" hits="0"/> - <line number="264" hits="0"/> + <line number="267" hits="0"/> <line number="268" hits="0"/> - <line number="279" hits="0"/> - <line number="281" hits="1"/> - <line number="287" hits="0"/> - <line number="288" hits="0"/> + <line number="271" hits="0"/> + <line number="272" hits="0"/> + <line number="275" hits="0"/> + <line number="280" hits="0"/> + <line number="281" hits="0"/> + <line number="284" hits="0"/> + <line number="285" hits="0"/> + <line number="286" hits="0"/> <line number="289" hits="0"/> - <line number="290" hits="0"/> - <line number="294" hits="0"/> - <line number="296" hits="0"/> - <line number="297" hits="0"/> - <line number="298" hits="0"/> - <line number="301" hits="0"/> - <line number="302" hits="0"/> - <line number="303" hits="0"/> - <line number="309" hits="0"/> - <line number="317" hits="0"/> - <line number="318" hits="0"/> + <line number="293" hits="0"/> + <line number="304" hits="0"/> + <line number="306" hits="1"/> + <line number="312" hits="0"/> + <line number="313" hits="0"/> + <line number="314" hits="0"/> + <line number="315" hits="0"/> + <line number="319" hits="0"/> + <line number="321" hits="0"/> + <line number="322" hits="0"/> + <line number="323" hits="0"/> <line number="326" hits="0"/> <line number="327" hits="0"/> <line number="328" hits="0"/> <line number="334" hits="0"/> - <line number="337" hits="0"/> - <line number="338" hits="0"/> - <line number="339" hits="0"/> - <line number="340" hits="0"/> - <line number="341" hits="0"/> <line number="342" hits="0"/> <line number="343" hits="0"/> - <line number="344" hits="0"/> - <line number="347" hits="0"/> - <line number="348" hits="0"/> - <line number="349" hits="0"/> - <line number="350" hits="0"/> - <line number="354" hits="0"/> - <line number="355" hits="0"/> - <line number="357" hits="1"/> - <line number="360" hits="0"/> - <line number="361" hits="0"/> + <line number="351" hits="0"/> + <line number="352" hits="0"/> + <line number="353" hits="0"/> + <line number="359" hits="0"/> <line number="362" hits="0"/> <line number="363" hits="0"/> <line number="364" hits="0"/> <line number="365" hits="0"/> + <line number="366" hits="0"/> + <line number="367" hits="0"/> + <line number="368" hits="0"/> + <line number="369" hits="0"/> <line number="372" hits="0"/> <line number="373" hits="0"/> <line number="374" hits="0"/> - <line number="376" hits="0"/> - <line number="377" hits="0"/> + <line number="375" hits="0"/> + <line number="379" hits="0"/> <line number="380" hits="0"/> + <line number="382" hits="1"/> <line number="385" hits="0"/> + <line number="386" hits="0"/> <line number="387" hits="0"/> <line number="388" hits="0"/> <line number="389" hits="0"/> <line number="390" hits="0"/> - <line number="391" hits="0"/> - <line number="392" hits="0"/> - <line number="393" hits="0"/> - <line number="394" hits="0"/> - <line number="396" hits="0"/> <line number="397" hits="0"/> <line number="398" hits="0"/> - <line number="403" hits="1"/> - <line number="404" hits="0"/> + <line number="399" hits="0"/> + <line number="401" hits="0"/> + <line number="402" hits="0"/> <line number="405" hits="0"/> - <line number="407" hits="0"/> - <line number="408" hits="0"/> - <line number="409" hits="0"/> <line number="410" hits="0"/> + <line number="412" hits="0"/> + <line number="413" hits="0"/> + <line number="414" hits="0"/> <line number="415" hits="0"/> <line number="416" hits="0"/> <line number="417" hits="0"/> @@ -2193,9 +2196,6 @@ <line number="421" hits="0"/> <line number="422" hits="0"/> <line number="423" hits="0"/> - <line number="424" hits="0"/> - <line number="425" hits="0"/> - <line number="426" hits="0"/> </lines> </class> <class name="utils.py" filename="splitters/utils.py" complexity="0" line-rate="0.3636" branch-rate="0"> diff --git a/semantic_router/splitters/consecutive_sim.py b/semantic_router/splitters/consecutive_sim.py index 4a2e1106..775d5d2c 100644 --- a/semantic_router/splitters/consecutive_sim.py +++ b/semantic_router/splitters/consecutive_sim.py @@ -23,7 +23,14 @@ class ConsecutiveSimSplitter(BaseSplitter): encoder.score_threshold = score_threshold self.score_threshold = score_threshold - def __call__(self, docs: List[Any]): + def __call__(self, docs: List[Any]) -> List[DocumentSplit]: + """Split documents into smaller chunks based on semantic similarity. + + :param docs: list of text documents to be split, if only wanted to + split a single document, pass it as a list with a single element. + + :return: list of DocumentSplit objects containing the split documents. + """ # Check if there's only a single document if len(docs) == 1: raise ValueError( diff --git a/semantic_router/splitters/cumulative_sim.py b/semantic_router/splitters/cumulative_sim.py index e9dd8deb..0e50a354 100644 --- a/semantic_router/splitters/cumulative_sim.py +++ b/semantic_router/splitters/cumulative_sim.py @@ -23,7 +23,14 @@ class CumulativeSimSplitter(BaseSplitter): encoder.score_threshold = score_threshold self.score_threshold = score_threshold - def __call__(self, docs: List[str]): + def __call__(self, docs: List[str]) -> List[DocumentSplit]: + """Split documents into smaller chunks based on semantic similarity. + + :param docs: list of text documents to be split, if only wanted to + split a single document, pass it as a list with a single element. + + :return: list of DocumentSplit objects containing the split documents. + """ total_docs = len(docs) # Check if there's only a single document if total_docs == 1: diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py index dc5110a6..092433fe 100644 --- a/semantic_router/splitters/rolling_window.py +++ b/semantic_router/splitters/rolling_window.py @@ -60,7 +60,39 @@ class RollingWindowSplitter(BaseSplitter): self.split_tokens_tolerance = split_tokens_tolerance self.statistics: SplitStatistics - def encode_documents(self, docs: List[str]) -> np.ndarray: + def __call__(self, docs: List[str]) -> List[DocumentSplit]: + """Split documents into smaller chunks based on semantic similarity. + + :param docs: list of text documents to be split, if only wanted to + split a single document, pass it as a list with a single element. + + :return: list of DocumentSplit objects containing the split documents. + """ + if not docs: + raise ValueError("At least one document is required for splitting.") + + if len(docs) == 1: + token_count = tiktoken_length(docs[0]) + if token_count > self.max_split_tokens: + logger.warning( + f"Single document exceeds the maximum token limit " + f"of {self.max_split_tokens}. " + "Splitting to sentences before semantically splitting." + ) + docs = split_to_sentences(docs[0]) + encoded_docs = self._encode_documents(docs) + similarities = self._calculate_similarity_scores(encoded_docs) + if self.dynamic_threshold: + self._find_optimal_threshold(docs, similarities) + else: + self.calculated_threshold = self.encoder.score_threshold + split_indices = self._find_split_indices(similarities=similarities) + splits = self._split_documents(docs, split_indices, similarities) + self.plot_similarity_scores(similarities, split_indices, splits) + logger.info(self.statistics) + return splits + + def _encode_documents(self, docs: List[str]) -> np.ndarray: try: embeddings = self.encoder(docs) return np.array(embeddings) @@ -68,7 +100,7 @@ class RollingWindowSplitter(BaseSplitter): logger.error(f"Error encoding documents {docs}: {e}") raise - def calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]: + def _calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]: raw_similarities = [] for idx in range(1, len(encoded_docs)): window_start = max(0, idx - self.window_size) @@ -80,7 +112,7 @@ class RollingWindowSplitter(BaseSplitter): raw_similarities.append(curr_sim_score) return raw_similarities - def find_split_indices(self, similarities: List[float]) -> List[int]: + def _find_split_indices(self, similarities: List[float]) -> List[int]: split_indices = [] for idx, score in enumerate(similarities): logger.debug(f"Similarity score at index {idx}: {score}") @@ -93,7 +125,7 @@ class RollingWindowSplitter(BaseSplitter): split_indices.append(idx + 1) return split_indices - def find_optimal_threshold(self, docs: List[str], similarity_scores: List[float]): + def _find_optimal_threshold(self, docs: List[str], similarity_scores: List[float]): token_counts = [tiktoken_length(doc) for doc in docs] cumulative_token_counts = np.cumsum([0] + token_counts) @@ -109,7 +141,7 @@ class RollingWindowSplitter(BaseSplitter): median_tokens = 0 while low <= high: self.calculated_threshold = (low + high) / 2 - split_indices = self.find_split_indices(similarity_scores) + split_indices = self._find_split_indices(similarity_scores) logger.debug( f"Iteration {iteration}: Trying threshold: {self.calculated_threshold}" ) @@ -150,7 +182,7 @@ class RollingWindowSplitter(BaseSplitter): return self.calculated_threshold - def split_documents( + def _split_documents( self, docs: List[str], split_indices: List[int], similarities: List[float] ) -> List[DocumentSplit]: """ @@ -370,7 +402,7 @@ class RollingWindowSplitter(BaseSplitter): a specified threshold. """ sentences = [sentence for doc in docs for sentence in split_to_sentences(doc)] - encoded_sentences = self.encode_documents(sentences) + encoded_sentences = self._encode_documents(sentences) similarity_scores = [] for i in range(window_size, len(encoded_sentences)): @@ -399,28 +431,3 @@ class RollingWindowSplitter(BaseSplitter): f"First sentence after similarity score " f"below {threshold}: {sentences[i + window_size]}" ) - - def __call__(self, docs: List[str]) -> List[DocumentSplit]: - if not docs: - raise ValueError("At least one document is required for splitting.") - - if len(docs) == 1: - token_count = tiktoken_length(docs[0]) - if token_count > self.max_split_tokens: - logger.warning( - f"Single document exceeds the maximum token limit " - f"of {self.max_split_tokens}. " - "Splitting to sentences before semantically splitting." - ) - docs = split_to_sentences(docs[0]) - encoded_docs = self.encode_documents(docs) - similarities = self.calculate_similarity_scores(encoded_docs) - if self.dynamic_threshold: - self.find_optimal_threshold(docs, similarities) - else: - self.calculated_threshold = self.encoder.score_threshold - split_indices = self.find_split_indices(similarities=similarities) - splits = self.split_documents(docs, split_indices, similarities) - self.plot_similarity_scores(similarities, split_indices, splits) - logger.info(self.statistics) - return splits -- GitLab