diff --git a/recipes/quickstart/Getting_to_know_Llama.ipynb b/recipes/quickstart/Getting_to_know_Llama.ipynb
index 4a4e56738c9212a54321ceed4846f486dd15cd5a..b3dbf21c57850e74fcadc2496e9f51cd43f14871 100644
--- a/recipes/quickstart/Getting_to_know_Llama.ipynb
+++ b/recipes/quickstart/Getting_to_know_Llama.ipynb
@@ -196,7 +196,7 @@
     "### **1.1 - What is Llama 3?**\n",
     "\n",
     "* State of the art (SOTA), Open Source LLM\n",
-    "* 8B, 70B\n",
+    "* 8B, 70B - base and instruct models\n",
     "* Choosing model: Size, Quality, Cost, Speed\n",
     "* Pretrained + Chat\n",
     "* [Meta Llama 3 Blog](https://ai.meta.com/blog/meta-llama-3/)\n",
@@ -275,9 +275,7 @@
    "source": [
     "## **2 - Using and Comparing Llama 3 and Llama 2**\n",
     "\n",
-    "In this notebook, we will use the Llama 2 70b chat and Llama 3 8b and 70b instruct models hosted on [Groq](https://console.groq.com/). You'll need to first [sign in](https://console.groq.com/) with your github or gmail account, then get an [API token](https://console.groq.com/keys) to try Groq out for free. (Groq runs Llama models very fast and they only support one Llama 2 model: the Llama 2 70b chat).\n",
-    "\n",
-    "**Note: You can also use other Llama hosting providers such as [Replicate](https://replicate.com/blog/run-llama-3-with-an-api?input=python), [Togther](https://docs.together.ai/docs/quickstart). Simply click the links here to see how to run `pip install` and use their freel trial API key with example code to modify the following three cells in 2.1 and 2.2.**\n"
+    "We will be using Llama 2 7b & 70b chat and Llama 3 8b & 70b instruct models hosted on [Replicate](https://replicate.com/search?query=llama) to run the examples here. You will need to first sign in to Replicate with your GitHub account, then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while. You can also use other Llama 3 cloud providers such as [Groq](https://console.groq.com/), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), or [Anyscale](https://app.endpoints.anyscale.com/playground).\n"
    ]
   },
   {
@@ -297,7 +295,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install groq"
+    "!pip install replicate"
    ]
   },
   {
@@ -305,7 +303,7 @@
    "metadata": {},
    "source": [
     "### **2.2 - Create helpers for Llama 2 and Llama 3**\n",
-    "First, set your Groq API token as environment variables.\n"
+    "First, set your Replicate API token as an environment variable.\n"
    ]
   },
   {
@@ -319,16 +317,16 @@
     "import os\n",
     "from getpass import getpass\n",
     "\n",
-    "GROQ_API_TOKEN = getpass()\n",
+    "REPLICATE_API_TOKEN = getpass()\n",
     "\n",
-    "os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN"
+    "os.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Create Llama 2 and Llama 3 helper functions - for chatbot type of apps, we'll use Llama 3 8b/70b instruct models, not the base models."
+    "Create Llama 2 and Llama 3 helper functions - for chatbot-type apps, we'll use the Llama 3 instruct and Llama 2 chat models, not the base models."
    ]
   },
   {
@@ -339,53 +337,35 @@
    },
    "outputs": [],
    "source": [
-    "from groq import Groq\n",
-    "\n",
-    "client = Groq(\n",
-    "    api_key=os.environ.get(\"GROQ_API_KEY\"),\n",
-    ")\n",
+    "import replicate\n",
     "\n",
-    "def llama2(prompt, temperature=0.0, input_print=True):\n",
-    "    chat_completion = client.chat.completions.create(\n",
-    "        messages=[\n",
-    "            {\n",
-    "                \"role\": \"user\",\n",
-    "                \"content\": prompt,\n",
-    "            }\n",
-    "        ],\n",
-    "        model=\"llama2-70b-4096\",\n",
-    "        temperature=temperature,\n",
-    "    )\n",
+    "def llama2_7b(prompt):\n",
+    "    output = replicate.run(\n",
+    "        \"meta/llama-2-7b-chat\",\n",
+    "        input={\"prompt\": prompt}\n",
+    "    )\n",
+    "    return ''.join(output)\n",
     "\n",
-    "    return (chat_completion.choices[0].message.content)\n",
+    "def llama2_70b(prompt):\n",
+    "    output = replicate.run(\n",
+    "        \"meta/llama-2-70b-chat\",\n",
+    "        input={\"prompt\": prompt}\n",
+    "    )\n",
+    "    return ''.join(output)\n",
     "\n",
-    "def llama3_8b(prompt, temperature=0.0, input_print=True):\n",
-    "    chat_completion = client.chat.completions.create(\n",
-    "        messages=[\n",
-    "            {\n",
-    "                \"role\": \"user\",\n",
-    "                \"content\": prompt,\n",
-    "            }\n",
-    "        ],\n",
-    "        model=\"llama3-8b-8192\",\n",
-    "        temperature=temperature,\n",
-    "    )\n",
+    "def llama3_8b(prompt):\n",
+    "    output = replicate.run(\n",
+    "        \"meta/meta-llama-3-8b-instruct\",\n",
+    "        input={\"prompt\": prompt}\n",
+    "    )\n",
+    "    return ''.join(output)\n",
     "\n",
-    "    return (chat_completion.choices[0].message.content)\n",
-    "\n",
-    "def llama3_70b(prompt, temperature=0.0, input_print=True):\n",
-    "    chat_completion = client.chat.completions.create(\n",
-    "        messages=[\n",
-    "            {\n",
-    "                \"role\": \"user\",\n",
-    "                \"content\": prompt,\n",
-    "            }\n",
-    "        ],\n",
-    "        model=\"llama3-70b-8192\",\n",
-    "        temperature=temperature,\n",
-    "    )\n",
-    "\n",
-    "    return (chat_completion.choices[0].message.content)"
+    "def llama3_70b(prompt):\n",
+    "    output = replicate.run(\n",
+    "        \"meta/meta-llama-3-70b-instruct\",\n",
+    "        input={\"prompt\": prompt}\n",
+    "    )\n",
+    "    return ''.join(output)"
    ]
   },
   {
@@ -406,7 +386,7 @@
    "outputs": [],
    "source": [
     "prompt = \"The typical color of a llama is: \"\n",
-    "output = llama2(prompt)\n",
+    "output = llama2_7b(prompt)\n",
     "md(output)"
    ]
   },
@@ -420,6 +400,16 @@
     "md(output)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = llama2_7b(\"The typical color of a llama is what? Answer in one word.\")\n",
+    "md(output)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -430,6 +420,13 @@
     "md(output)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note: Llama 3 follows instructions better than Llama 2 in single-turn chat.**"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -457,7 +454,7 @@
    "outputs": [],
    "source": [
     "prompt_chat = \"What is the average lifespan of a Llama? Answer the question in few words.\"\n",
-    "output = llama2(prompt_chat)\n",
+    "output = llama2_7b(prompt_chat)\n",
     "md(output)"
    ]
   },
@@ -483,7 +480,7 @@
    "source": [
     "# example without previous context. LLM's are stateless and cannot understand \"they\" without previous context\n",
     "prompt_chat = \"What animal family are they? Answer the question in few words.\"\n",
-    "output = llama2(prompt_chat)\n",
+    "output = llama2_7b(prompt_chat)\n",
     "md(output)"
    ]
   },
@@ -497,6 +494,16 @@
     "md(output)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = llama2_70b(prompt_chat)\n",
+    "md(output)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -536,7 +543,7 @@
     "Assistant: 15-20 years.\n",
     "User: What animal family are they?\n",
     "\"\"\"\n",
-    "output = llama2(prompt_chat)\n",
+    "output = llama2_7b(prompt_chat)\n",
     "md(output)"
    ]
   },
@@ -579,7 +586,17 @@
     "\n",
     "Answer the question with one word.\n",
     "\"\"\"\n",
-    "output = llama2(prompt_chat)\n",
+    "output = llama2_7b(prompt_chat)\n",
+    "md(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = llama2_70b(prompt_chat)\n",
     "md(output)"
    ]
   },
@@ -597,7 +614,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Both Llama 3 8b and Llama 2 70b follows instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b.**"
+    "**Both Llama 3 8b and Llama 2 70b follow instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b in multi-turn chat.**"
    ]
   },
   {
@@ -640,7 +657,7 @@
     "\n",
     "Give one word response.\n",
     "'''\n",
-    "output = llama2(prompt)\n",
+    "output = llama2_7b(prompt)\n",
     "md(output)"
    ]
   },
@@ -684,7 +701,7 @@
     "Give one word response.\n",
     "'''\n",
     "\n",
-    "output = llama2(prompt)\n",
+    "output = llama2_7b(prompt)\n",
     "md(output)"
    ]
   },
@@ -704,7 +721,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Note: Llama 2, with few shots, has the same output \"Neutral\" as Llama 3.**"
+    "**Note: Llama 2, with few-shot examples, has the same output \"Neutral\" as Llama 3, but doesn't follow the instruction (Give one word response) as well.**"
    ]
   },
   {
@@ -894,6 +911,7 @@
    "outputs": [],
    "source": [
     "!pip install langchain\n",
+    "!pip install langchain-community\n",
     "!pip install sentence-transformers\n",
     "!pip install faiss-cpu\n",
     "!pip install bs4\n",
@@ -936,40 +954,53 @@
     "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\"))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You'll need to first sign in at [Groq](https://console.groq.com/login) with your GitHub or Gmail account, then get an API token to try Groq out for free."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain_groq import ChatGroq\n",
-    "llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")\n",
+    "import os\n",
+    "from getpass import getpass\n",
     "\n",
-    "from langchain.chains import ConversationalRetrievalChain\n",
-    "chain = ConversationalRetrievalChain.from_llm(llm,\n",
-    "                                              vectorstore.as_retriever(),\n",
-    "                                              return_source_documents=True)\n",
+    "GROQ_API_TOKEN = getpass()\n",
    "\n",
-    "result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n",
-    "md(result['answer'])\n"
+    "os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "id": "NmEhBe3Kiyre"
-   },
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_groq import ChatGroq\n",
+    "llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# Query against your own data\n",
     "from langchain.chains import ConversationalRetrievalChain\n",
-    "chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)\n",
     "\n",
-    "chat_history = []\n",
-    "query = \"What’s new with Llama 3?\"\n",
-    "result = chain({\"question\": query, \"chat_history\": chat_history})\n",
-    "md(result['answer'])"
+    "# Query against your own data\n",
+    "chain = ConversationalRetrievalChain.from_llm(llm,\n",
+    "                                              vectorstore.as_retriever(),\n",
+    "                                              return_source_documents=True)\n",
+    "\n",
+    "# no chat history passed\n",
+    "result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n",
+    "md(result['answer'])\n"
    ]
   },
   {
@@ -1083,7 +1114,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.10.14"
   }
  },
 "nbformat": 4,
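For anyone who wants to sanity-check the new Replicate-based helpers outside the notebook, here is a minimal standalone sketch of the pattern the diff introduces in section 2.2. It assumes `pip install replicate` has been run and `REPLICATE_API_TOKEN` is exported in the environment; the model slug, the `input={"prompt": ...}` call, and the join-the-streamed-chunks handling mirror the added cells.

```python
# Minimal sketch of the Replicate helper pattern added in section 2.2.
# Assumes: `pip install replicate` and REPLICATE_API_TOKEN set in the environment.
import replicate

def llama3_8b(prompt):
    # replicate.run streams output as an iterator of text chunks for these models,
    # so the helper joins them into a single string before returning.
    output = replicate.run(
        "meta/meta-llama-3-8b-instruct",
        input={"prompt": prompt}
    )
    return ''.join(output)

if __name__ == "__main__":
    print(llama3_8b("The typical color of a llama is what? Answer in one word."))
```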
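The reorganized RAG cells at the end of the diff can likewise be exercised as a single script. The sketch below is only an approximation under a few assumptions: the `WebBaseLoader` URL and splitter settings are hypothetical stand-ins for the notebook's earlier, unchanged data-loading cells that produce `all_splits`, and it expects `GROQ_API_KEY` to be set plus `pip install langchain langchain-community langchain-groq sentence-transformers faiss-cpu bs4`.

```python
# Approximate, self-contained version of the RAG cells in the diff.
# Assumptions: GROQ_API_KEY is set; the source URL and splitter settings below are
# illustrative stand-ins for the notebook's earlier (unchanged) cells.
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain

# Load and split a page about Llama 3 (stand-in for the notebook's data cells).
docs = WebBaseLoader("https://ai.meta.com/blog/meta-llama-3/").load()
all_splits = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)

# Index the chunks, as in the unchanged vectorstore cell.
vectorstore = FAISS.from_documents(
    all_splits,
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
)

# Llama 3 8b on Groq as the chat model, as in the added cell.
llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")

# Query against your own data; no chat history passed on the first turn.
chain = ConversationalRetrievalChain.from_llm(llm,
                                              vectorstore.as_retriever(),
                                              return_source_documents=True)
result = chain({"question": "What's new with Llama 3?", "chat_history": []})
print(result["answer"])
```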