diff --git a/demo_apps/OctoAI_API_examples/Getting_to_know_Llama.ipynb b/demo_apps/OctoAI_API_examples/Getting_to_know_Llama.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d0105e10bb7393f00f56617df4171397d2649911 --- /dev/null +++ b/demo_apps/OctoAI_API_examples/Getting_to_know_Llama.ipynb @@ -0,0 +1,1029 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LERqQn5v8-ak" + }, + "source": [ + "# **Getting to know Llama 2: Everything you need to start building**\n", + "Our goal in this session is to provide a guided tour of Llama 2, including an understanding of the different Llama 2 models, how and where to access them, Generative AI and Chatbot architectures, prompt engineering, RAG (Retrieval Augmented Generation), fine-tuning, and more. All of this comes with starter code that you can take and adapt for your own Llama 2 projects." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ioVMNcTesSEk" + }, + "source": [ + "##**0 - Prerequisites**\n", + "* Basic understanding of Large Language Models\n", + "\n", + "* Basic understanding of Python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "executionInfo": { + "elapsed": 248, + "status": "ok", + "timestamp": 1695832228254, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "ktEA7qXmwdUM" + }, + "outputs": [], + "source": [ + "# presentation layer code\n", + "\n", + "import base64\n", + "from IPython.display import Image, display\n", + "import matplotlib.pyplot as plt\n", + "\n", + "def mm(graph):\n", + " graphbytes = graph.encode(\"ascii\")\n", + " base64_bytes = base64.b64encode(graphbytes)\n", + " base64_string = base64_bytes.decode(\"ascii\")\n", + " display(Image(url=\"https://mermaid.ink/img/\" + base64_string))\n", + "\n", + "def genai_app_arch():\n", + " mm(\"\"\"\n", + " flowchart TD\n", + " A[Users] --> B(Applications e.g. mobile, web)\n", + " B --> |Hosted API|C(Platforms e.g. Custom, OctoAI, HuggingFace, Replicate)\n", + " B -- optional --> E(Frameworks e.g. LangChain)\n", + " C-->|User Input|D[Llama 2]\n", + " D-->|Model Output|C\n", + " E --> C\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def rag_arch():\n", + " mm(\"\"\"\n", + " flowchart TD\n", + " A[User Prompts] --> B(Frameworks e.g. 
LangChain)\n", + " B <--> |Database, Docs, XLS|C[fa:fa-database External Data]\n", + " B -->|API|D[Llama 2]\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def llama2_family():\n", + " mm(\"\"\"\n", + " graph LR;\n", + " llama-2 --> llama-2-7b\n", + " llama-2 --> llama-2-13b\n", + " llama-2 --> llama-2-70b\n", + " llama-2-7b --> llama-2-7b-chat\n", + " llama-2-13b --> llama-2-13b-chat\n", + " llama-2-70b --> llama-2-70b-chat\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def apps_and_llms():\n", + " mm(\"\"\"\n", + " graph LR;\n", + " users --> apps\n", + " apps --> frameworks\n", + " frameworks --> platforms\n", + " platforms --> Llama 2\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "import ipywidgets as widgets\n", + "from IPython.display import display, Markdown\n", + "\n", + "# Create a text widget\n", + "API_KEY = widgets.Password(\n", + " value='',\n", + " placeholder='',\n", + " description='API_KEY:',\n", + " disabled=False\n", + ")\n", + "\n", + "def md(t):\n", + " display(Markdown(t))\n", + "\n", + "def bot_arch():\n", + " mm(\"\"\"\n", + " graph LR;\n", + " user --> prompt\n", + " prompt --> i_safety\n", + " i_safety --> context\n", + " context --> Llama_2\n", + " Llama_2 --> output\n", + " output --> o_safety\n", + " i_safety --> memory\n", + " o_safety --> memory\n", + " memory --> context\n", + " o_safety --> user\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def fine_tuned_arch():\n", + " mm(\"\"\"\n", + " graph LR;\n", + " Custom_Dataset --> Pre-trained_Llama\n", + " Pre-trained_Llama --> Fine-tuned_Llama\n", + " Fine-tuned_Llama --> RLHF\n", + " RLHF --> |Loss:Cross-Entropy|Fine-tuned_Llama\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def load_data_faiss_arch():\n", + " mm(\"\"\"\n", + " graph LR;\n", + " documents --> textsplitter\n", + " textsplitter --> embeddings\n", + " embeddings --> vectorstore\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n", + "\n", + "def mem_context():\n", + " mm(\"\"\"\n", + " graph LR\n", + " context(text)\n", + " user_prompt --> context\n", + " instruction --> context\n", + " examples --> context\n", + " memory --> context\n", + " context --> tokenizer\n", + " tokenizer --> embeddings\n", + " embeddings --> LLM\n", + " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", + " \"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i4Np_l_KtIno" + }, + "source": [ + "##**1 - Understanding Llama 2**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGPSI3M5PGTi" + }, + "source": [ + "### **1.1 - What is Llama 2?**\n", + "\n", + "* State of the art (SOTA), Open Source LLM\n", + "* 7B, 13B, 70B\n", + "* Pretrained + Chat\n", + "* Choosing model: Size, Quality, Cost, Speed\n", + "* [Research paper](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/)\n", + "\n", + "* [Responsible use guide](https://ai.meta.com/llama/responsible-use-guide/)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + 
"height": 240 + }, + "executionInfo": { + "elapsed": 248, + "status": "ok", + "timestamp": 1695832233087, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "OXRCC7wexZXd", + "outputId": "1feb1918-df4b-4cec-d09e-ffe55c12090b" + }, + "outputs": [], + "source": [ + "llama2_family()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aYeHVVh45bdT" + }, + "source": [ + "###**1.2 - Accessing Llama 2**\n", + "* Download + Self Host (on-premise)\n", + "* Hosted API Platform (e.g. [OctoAI](https://octoai.cloud/), [Replicate](https://replicate.com/meta))\n", + "* Hosted Container Platform (e.g. [Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233), [AWS](https://aws.amazon.com/blogs/machine-learning/llama-2-foundation-models-from-meta-are-now-available-in-amazon-sagemaker-jumpstart/), [GCP](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/139))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kBuSay8vtzL4" + }, + "source": [ + "### **1.3 - Use Cases of Llama 2**\n", + "* Content Generation\n", + "* Chatbots\n", + "* Summarization\n", + "* Programming (e.g. Code Llama)\n", + "\n", + "* and many more..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sd54g0OHuqBY" + }, + "source": [ + "##**2 - Using Llama 2**\n", + "\n", + "In this notebook, we are going to access [Llama 13b chat model](https://octoai.cloud/tools/text/chat?mode=demo&model=llama-2-13b-chat-fp16) using hosted API from OctoAI." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h3YGMDJidHtH" + }, + "source": [ + "### **2.1 - Install dependencies**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VhN6hXwx7FCp" + }, + "outputs": [], + "source": [ + "# Install dependencies and initialize\n", + "%pip install -qU \\\n", + " octoai-sdk \\\n", + " langchain \\\n", + " sentence_transformers \\\n", + " pdf2image \\\n", + " pdfminer \\\n", + " pdfminer.six \\\n", + " unstructured \\\n", + " faiss-cpu \\\n", + " pillow-heif \\\n", + " opencv-python \\\n", + " unstructured-inference \\\n", + " pikepdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8Y8qjEjmg50" + }, + "outputs": [], + "source": [ + "# model on OctoAI platform that we will use for inferencing\n", + "# We will use llama 13b chat model hosted on OctoAI server ()\n", + "\n", + "llama2_13b = \"llama-2-13b-chat-fp16\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8hkWpqWD28ho" + }, + "outputs": [], + "source": [ + "# We will use OctoAI hosted cloud environment\n", + "# Obtain OctoAI API key → https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token\n", + "\n", + "# enter your replicate api token\n", + "from getpass import getpass\n", + "import os\n", + "\n", + "OCTOAI_API_TOKEN = getpass()\n", + "os.environ[\"OCTOAI_API_TOKEN\"] = OCTOAI_API_TOKEN\n", + "\n", + "# alternatively, you can also store the tokens in environment variables and load it here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bVCHZmETk36v" + }, + "outputs": [], + "source": [ + "# we will use OctoAI's hosted API\n", + "from octoai.client import Client\n", + "\n", + "client = Client(OCTOAI_API_TOKEN)\n", + "\n", + "# text completion with input prompt\n", + "def Completion(prompt):\n", + " output = 
client.chat.completions.create(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt\n", + " }\n", + " ],\n", + " model=llama2_13b,\n", + " max_tokens=1000\n", + " )\n", + " return output.choices[0].message.content\n", + "\n", + "# chat completion with input prompt and an optional system prompt\n", + "def ChatCompletion(prompt, system_prompt=None):\n", + " # only include a system message when a system prompt is given\n", + " messages = []\n", + " if system_prompt is not None:\n", + " messages.append({\"role\": \"system\", \"content\": system_prompt})\n", + " messages.append({\"role\": \"user\", \"content\": prompt})\n", + " output = client.chat.completions.create(\n", + " messages=messages,\n", + " model=llama2_13b,\n", + " max_tokens=1000\n", + " )\n", + " return output.choices[0].message.content" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Jxq0pmf6L73" + }, + "source": [ + "### **2.2 - Basic completion**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H93zZBIk6tNU" + }, + "outputs": [], + "source": [ + "output = Completion(prompt=\"The typical color of a llama is: \")\n", + "md(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "StccjUDh6W0Q" + }, + "source": [ + "### **2.3 - System prompts**\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VRnFogxd6rTc" + }, + "outputs": [], + "source": [ + "output = ChatCompletion(\n", + " prompt=\"The typical color of a llama is: \",\n", + " system_prompt=\"respond with only one word\"\n", + " )\n", + "md(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hp4GNa066pYy" + }, + "source": [ + "### **2.4 - Response formats**\n", + "* Llama can return differently formatted outputs, e.g. plain text, JSON, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HTN79h4RptgQ" + }, + "outputs": [], + "source": [ + "output = ChatCompletion(\n", + " prompt=\"The typical color of a llama is: \",\n", + " system_prompt=\"respond in JSON format\"\n", + " )\n", + "md(output)" + ] + },
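+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The JSON above comes back as plain text, so it can be parsed like any other string. Below is a minimal sketch (not part of the original flow) that requests JSON and parses it with Python's built-in `json` module; the key name `color` and the defensive extraction are illustrative, since the model may wrap the JSON in extra prose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A minimal sketch: request JSON output, then parse it defensively.\n", + "# The model may add prose around the JSON, so extract the first {...} span.\n", + "import json\n", + "\n", + "raw = ChatCompletion(\n", + " prompt=\"The typical color of a llama is: \",\n", + " system_prompt=\"respond in JSON format with a single key 'color'\"\n", + " )\n", + "try:\n", + " start, end = raw.index(\"{\"), raw.rindex(\"}\") + 1\n", + " parsed = json.loads(raw[start:end])\n", + " md(f\"Parsed color: {parsed.get('color')}\")\n", + "except ValueError: # raised by index/rindex and json.loads alike\n", + " md(f\"Could not parse JSON from: {raw}\")" + ] + },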
+ { + "cell_type": "markdown", + "metadata": { + "id": "cWs_s9y-avIT" + }, + "source": [ + "## **3 - Gen AI Application Architecture**\n", + "\n", + "Here is the high-level tech stack/architecture of a Generative AI application." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 446 + }, + "executionInfo": { + "elapsed": 405, + "status": "ok", + "timestamp": 1695832253437, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "j9BGuI-9AOL5", + "outputId": "72b2613f-a434-4219-f063-52a409af97cc" + }, + "outputs": [], + "source": [ + "genai_app_arch()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6UlxBtbgys6j" + }, + "source": [ + "##**4 - Chatbot Architecture**\n", + "\n", + "Here are the key components and the information flow in a chatbot.\n", + "\n", + "* User Prompts\n", + "* Input Safety\n", + "* Llama 2\n", + "* Output Safety\n", + "\n", + "* Memory & Context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 178 + }, + "executionInfo": { + "elapsed": 249, + "status": "ok", + "timestamp": 1695832257063, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "tO5HnB56ys6t", + "outputId": "f222d35b-626f-4dc1-b7af-a156a0f3d58b" + }, + "outputs": [], + "source": [ + "bot_arch()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r4DyTLD5ys6t" + }, + "source": [ + "### **4.1 - Chat conversation**\n", + "* LLMs are stateless\n", + "* Single Turn\n", + "\n", + "* Multi Turn (Memory)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EMM_egWMys6u" + }, + "outputs": [], + "source": [ + "# Example of single-turn chat\n", + "prompt_chat = \"What is the average lifespan of a Llama?\"\n", + "output = ChatCompletion(prompt=prompt_chat, system_prompt=\"answer the last question in a few words\")\n", + "md(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sZ7uVKDYucgi" + }, + "outputs": [], + "source": [ + "# Example without previous context. LLMs are stateless and cannot resolve \"they\" without previous context\n", + "prompt_chat = \"What animal family are they?\"\n", + "output = ChatCompletion(prompt=prompt_chat, system_prompt=\"answer the last question in a few words\")\n", + "md(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQl3wmfbyBQ1" + }, + "source": [ + "A chat application needs to send the previous context back to the LLM to get valid responses. Below is an example of multi-turn chat." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t7SZe5fT3HG3" + }, + "outputs": [], + "source": [ + "# Example of multi-turn chat, with previous context carried in the prompt\n", + "prompt_chat = \"\"\"\n", + "User: What is the average lifespan of a Llama?\n", + "Assistant: Sure! The average lifespan of a llama is around 20-30 years.\n", + "User: What animal family are they?\n", + "\"\"\"\n", + "output = ChatCompletion(prompt=prompt_chat, system_prompt=\"answer the last question\")\n", + "md(output)" + ] + },
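+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example above packs the whole history into one prompt string. With a chat-style API you can instead pass the history as role-tagged messages. Below is a minimal sketch assuming the same OctoAI `client` and `llama2_13b` defined earlier; the hard-coded history is illustrative." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A minimal sketch: multi-turn chat via role-tagged messages instead of\n", + "# packing prior turns into one prompt string. Assumes `client` and\n", + "# `llama2_13b` from the setup above.\n", + "history = [\n", + " {\"role\": \"user\", \"content\": \"What is the average lifespan of a Llama?\"},\n", + " {\"role\": \"assistant\", \"content\": \"Sure! The average lifespan of a llama is around 20-30 years.\"},\n", + " {\"role\": \"user\", \"content\": \"What animal family are they?\"}\n", + "]\n", + "output = client.chat.completions.create(\n", + " messages=history,\n", + " model=llama2_13b,\n", + " max_tokens=1000\n", + ")\n", + "md(output.choices[0].message.content)" + ] + },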
Zero-shot, Few-shot)**\n", + " * In-context learning - specific method of prompt engineering where demonstration of task are provided as part of prompt.\n", + " 1. Zero-shot learning - model is performing tasks without any\n", + "input examples.\n", + " 2. Few or “N-Shot” Learning - model is performing and behaving based on input examples in user's prompt." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6W71MFNZyRkQ" + }, + "outputs": [], + "source": [ + "# Zero-shot example. To get positive/negative/neutral sentiment, we need to give examples in the prompt\n", + "prompt = '''\n", + "Classify: I saw a Gecko.\n", + "Sentiment: ?\n", + "'''\n", + "output = ChatCompletion(prompt, system_prompt=\"one word response\")\n", + "md(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCQRjf1Y1RYJ" + }, + "outputs": [], + "source": [ + "# By giving examples to Llama, it understands the expected output format.\n", + "\n", + "prompt = '''\n", + "Classify: I love Llamas!\n", + "Sentiment: Positive\n", + "Classify: I dont like Snakes.\n", + "Sentiment: Negative\n", + "Classify: I saw a Gecko.\n", + "Sentiment:'''\n", + "\n", + "output = ChatCompletion(prompt, system_prompt=\"One word response\")\n", + "md(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8UmdlTmpDZxA" + }, + "outputs": [], + "source": [ + "# another zero-shot learning\n", + "prompt = '''\n", + "QUESTION: Vicuna?\n", + "ANSWER:'''\n", + "\n", + "output = ChatCompletion(prompt, system_prompt=\"one word response\")\n", + "md(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M_EcsUo1zqFD" + }, + "outputs": [], + "source": [ + "# Another few-shot learning example with formatted prompt.\n", + "\n", + "prompt = '''\n", + "QUESTION: Llama?\n", + "ANSWER: Yes\n", + "QUESTION: Alpaca?\n", + "ANSWER: Yes\n", + "QUESTION: Rabbit?\n", + "ANSWER: No\n", + "QUESTION: Vicuna?\n", + "ANSWER:'''\n", + "\n", + "output = ChatCompletion(prompt, system_prompt=\"one word response\")\n", + "md(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mbr124Y197xl" + }, + "source": [ + "#### **4.2.2 - Chain of Thought**\n", + "\"Chain of thought\" enables complex reasoning through logical step by step thinking and generates meaningful and contextually relevant responses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xn8zmLBQzpgj" + }, + "outputs": [], + "source": [ + "# Standard prompting\n", + "prompt = '''\n", + "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does Llama have now?\n", + "'''\n", + "\n", + "output = ChatCompletion(prompt, system_prompt=\"provide short answer\")\n", + "md(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lKNOj79o1Kwu" + }, + "outputs": [], + "source": [ + "# Chain-Of-Thought prompting\n", + "prompt = '''\n", + "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls. 
How many tennis balls does Llama have now?\n", + "Let's think step by step.\n", + "'''\n", + "\n", + "output = ChatCompletion(prompt, system_prompt=\"provide short answer\")\n", + "md(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C7tDW-AH770Y" + }, + "source": [ + "### **4.3 - Retrieval Augmented Generation (RAG)**\n", + "* Prompt engineering limitations - knowledge cutoff & lack of specialized data\n", + "\n", + "* Retrieval Augmented Generation (RAG) allows us to retrieve snippets of information from external data sources and add them to the user's prompt to get tailored responses from Llama 2.\n", + "\n", + "For our demo, we are going to download an external PDF file from a URL and query against the content in the PDF file to get contextually relevant information back with the help of Llama!\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 259 + }, + "executionInfo": { + "elapsed": 329, + "status": "ok", + "timestamp": 1695832267093, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "Fl1LPltpRQD9", + "outputId": "4410c9bf-3559-4a05-cebb-a5731bb094c1" + }, + "outputs": [], + "source": [ + "rag_arch()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJaGMLl_4vYm" + }, + "source": [ + "#### **4.3.1 - LangChain**\n", + "LangChain is a framework that makes it easier to implement RAG." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aoqU3KTcHTWN" + }, + "outputs": [], + "source": [ + "# langchain setup\n", + "from langchain.llms.octoai_endpoint import OctoAIEndpoint\n", + "# Use the Llama 2 model hosted on OctoAI\n", + "# temperature: Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value\n", + "# top_p: When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens\n", + "# max_tokens: Maximum number of tokens to generate. A word generally corresponds to 1-3 tokens\n", + "llama_model = OctoAIEndpoint(\n", + " endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n", + " model_kwargs={\n", + " \"model\": llama2_13b,\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are a helpful, respectful and honest assistant.\"\n", + " }\n", + " ],\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " \"temperature\": 0.75\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gAV2EkZqcruF" + }, + "outputs": [], + "source": [ + "# Step 1: load the external data source. In our case, we will load Meta’s “Responsible Use Guide” PDF document.\n", + "from langchain.document_loaders import OnlinePDFLoader\n", + "loader = OnlinePDFLoader(\"https://ai.meta.com/static-resource/responsible-use-guide/\")\n", + "documents = loader.load()\n", + "\n", + "# Step 2: Get text splits from the document\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n", + "all_splits = text_splitter.split_documents(documents)\n", + "\n", + "# Step 3: Use the embedding model\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.embeddings import OctoAIEmbeddings\n", + "embeddings = OctoAIEmbeddings(endpoint_url=\"https://text.octoai.run/v1/embeddings\")\n", + "\n", + "# Step 4: Use the vector store to store the embeddings\n", + "vectorstore = FAISS.from_documents(all_splits, embeddings)" + ] + },
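+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before wiring the vector store into a chain, it helps to see what retrieval alone returns. Below is a minimal sketch, assuming the `vectorstore` built above; the query string and `k=2` are illustrative." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A minimal sketch: inspect what the vector store retrieves for a query\n", + "# before handing retrieval over to a chain. Assumes `vectorstore` from above.\n", + "docs = vectorstore.similarity_search(\"How is Meta approaching open science?\", k=2)\n", + "for i, doc in enumerate(docs):\n", + " md(f\"**Chunk {i+1}:** {doc.page_content[:300]}...\")" + ] + },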
+ { + "cell_type": "markdown", + "metadata": { + "id": "K2l8S5tBxlkc" + }, + "source": [ + "#### **4.3.2 - LangChain Q&A Retriever**\n", + "* ConversationalRetrievalChain\n", + "\n", + "* Query the source documents\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NmEhBe3Kiyre" + }, + "outputs": [], + "source": [ + "# Query against your own data\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "chain = ConversationalRetrievalChain.from_llm(llama_model, vectorstore.as_retriever(), return_source_documents=True)\n", + "\n", + "chat_history = []\n", + "query = \"How is Meta approaching open science in two short sentences?\"\n", + "result = chain.invoke({\"question\": query, \"chat_history\": chat_history})\n", + "md(result['answer'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CelLHIvoy2Ke" + }, + "outputs": [], + "source": [ + "# This time, the previous question and answer are passed as chat history,\n", + "# enabling follow-up questions.\n", + "chat_history = [(query, result[\"answer\"])]\n", + "query = \"How is it benefiting the world?\"\n", + "result = chain.invoke({\"question\": query, \"chat_history\": chat_history})\n", + "md(result['answer'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TEvefAWIJONx" + }, + "source": [ + "## **5 - Fine-Tuning Models**\n", + "\n", + "* Limitations of prompt engineering and RAG\n", + "* Fine-Tuning Arch\n", + "* Types (PEFT, LoRA, QLoRA); a sketch follows the diagram below\n", + "* Using PyTorch for Pre-Training & Fine-Tuning\n", + "\n", + "* Evals + Quality\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 79 + }, + "executionInfo": { + "elapsed": 327, + "status": "ok", + "timestamp": 1695832272878, + "user": { + "displayName": "Amit Sangani", + "userId": "11552178012079240149" + }, + "user_tz": 420 + }, + "id": "0a9CvJ8YcTzV", + "outputId": "56a6d573-a195-4e3c-834d-a3b23485186c" + }, + "outputs": [], + "source": [ + "fine_tuned_arch()" + ] + },
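+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parameter-efficient fine-tuning (PEFT) methods such as LoRA train a small set of added weights instead of the full model. Below is a minimal sketch of what a LoRA setup can look like with Hugging Face's `peft` and `transformers` libraries. It is illustrative only (this notebook runs against a hosted API, and actual training needs a GPU, model weights access, and a training loop); the model name, target modules, and hyperparameters are assumptions, not a prescribed recipe." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A minimal LoRA sketch using Hugging Face peft + transformers (illustrative;\n", + "# the values below are common starting points, not a prescribed recipe).\n", + "from transformers import AutoModelForCausalLM\n", + "from peft import LoraConfig, get_peft_model\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-hf\")\n", + "lora_config = LoraConfig(\n", + " r=8, # rank of the low-rank update matrices\n", + " lora_alpha=32, # scaling factor for the update\n", + " target_modules=[\"q_proj\", \"v_proj\"], # attention projections to adapt\n", + " lora_dropout=0.05,\n", + " task_type=\"CAUSAL_LM\",\n", + ")\n", + "model = get_peft_model(base_model, lora_config)\n", + "model.print_trainable_parameters() # typically <1% of the base model" + ] + },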
+ { + "cell_type": "markdown", + "metadata": { + "id": "_8lcgdZa8onC" + }, + "source": [ + "## **6 - Responsible AI**\n", + "\n", + "* Power + Responsibility\n", + "* Hallucinations\n", + "* Input & Output Safety\n", + "* Red-teaming (simulating real-world cyber attackers)\n", + "* [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pbqb006R-T_k" + }, + "source": [ + "##**7 - Conclusion**\n", + "* Active research on LLMs and Llama\n", + "* Leverage the power of Llama and its open community\n", + "* Safety and responsible use are paramount!\n", + "\n", + "* Call-To-Action\n", + " * [Replicate Free Credits](https://replicate.fyi/connect2023) for Connect attendees!\n", + " * This notebook is available in the Llama GitHub recipes\n", + " * Use Llama in your projects and give us feedback\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gSz5dTMxp7xo" + }, + "source": [ + "#### **Resources**\n", + "- [GitHub - Llama 2](https://github.com/facebookresearch/llama)\n", + "- [GitHub - Llama 2 Recipes](https://github.com/facebookresearch/llama-recipes)\n", + "- [Llama 2](https://ai.meta.com/llama/)\n", + "- [Research Paper](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/)\n", + "- [Model Card](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md)\n", + "- [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", + "- [Acceptable Use Policy](https://ai.meta.com/llama/use-policy/)\n", + "- [OctoAI](https://octoai.cloud/)\n", + "- [LangChain](https://www.langchain.com/)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V7aI6fhZp-KC" + }, + "source": [ + "#### **Authors & Contact**\n", + " * asangani@meta.com, [Amit Sangani | LinkedIn](https://www.linkedin.com/in/amitsangani/)\n", + " * mohsena@meta.com, [Mohsen Agsen | LinkedIn](https://www.linkedin.com/in/mohsen-agsen-62a9791/)\n", + " * Adapted to run on OctoAI by Thierry Moreau - tmoreau@octo.ai" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ioVMNcTesSEk" + ], + "machine_shape": "hm", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}