Unverified Commit 6ba3f55c authored by Sean Smith, committed by GitHub
Contextual Generate model (#17913)

parent 5e37074b
Showing 413 additions and 0 deletions
poetry_requirements(
name="poetry",
)
# Contextual LLM Integration for LlamaIndex
This package provides a Contextual LLM integration for LlamaIndex.
## Installation
```bash
pip install llama-index-llms-contextual
```
## Usage
```python
from llama_index.llms.contextual import Contextual
llm = Contextual(model="v1", api_key="your_api_key")
response = llm.complete("Explain the importance of Grounded Language Models.")
```
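The `complete` method also accepts grounding-specific parameters exposed by this integration, most notably `knowledge` (a list of text snippets the model should ground its answer in). A minimal sketch, assuming a valid Contextual API key:

```python
from llama_index.llms.contextual import Contextual

llm = Contextual(model="v1", api_key="your_api_key")

# Ground the response in caller-supplied knowledge snippets
response = llm.complete(
    "What color is the sky?",
    knowledge=["The sky is blue"],
)
print(response)
```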
python_sources()
from llama_index.llms.contextual.base import Contextual
__all__ = ["Contextual"]
from typing import Any, List, Optional, Sequence

from llama_index.llms.openai_like import OpenAILike
from pydantic import Field
from llama_index.core.llms.callbacks import (
    llm_chat_callback,
    llm_completion_callback,
)
from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    ChatResponseAsyncGen,
    ChatResponseGen,
    CompletionResponse,
    CompletionResponseAsyncGen,
    CompletionResponseGen,
    MessageRole,
)
from contextual import ContextualAI
class Contextual(OpenAILike):
"""
Generate a response using Contextual's Grounded Language Model (GLM), an LLM engineered specifically to prioritize faithfulness to in-context retrievals over parametric knowledge to reduce hallucinations in Retrieval-Augmented Generation.
The total request cannot exceed 32,000 tokens. Email glm-feedback@contextual.ai with any feedback or questions.
Examples:
`pip install llama-index-llms-contextual`
```python
from llama_index.llms.contextual import Contextual
# Set up the Contextual class with the required model and API key
llm = Contextual(model="contextual-clm", api_key="your_api_key")
# Call the complete method with a query
response = llm.complete("Explain the importance of low latency LLMs")
print(response)
```
"""
model: str = Field(
description="The model to use. Currently only supports `v1`.", default="v1"
)
api_key: str = Field(description="The API key to use.", default=None)
base_url: str = Field(
description="The base URL to use.",
default="https://api.contextual.ai/v1/generate",
)
avoid_commentary: bool = Field(
description="Flag to indicate whether the model should avoid providing additional commentary in responses. Commentary is conversational in nature and does not contain verifiable claims; therefore, commentary is not strictly grounded in available context. However, commentary may provide useful context which improves the helpfulness of responses.",
default=False,
)
client: Any = Field(default=None, description="Contextual AI Client")
def __init__(
self,
model: str,
api_key: str,
        base_url: Optional[str] = None,
avoid_commentary: bool = False,
**openai_llm_kwargs: Any,
) -> None:
super().__init__(
model=model,
api_key=api_key,
api_base=base_url,
is_chat_model=openai_llm_kwargs.pop("is_chat_model", True),
**openai_llm_kwargs,
)
        try:
            self.client = ContextualAI(api_key=api_key, base_url=base_url)
        except Exception as e:
            raise ValueError(f"Error initializing ContextualAI client: {e}") from e
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "contextual-clm"
# Synchronous Methods
@llm_completion_callback()
def complete(
self, prompt: str, knowledge: Optional[List[str]] = None, **kwargs
) -> CompletionResponse:
"""
Generate completion for the given prompt.
Args:
prompt (str): The input prompt to generate completion for.
**kwargs: Additional keyword arguments for the API request.
Returns:
str: The generated text completion.
"""
messages_list = [{"role": MessageRole.USER, "content": prompt}]
response = self._generate(
knowledge=knowledge,
messages=messages_list,
model=self.model,
system_prompt=self.system_prompt,
**kwargs,
)
return CompletionResponse(text=response)
@llm_chat_callback()
def chat(self, messages: List[ChatMessage], **kwargs) -> ChatResponse:
"""
Generate a chat response for the given messages.
"""
messages_list = [
{"role": msg.role, "content": msg.blocks[0].text} for msg in messages
]
        response = self._generate(
            knowledge=kwargs.pop("knowledge_base", None),
            messages=messages_list,
            system_prompt=kwargs.pop("system_prompt", self.system_prompt),
            **kwargs,
        )
return ChatResponse(
message=ChatMessage(role=MessageRole.ASSISTANT, content=response)
)
@llm_chat_callback()
def stream_chat(self, messages: List[ChatMessage], **kwargs) -> ChatResponseGen:
"""
Generate a chat response for the given messages.
"""
raise NotImplementedError("stream methods not implemented in Contextual")
@llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
        """
        Streaming completion is not supported by this integration.
        """
        raise NotImplementedError("stream methods not implemented in Contextual")
# ===== Async Endpoints =====
@llm_chat_callback()
async def achat(
self,
messages: Sequence[ChatMessage],
**kwargs: Any,
) -> ChatResponse:
raise NotImplementedError("async methods not implemented in Contextual")
@llm_chat_callback()
async def astream_chat(
self,
messages: Sequence[ChatMessage],
**kwargs: Any,
) -> ChatResponseAsyncGen:
raise NotImplementedError("async methods not implemented in Contextual")
@llm_completion_callback()
async def acomplete(
self, prompt: str, formatted: bool = False, **kwargs: Any
) -> CompletionResponse:
raise NotImplementedError("async methods not implemented in Contextual")
@llm_completion_callback()
async def astream_complete(
self, prompt: str, formatted: bool = False, **kwargs: Any
) -> CompletionResponseAsyncGen:
raise NotImplementedError("async methods not implemented in Contextual")
    def _generate(
        self, knowledge, messages, system_prompt, **kwargs
    ) -> str:
        """
        Call Contextual's /generate endpoint and return the generated text.
        """
        raw_message = self.client.generate.create(
            messages=messages,
            knowledge=knowledge or [],
            model=self.model,
            system_prompt=system_prompt,
            avoid_commentary=kwargs.get("avoid_commentary", self.avoid_commentary),
            temperature=kwargs.get("temperature", 0.0),
            max_new_tokens=kwargs.get("max_tokens", 1024),
            top_p=kwargs.get("top_p", 1),
        )
        return raw_message.response
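For reference, `_generate` delegates to the `contextual-client` SDK. The following standalone sketch mirrors the call the wrapper makes, using only the parameter names that appear above; treat it as illustrative and check the client's documentation for the authoritative API surface.

```python
from contextual import ContextualAI

# Sketch of the underlying SDK call issued by Contextual._generate
client = ContextualAI(api_key="your_api_key")
result = client.generate.create(
    model="v1",
    messages=[{"role": "user", "content": "What color is the sky?"}],
    knowledge=["The sky is blue"],
    system_prompt="You are a helpful assistant.",
    avoid_commentary=False,
    temperature=0.0,
    max_new_tokens=1024,
    top_p=1,
)
print(result.response)
```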
%% Cell type:markdown id: tags:
# Contextual GLM
%% Cell type:code id: tags:
``` python
!pip install llama-index-llms-contextual
```
%% Cell type:code id: tags:
``` python
from llama_index.llms.contextual import Contextual
from dotenv import load_dotenv
import os
# Set up the Contextual class with the required model and API key
# Store the API key in a .env file as CONTEXTUAL_API_KEY
load_dotenv()
llm = Contextual(model="v1", api_key=os.getenv("CONTEXTUAL_API_KEY"))
# Call the complete method with a query
llm.complete(
"Explain the importance of Grounded Language Models.",
temperature=0.5,
max_tokens=1024,
top_p=0.9,
avoid_commentary=False,
system_prompt="You are a helpful assistant that can answer questions and help with tasks.",
knowledge=["The sky is blue"],
)
```
%% Output
CompletionResponse(text="I apologize, but I am unable to provide information about Grounded Language Models. I am an AI assistant created by Contextual AI. I don't have relevant documentation about that topic, but feel free to ask me something else!", additional_kwargs={}, raw=None, logprobs=None, delta=None)
%% Cell type:code id: tags:
``` python
llm.complete(
"what color is the sky?",
knowledge=["The sky is blue"],
avoid_commentary=False,
temperature=0.9,
max_tokens=1,
)
```
%% Output
CompletionResponse(text='The sky is blue.', additional_kwargs={}, raw=None, logprobs=None, delta=None)
%% Cell type:code id: tags:
``` python
from llama_index.core.chat_engine.types import ChatMessage
llm.chat([ChatMessage(role="user", content="what color is the sky?")])
```
%% Output
---------------------------------------------------------------------------
AuthenticationError                       Traceback (most recent call last)
Cell In[39], line 3
      1 from llama_index.core.chat_engine.types import ChatMessage
----> 3 llm.chat([ChatMessage(role="user", content="what color is the sky?")])

[... intermediate frames elided: OpenAILike.chat -> OpenAI.chat ->
    openai chat.completions.create -> SyncAPIClient._request ...]

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: key-KCj3*****************************************izwo. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
[tool.codespell]
check-filenames = true
check-hidden = true
skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
[tool.llamahub]
contains_example = false
import_path = "llama_index.llms.contextual"
[tool.llamahub.class_authors]
Contextual = "sean-smith"
[tool.mypy]
disallow_untyped_defs = true
exclude = ["_static", "build", "examples", "notebooks", "venv"]
ignore_missing_imports = true
python_version = "3.8"
[tool.poetry]
authors = ["Sean Smith <sean.smith@contextual.ai>"]
description = "llama-index contextual integration"
exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-llms-contextual"
readme = "README.md"
version = "0.0.1"
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
llama-index-llms-openai-like = "^0.3.3"
contextual-client = "^0.4.0"
[tool.poetry.group.dev.dependencies.black]
extras = ["jupyter"]
version = "<=23.9.1,>=23.7.0"
[tool.poetry.group.dev.dependencies.codespell]
extras = ["toml"]
version = ">=v2.2.6"
[[tool.poetry.packages]]
include = "llama_index/"
python_sources()
from llama_index.core.base.llms.base import BaseLLM
from llama_index.llms.contextual import Contextual
def test_llm_class():
names_of_base_classes = [b.__name__ for b in Contextual.__mro__]
assert BaseLLM.__name__ in names_of_base_classes
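A further lightweight check that could be added alongside the test above (a sketch; it only exercises the `class_name` classmethod from the source, so no API key or network access is needed):

```python
def test_class_name():
    # class_name() is a classmethod on Contextual and requires no client setup
    assert Contextual.class_name() == "contextual-clm"
```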