From e78db2865c2f7f0a12f98f2418e88e6da06a321a Mon Sep 17 00:00:00 2001 From: Anindyadeep <proanindyadeep@gmail.com> Date: Fri, 15 Mar 2024 04:40:25 +0530 Subject: [PATCH] Prem AI LlamaIndex integration (migrating PR from org account to personal account) (#11954) --- docs/examples/embeddings/premai.ipynb | 182 ++++++++++ docs/examples/llm/premai.ipynb | 258 ++++++++++++++ docs/module_guides/models/llms/modules.md | 9 + .../llama_index/cli/upgrade/mappings.json | 2 + .../core/command_line/mappings.json | 2 + .../llama-index-embeddings-premai/.gitignore | 153 ++++++++ .../llama-index-embeddings-premai/BUILD | 3 + .../llama-index-embeddings-premai/Makefile | 17 + .../llama-index-embeddings-premai/README.md | 1 + .../llama_index/embeddings/premai/BUILD | 5 + .../llama_index/embeddings/premai/__init__.py | 3 + .../llama_index/embeddings/premai/base.py | 84 +++++ .../embeddings/premai/pyproject.toml | 63 ++++ .../pyproject.toml | 63 ++++ .../llama-index-embeddings-premai/tests/BUILD | 3 + .../tests/__init__.py | 0 .../tests/test_embeddings_prem.py | 9 + .../llms/llama-index-llms-premai/.gitignore | 153 ++++++++ .../llms/llama-index-llms-premai/BUILD | 3 + .../llms/llama-index-llms-premai/Makefile | 17 + .../llms/llama-index-llms-premai/README.md | 1 + .../llama_index/llms/premai/BUILD | 1 + .../llama_index/llms/premai/__init__.py | 3 + .../llama_index/llms/premai/base.py | 332 ++++++++++++++++++ .../llama-index-llms-premai/pyproject.toml | 63 ++++ .../llms/llama-index-llms-premai/tests/BUILD | 1 + .../llama-index-llms-premai/tests/__init__.py | 0 .../tests/test_llms_prem.py | 7 + 28 files changed, 1438 insertions(+) create mode 100644 docs/examples/embeddings/premai.ipynb create mode 100644 docs/examples/llm/premai.ipynb create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/.gitignore create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/Makefile create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/README.md create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/base.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/pyproject.toml create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/pyproject.toml create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/test_embeddings_prem.py create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/.gitignore create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/BUILD create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/Makefile create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/README.md create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/BUILD create mode 100644 
llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/base.py create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/pyproject.toml create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/tests/BUILD create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/tests/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-premai/tests/test_llms_prem.py diff --git a/docs/examples/embeddings/premai.ipynb b/docs/examples/embeddings/premai.ipynb new file mode 100644 index 0000000000..7744b5868a --- /dev/null +++ b/docs/examples/embeddings/premai.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a href=\"https://colab.research.google.com/drive/176IfpC2akqWOhDpVSnAA_eLEbzt4a5Fw?usp=sharing\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PremAI Embeddings\n", + "\n", + ">[PremAI](https://app.premai.io) is a unified platform that lets you build powerful, production-ready GenAI-powered applications with minimal effort, so that you can focus more on user experience and overall growth. \n", + "\n", + "\n", + "In this section we discuss how to access different embedding models using `PremAIEmbeddings` with llama-index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and setup\n", + "\n", + "We start by installing llama-index and premai-sdk. You can type the following command to install:\n", + "\n", + "```bash\n", + "pip install premai llama-index\n", + "```\n", + "\n", + "Before proceeding further, please make sure that you have created an account on Prem and already started a project. If not, here's how you can start for free:\n", + "\n", + "1. Sign in to [PremAI](https://app.premai.io/accounts/login/) (sign up if you are coming for the first time) and create your API key [here](https://app.premai.io/api_keys/).\n", + "\n", + "2. Go to [app.premai.io](https://app.premai.io); this will take you to the projects dashboard. \n", + "\n", + "3. Create a project; this will generate a project ID (written as ID). This ID will help you interact with your deployed application. \n", + "\n", + "Congratulations on creating your first deployed application on Prem 🎉 Now we can use llama-index to interact with our application. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-embeddings-premai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.embeddings.premai import PremAIEmbeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup PremAIEmbeddings instance in LlamaIndex \n", + "\n", + "Once we have imported our required modules, let's set up our client. For now, let's assume that our `project_id` is `8`, but make sure to use your own project ID, otherwise it will throw an error. For embeddings you also have to additionally pass `model`. Here is the list of available embedding models on PremAI.\n", + "\n", + "| Provider | Slug | Context Tokens |\n", + "|-------------|------------------------------------------|----------------|\n", + "| cohere | embed-english-v3.0 | N/A |\n", + "| openai | text-embedding-3-small | 8191 |\n", + "| openai | text-embedding-3-large | 8191 |\n", + "| openai | text-embedding-ada-002 | 8191 |\n", + "| replicate | replicate/all-mpnet-base-v2 | N/A |\n", + "| together | togethercomputer/Llama-2-7B-32K-Instruct | N/A |\n", + "| mistralai | mistral-embed | 4096 |\n", + "\n", + "To change the model, you simply need to copy the `slug` and pass it as `model_name`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "if os.environ.get(\"PREMAI_API_KEY\") is None:\n", + "    os.environ[\"PREMAI_API_KEY\"] = getpass.getpass(\"PremAI API Key:\")\n", + "\n", + "prem_embedding = PremAIEmbeddings(\n", + "    project_id=8, model_name=\"text-embedding-3-large\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calling the Embedding Model\n", + "\n", + "Now you are all set. Let's start using our embedding model with a single query, followed by multiple texts (also called a document)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Hello, this is a test query\"\n", + "query_result = prem_embedding.get_text_embedding(query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dimension of embeddings: 3072\n" + ] + } + ], + "source": [ + "print(f\"Dimension of embeddings: {len(query_result)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-0.02129288576543331,\n", + " 0.0008162345038726926,\n", + " -0.004556538071483374,\n", + " 0.02918623760342598,\n", + " -0.02547479420900345]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_result[:5]" + ] + },
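+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also embed a list of texts (a document) in a single batched call, since `PremAIEmbeddings` implements batch text embeddings. Below is a minimal sketch that reuses the `prem_embedding` client from above; the sample texts are placeholders."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "documents = [\n",
+ "    \"This is document 1\",\n",
+ "    \"This is document 2\",\n",
+ "    \"This is document 3\",\n",
+ "]\n",
+ "\n",
+ "# One embedding vector is returned per input text\n",
+ "doc_embeddings = prem_embedding.get_text_embedding_batch(documents)\n",
+ "print(f\"Number of embeddings: {len(doc_embeddings)}\")"
+ ]
+ }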
+ ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/llm/premai.ipynb b/docs/examples/llm/premai.ipynb new file mode 100644 index 0000000000..d43a6a380c --- /dev/null +++ b/docs/examples/llm/premai.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a href=\"https://colab.research.google.com/drive/1F0V_eClPOpS_2HIW-F2mCivgqBfFo2TR?usp=sharing\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PremAI LlamaIndex\n", + "\n", + ">[PremAI](https://app.premai.io) is a unified platform that lets you build powerful, production-ready GenAI-powered applications with minimal effort, so that you can focus more on user experience and overall growth. \n", + "\n", + "\n", + "This example goes over how to use LlamaIndex to interact with `PremAI` chat models. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and setup\n", + "\n", + "We start by installing llama-index and premai-sdk. You can type the following command to install:\n", + "\n", + "```bash\n", + "pip install premai llama-index\n", + "```\n", + "\n", + "Before proceeding further, please make sure that you have created an account on Prem and already started a project. If not, here's how you can start for free:\n", + "\n", + "1. Sign in to [PremAI](https://app.premai.io/accounts/login/) (sign up if you are coming for the first time) and create your API key [here](https://app.premai.io/api_keys/).\n", + "\n", + "2. Go to [app.premai.io](https://app.premai.io); this will take you to the projects dashboard. \n", + "\n", + "3. Create a project; this will generate a project ID (written as ID). This ID will help you interact with your deployed application. \n", + "\n", + "4. Head over to LaunchPad (the one with the 🚀 icon) and deploy your model of choice there. Your default model will be `gpt-4`. You can also set and fix different generation parameters (like max tokens, temperature, etc.) and pre-set your system prompt. \n", + "\n", + "Congratulations on creating your first deployed application on Prem 🎉 Now we can use llama-index to interact with our application. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-llms-premai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.premai import PremAI\n", + "from llama_index.core.llms import ChatMessage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup PremAI instance in LlamaIndex \n", + "\n", + "Once we have imported our required modules, let's set up our client. For now, let's assume that our `project_id` is `8`, but make sure to use your own project ID, otherwise it will throw an error.\n", + "\n", + "In order to use llama-index with PremAI, you do not need to pass any model name or set any parameters with our chat client. All of those will use the default model name and parameters of the LaunchPad model. \n", + "\n", + "`NOTE:` If you change the `model` or any other parameter like `temperature` while setting up the client, it will override the existing default configurations, as sketched in the cell below. " + ] + },
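+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For instance, here is a minimal sketch of such an override (the parameter values below are arbitrary, illustrative choices):\n",
+ "\n",
+ "```python\n",
+ "prem_chat = PremAI(project_id=8, temperature=0.7, max_tokens=256)\n",
+ "```"
+ ]
+ },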
\n", + "\n", + "\n", + "This example goes over how to use LlamaIndex to interact with `ChatPrem` models. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and setup\n", + "\n", + "We start by installing llama-index and premai-sdk. You can type the following command to install:\n", + "\n", + "```bash\n", + "pip install premai llama-index\n", + "```\n", + "\n", + "Before proceeding further, please make sure that you have made an account on Prem and already started a project. If not, then here's how you can start for free:\n", + "\n", + "1. Sign in to [PremAI](https://app.premai.io/accounts/login/), if you are coming for the first time and create your API key [here](https://app.premai.io/api_keys/).\n", + "\n", + "2. Go to [app.premai.io](https://app.premai.io) and this will take you to the project's dashboard. \n", + "\n", + "3. Create a project and this will generate a project-id (written as ID). This ID will help you to interact with your deployed application. \n", + "\n", + "4. Head over to LaunchPad (the one with 🚀 icon). And there deploy your model of choice. Your default model will be `gpt-4`. You can also set and fix different generation paramters (like: max-tokens, temperature etc) and also pre-set your system prompt. \n", + "\n", + "Congratulations on creating your first deployed application on Prem 🎉 Now we can use llama-index to interact with our application. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-llms-premai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.premai import PremAI\n", + "from llama_index.core.llms import ChatMessage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup ChatPrem instance in LlamaIndex \n", + "\n", + "Once we imported our required modules, let's setup our client. For now let's assume that our `project_id` is `8`. But make sure you use your project-id, otherwise it will throw error.\n", + "\n", + "In order to use llama-index with PremAI, you do not need to pass any model name or set any parameters with our chat-client. All of those will use the default model name and paramters of the LaunchPad model. \n", + "\n", + "`NOTE:` If you change the `model` or any other parameter like `temperature` while setting the client, it will override existing default configurations. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "if os.environ.get(\"PREMAI_API_KEY\") is None:\n", + " os.environ[\"PREMAI_API_KEY\"] = getpass.getpass(\"PremAI API Key:\")\n", + "\n", + "prem_chat = PremAI(project_id=8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calling the Model\n", + "\n", + "Now you are all set. We can now start with interacting with our application. 
Let's start by building a simple chat request and response using llama-index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + "    ChatMessage(role=\"user\", content=\"What is your name\"),\n", + "    ChatMessage(\n", + "        role=\"user\", content=\"Write an essay about your school in 500 words\"\n", + "    ),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please note that you can provide a system prompt here too, like this:\n", + "\n", + "```python\n", + "messages = [\n", + "    ChatMessage(role=\"system\", content=\"Act like a pirate\"),\n", + "    ChatMessage(role=\"user\", content=\"What is your name\"),\n", + "    ChatMessage(role=\"user\", content=\"Where do you live, write an essay in 500 words\"),\n", + "]\n", + "```\n", + "\n", + "On the other hand, you can also instantiate your object with a system prompt, like this:\n", + "\n", + "```python\n", + "chat = PremAI(project_id=8, system_prompt=\"Act like nemo fish\")\n", + "```\n", + "\n", + "In both scenarios, you override the system prompt that was fixed while deploying the application from the platform. Specifically, in this case, if you override the system prompt while instantiating the `PremAI` class, then the system message in `ChatMessage` won't have any effect. \n", + "\n", + "So if you want to override the system prompt for any experimental case, you need to provide it either while instantiating the class or while writing a `ChatMessage` with the role `system`. \n", + "\n", + "\n", + "Now let's call the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content=\"I'm here to assist you with any questions or tasks you have, but I'm not able to write essays. However, if you need help brainstorming ideas or organizing your thoughts for your essay about your school, I'd be happy to help with that. Just let me know how I can assist you further!\", additional_kwargs={}), raw={'role': <RoleEnum.ASSISTANT: 'assistant'>, 'content': \"I'm here to assist you with any questions or tasks you have, but I'm not able to write essays. However, if you need help brainstorming ideas or organizing your thoughts for your essay about your school, I'd be happy to help with that. Just let me know how I can assist you further!\"}, delta=None, additional_kwargs={})]\n" + ] + } + ], + "source": [ + "response = prem_chat.chat(messages)\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also convert your chat function to a completion function. Here's how it works:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "completion = prem_chat.complete(\"Paul Graham is \")" + ] + },
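+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The call above returns a `CompletionResponse`; a minimal sketch of reading the generated text (output not recorded here):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The generated text is available on the .text attribute of the response\n",
+ "print(completion.text)"
+ ]
+ },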
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Streaming \n", + "\n", + "In this section, let's see how we can stream tokens using llama-index and PremAI. It is very similar to the methods above. Here's how you do it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I'm here to assist you with writing tasks, but I don't have personal experiences or attend school. However, I can help you brainstorm ideas, outline your essay, or provide information on various school-related topics. Just let me know how I can assist you further!" + ] + } + ], + "source": [ + "streamed_response = prem_chat.stream_chat(messages)\n", + "\n", + "for response_delta in streamed_response:\n", + "    print(response_delta.delta, end=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And this will stream tokens one after the other. Similar to the `complete` method, we have a `stream_complete` method that streams tokens for completions. Here's how to do that." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello! I'm here and ready to assist you. How can I help you today?" + ] + } + ], + "source": [ + "# This will stream tokens one by one\n", + "\n", + "streamed_response = prem_chat.stream_complete(\"hello how are you\")\n", + "\n", + "for response_delta in streamed_response:\n", + "    print(response_delta.delta, end=\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/module_guides/models/llms/modules.md b/docs/module_guides/models/llms/modules.md index f3a8704e60..2e169ba263 100644 --- a/docs/module_guides/models/llms/modules.md +++ b/docs/module_guides/models/llms/modules.md @@ -197,6 +197,15 @@ maxdepth: 1 /cookbooks/mistralai.ipynb ``` +## PremAI + +```{toctree} +--- +maxdepth: 1 +--- +/examples/llm/premai.ipynb +``` + ## MonsterAPI ```{toctree} diff --git a/llama-index-cli/llama_index/cli/upgrade/mappings.json b/llama-index-cli/llama_index/cli/upgrade/mappings.json index 45d01daa27..2832442a98 100644 --- a/llama-index-cli/llama_index/cli/upgrade/mappings.json +++ b/llama-index-cli/llama_index/cli/upgrade/mappings.json @@ -510,6 +510,7 @@ "GeminiEmbedding": "llama_index.embeddings.gemini", "FastEmbedEmbedding": "llama_index.embeddings.fastembed", "FireworksEmbedding": "llama_index.embeddings.fireworks", + "PremAIEmbeddings": "llama_index.embeddings.premai", "InstructorEmbedding": "llama_index.embeddings.instructor", "OptimumEmbedding": "llama_index.embeddings.huggingface_optimum", "LangchainEmbedding": "llama_index.embeddings.langchain", @@ -810,6 +811,7 @@ "VllmServer": "llama_index.llms.vllm", "Maritalk": "llama_index.llms.maritalk", "Groq": "llama_index.llms.groq", + "PremAI": "llama_index.llms.premai", "Neutrino": "llama_index.llms.neutrino", "MonsterLLM": "llama_index.llms.monsterapi", "PredibaseLLM": "llama_index.llms.predibase", diff --git a/llama-index-core/llama_index/core/command_line/mappings.json b/llama-index-core/llama_index/core/command_line/mappings.json index 378f8498e5..6073c967a5 100644 --- a/llama-index-core/llama_index/core/command_line/mappings.json +++ b/llama-index-core/llama_index/core/command_line/mappings.json @@ -510,6 +510,7 @@ "IntelEmbedding": "llama_index.embeddings.huggingface_optimum_intel", "JinaEmbedding": "llama_index.embeddings.jinaai", "NomicEmbedding": "llama_index.embeddings.nomic", + "PremAIEmbeddings": "llama_index.embeddings.premai", "GeminiEmbedding": "llama_index.embeddings.gemini", "FastEmbedEmbedding": 
"llama_index.embeddings.fastembed", "FireworksEmbedding": "llama_index.embeddings.fireworks", @@ -808,6 +809,7 @@ "OpenLLMAPI": "llama_index.llms.openllm", "LiteLLM": "llama_index.llms.litellm", "SageMakerLLM": "llama_index.llms.sagemaker_endpoint", + "PremAI": "llama_index.llms.premai", "Portkey": "llama_index.llms.portkey", "Vllm": "llama_index.llms.vllm", "VllmServer": "llama_index.llms.vllm", diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-premai/.gitignore new file mode 100644 index 0000000000..990c18de22 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-premai/BUILD new file mode 100644 index 0000000000..0896ca890d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-premai/Makefile new file mode 100644 index 0000000000..b9eab05aa3 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-premai/README.md new file mode 100644 index 0000000000..fa2c3eb200 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/README.md @@ -0,0 +1 @@ +# LlamaIndex Embeddings Integration: PremAI
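+
+## Usage
+
+Below is a minimal usage sketch. The project ID and model slug are placeholders, and `PREMAI_API_KEY` is assumed to be set in the environment (it can also be passed as `premai_api_key`):
+
+```python
+from llama_index.embeddings.premai import PremAIEmbeddings
+
+# Placeholder project ID and model slug; use your own values
+embedding = PremAIEmbeddings(project_id=8, model_name="text-embedding-3-large")
+vector = embedding.get_text_embedding("Hello, Prem!")
+```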
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/BUILD new file mode 100644 index 0000000000..131a9f87d8 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/BUILD @@ -0,0 +1,5 @@ +poetry_requirements( + name="poetry", +) + +python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/__init__.py new file mode 100644 index 0000000000..c7536f877a --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/__init__.py @@ -0,0 +1,3 @@ +from llama_index.embeddings.premai.base import PremAIEmbeddings + +__all__ = ["PremAIEmbeddings"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/base.py new file mode 100644 index 0000000000..30ebbf1ce3 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/base.py @@ -0,0 +1,84 @@ +"""PremAI embeddings file.""" + +from typing import Any, List, Optional + +from llama_index.core.base.embeddings.base import ( + BaseEmbedding, +) +from llama_index.core.bridge.pydantic import PrivateAttr +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.base.llms.generic_utils import get_from_param_or_env +from llama_index.core.bridge.pydantic import Field + +from premai import Prem + + +class PremAIEmbeddings(BaseEmbedding): + """Class for PremAI embeddings.""" + + project_id: int = Field( + description=( + "The project ID in which the experiments or deployments are carried out. You can find all your projects here: https://app.premai.io/projects/" + ) + ) + premai_api_key: Optional[str] = Field( + description="Prem AI API Key. Get it here: https://app.premai.io/api_keys/" + ) + + model_name: str = Field( + description=("The embedding model to choose from"), + ) + + # Instance variables initialized via Pydantic's mechanism + _premai_client: "Prem" = PrivateAttr() + + def __init__( + self, + project_id: int, + model_name: str, + premai_api_key: Optional[str] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ): + api_key = get_from_param_or_env("api_key", premai_api_key, "PREMAI_API_KEY", "") + + if not api_key: + raise ValueError( + "You must provide an API key to use PremAI. " + "You can either pass it in as an argument or set the environment variable `PREMAI_API_KEY`." + ) + self._premai_client = Prem(api_key=api_key) + super().__init__( + project_id=project_id, + model_name=model_name, + callback_manager=callback_manager, + **kwargs, + ) + + @classmethod + def class_name(cls) -> str: + return "PremAIEmbeddings" + + def _get_query_embedding(self, query: str) -> List[float]: + """Get query embedding.""" + embedding_response = self._premai_client.embeddings.create( + project_id=self.project_id, model=self.model_name, input=query + ) + return embedding_response.data[0].embedding + + async def _aget_query_embedding(self, query: str) -> List[float]: + raise NotImplementedError("Async calls are not available in this version.") + + def _get_text_embedding(self, text: str) -> List[float]: + """Get text embedding.""" + embedding_response = self._premai_client.embeddings.create( + project_id=self.project_id, model=self.model_name, input=[text] + ) + return embedding_response.data[0].embedding + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + """Get text embeddings.""" + embeddings = self._premai_client.embeddings.create( + model=self.model_name, project_id=self.project_id, input=texts + ).data + return [embedding.embedding for embedding in embeddings] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/pyproject.toml new file mode 100644 index 0000000000..d31a328fd7 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.embeddings.premai" + +[tool.llamahub.class_authors] +PremAIEmbeddings = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["PremAI"] +description = "llama-index embeddings premai integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-embeddings-premai" +readme = "README.md" +version = "0.1.3" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.1" +premai = "^0.3.20" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-premai/pyproject.toml new file mode 100644 index 0000000000..d31a328fd7 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +build-backend = 
"poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.embeddings.premai" + +[tool.llamahub.class_authors] +PremAIEmbeddings = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["PremAI"] +description = "llama-index embeddings premai integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-embeddings-premai" +readme = "README.md" +version = "0.1.3" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.1" +premai = "^0.3.20" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/BUILD new file mode 100644 index 0000000000..ca21d4d9cc --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/BUILD @@ -0,0 +1,3 @@ +python_tests( + dependencies=["llama-index-integrations/embeddings/llama-index-embeddings-premai/llama_index/embeddings/premai/base.py"] +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/test_embeddings_prem.py b/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/test_embeddings_prem.py new file mode 100644 index 0000000000..e57b8f1d7d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-premai/tests/test_embeddings_prem.py @@ -0,0 +1,9 @@ +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.embeddings.premai import PremAIEmbeddings + + +def test_embedding_class(): + emb = PremAIEmbeddings( + project_id=8, model_name="text-embedding-3-large", premai_api_key="test" + ) + assert isinstance(emb, BaseEmbedding) diff --git a/llama-index-integrations/llms/llama-index-llms-premai/.gitignore b/llama-index-integrations/llms/llama-index-llms-premai/.gitignore new file mode 100644 index 0000000000..990c18de22 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg 
+*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/llms/llama-index-llms-premai/BUILD b/llama-index-integrations/llms/llama-index-llms-premai/BUILD new file mode 100644 index 0000000000..0896ca890d --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/llms/llama-index-llms-premai/Makefile b/llama-index-integrations/llms/llama-index-llms-premai/Makefile new file mode 100644 index 0000000000..b9eab05aa3 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/llms/llama-index-llms-premai/README.md b/llama-index-integrations/llms/llama-index-llms-premai/README.md new file mode 100644 index 0000000000..7a9b8f740b --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/README.md @@ -0,0 +1 @@ +# LlamaIndex Llms Integration: PremAI
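+
+## Usage
+
+Below is a minimal usage sketch. The project ID is a placeholder, the default LaunchPad model is assumed, and `PREMAI_API_KEY` is assumed to be set in the environment (it can also be passed as `premai_api_key`):
+
+```python
+from llama_index.llms.premai import PremAI
+
+# Placeholder project ID; use your own value
+llm = PremAI(project_id=8)
+response = llm.complete("Hello, what can you do?")
+print(response.text)
+```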
diff --git a/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/BUILD b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/BUILD new file mode 100644 index 0000000000..db46e8d6c9 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/__init__.py b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/__init__.py new file mode 100644 index 0000000000..c8dc52eadd --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/__init__.py @@ -0,0 +1,3 @@ +from llama_index.llms.premai.base import PremAI + +__all__ = ["PremAI"] diff --git a/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/base.py b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/base.py new file mode 100644 index 0000000000..94c4b20f9e --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/llama_index/llms/premai/base.py @@ -0,0 +1,332 @@ +"""PremAI's API integration with llama-index to interact with deployed projects.""" + +from typing import Any, Dict, Optional, Sequence, Callable + +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseGen, + CompletionResponse, + CompletionResponseGen, + LLMMetadata, + MessageRole, +) +from llama_index.core.types import BaseOutputParser, PydanticProgramMode +from llama_index.core.base.llms.generic_utils import ( + chat_to_completion_decorator, + get_from_param_or_env, + stream_chat_to_completion_decorator, +) + +from llama_index.core.llms.callbacks import ( + llm_chat_callback, + llm_completion_callback, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CallbackManager +from llama_index.core.llms.llm import LLM + +from premai import Prem + + +# FIXME: The current version does not support stop tokens and number of responses i.e. n > 1 +# TODO: Fetch the default values from prem-sdk + + +class ChatPremError(Exception): + pass + + +class PremAI(LLM): + """PremAI LLM Provider.""" + + project_id: int = Field( + description=( + "The project ID in which the experiments or deployments are carried out. You can find all your projects here: https://app.premai.io/projects/" + ) + ) + + session_id: Optional[str] = Field( + description="The ID of the session to use. It helps to track the chat history." + ) + + premai_api_key: Optional[str] = Field( + description="Prem AI API Key. Get it here: https://app.premai.io/api_keys/" + ) + + model: Optional[str] = Field( + description=( + "Name of the model. This is an optional parameter. The default model is the one deployed from Prem's LaunchPad. An example: https://app.premai.io/projects/<project-id>/launchpad. If the model name differs from the default model, it will override the calls to the model deployed from the LaunchPad." + ), + ) + system_prompt: Optional[str] = Field( + description=( + "The system prompt helps guide the model's generation and the way it acts. The default system prompt is the one set on your deployed LaunchPad model under the specified project." + ), + ) + + max_tokens: Optional[int] = Field( + description=("The max number of tokens to output from the LLM. ") + ) + + temperature: Optional[float] = Field( + description="Model temperature. Value should be >= 0 and <= 1.0" + ) + + top_p: Optional[float] = Field( + description="top_p adjusts the number of choices for each predicted token based on cumulative probabilities. Value should range between 0.0 and 1.0." + ) + + max_retries: Optional[int] = Field( + description="Max number of retries to call the API" + ) + + tools: Optional[Dict[str, Any]] = Field( + description="A list of tools the model may call. Currently, only functions are supported as a tool" + ) + + frequency_penalty: Optional[float] = Field( + description=( + "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far." + ), + ) + + presence_penalty: Optional[float] = Field( + description=( + "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far." + ), + ) + + logit_bias: Optional[dict] = Field( + description=( + "JSON object that maps tokens to an associated bias value from -100 to 100." + ), + ) + + seed: Optional[int] = Field( + description=( + "This feature is in Beta. If specified, our system will make a best effort to sample deterministically." + ), + ) + + _client: "Prem" = PrivateAttr() + + def __init__( + self, + project_id: int, + premai_api_key: Optional[str] = None, + session_id: Optional[int] = None, + model: Optional[str] = None, + system_prompt: Optional[str] = None, + max_tokens: Optional[int] = 128, + temperature: Optional[float] = 0.1, + top_p: Optional[float] = 0.7, + max_retries: Optional[int] = 1, + tools: Optional[Dict[str, Any]] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, + logit_bias: Optional[dict] = None, + seed: Optional[int] = None, + additional_kwargs: Optional[Dict[str, Any]] = None, + callback_manager: Optional[CallbackManager] = None, + messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None, + completion_to_prompt: Optional[Callable[[str], str]] = None, + pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT, + output_parser: Optional[BaseOutputParser] = None, + ): + additional_kwargs = additional_kwargs or {} + callback_manager = callback_manager or CallbackManager([]) + + api_key = get_from_param_or_env("api_key", premai_api_key, "PREMAI_API_KEY", "") + + if not api_key: + raise ValueError( + "You must provide an API key to use PremAI. " + "You can either pass it in as an argument or set the environment variable `PREMAI_API_KEY`. 
You can get your API key here: https://app.premai.io/api_keys/" + ) + + self._client = Prem(api_key=api_key) + + super().__init__( + project_id=project_id, + session_id=session_id, + temperature=temperature, + max_tokens=max_tokens, + model=model, + api_key=api_key, + callback_manager=callback_manager, + top_p=top_p, + system_prompt=system_prompt, + additional_kwargs=additional_kwargs, + logit_bias=logit_bias, + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + pydantic_program_mode=pydantic_program_mode, + output_parser=output_parser, + seed=seed, + max_retries=max_retries, + tools=tools, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + + @classmethod + def class_name(cls) -> str: + return "PremAI_LLM" + + @property + def metadata(self) -> LLMMetadata: + # TODO: We need to fetch information from prem-sdk here + return LLMMetadata( + num_output=self.max_tokens, + is_chat_model=True, + temperature=self.temperature, + top_p=self.top_p, + ) + + @property + def _model_kwargs(self) -> Dict[str, Any]: + return { + "model": self.model, + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "seed": self.seed, + "top_p": self.top_p, + "system_prompt": self.system_prompt, + "logit_bias": self.logit_bias, + "tools": self.tools, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + } + + def _get_all_kwargs(self, **kwargs) -> Dict[str, Any]: + all_kwargs = {**self._model_kwargs, **kwargs} + _keys_that_cannot_be_none = [ + "system_prompt", + "frequency_penalty", + "presence_penalty", + "tools", + "model", + ] + + for key in _keys_that_cannot_be_none: + if all_kwargs.get(key) is None: + all_kwargs.pop(key, None) + return all_kwargs + + @llm_chat_callback() + def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + all_kwargs = self._get_all_kwargs(**kwargs) + chat_messages = [] + + for message in messages: + if "system_prompt" in all_kwargs and message.role.value == "system": + continue + + elif "system_prompt" not in all_kwargs and message.role.value == "system": + all_kwargs["system_prompt"] = message.content + else: + chat_messages.append( + {"role": message.role.value, "content": message.content} + ) + response = self._client.chat.completions.create( + project_id=self.project_id, messages=chat_messages, **all_kwargs + ) + if not response.choices: + raise ChatPremError("ChatResponse must have at least one candidate") + + chat_responses: Sequence[ChatResponse] = [] + + for choice in response.choices: + role = choice.message.role + if role is None: + raise ChatPremError(f"ChatResponse {choice} must have a role.") + content = choice.message.content or "" + chat_responses.append( + ChatResponse( + message=ChatMessage(role=role, content=content), + raw={"role": role, "content": content}, + ) + ) + + if "is_completion" in kwargs: + return chat_responses[0] + + return chat_responses + + def stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + all_kwargs = self._get_all_kwargs(**kwargs) + chat_messages = [] + + for message in messages: + if "system_prompt" in all_kwargs and message.role.value == "system": + continue + + elif "system_prompt" not in all_kwargs and message.role.value == "system": + all_kwargs["system_prompt"] = message.content + else: + chat_messages.append( + {"role": message.role.value, "content": message.content} + ) + + response_generator = self._client.chat.completions.create( + 
project_id=self.project_id, + messages=chat_messages, + stream=True, + **all_kwargs, + ) + + def gen() -> ChatResponseGen: + content = "" + role = MessageRole.ASSISTANT + for chunk in response_generator: + delta = chunk.choices[0].delta + if delta is None or delta["content"] is None: + continue + + chunk_content = delta["content"] + content += chunk_content + + yield ChatResponse( + message=ChatMessage(content=content, role=role), delta=chunk_content + ) + + return gen() + + def achat(self): + raise NotImplementedError( + "Current version of premai does not support async calls." + ) + + @llm_completion_callback() + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + complete_fn = chat_to_completion_decorator(self.chat) + kwargs["is_completion"] = True + return complete_fn(prompt, **kwargs) + + @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: + stream_complete_fn = stream_chat_to_completion_decorator(self.stream_chat) + return stream_complete_fn(prompt, **kwargs) + + def acomplete(self): + raise NotImplementedError( + "Current version of premai does not support async calls." + ) + + def astream_complete(self): + raise NotImplementedError( + "Current version of premai does not support async calls." + ) + + def astream_chat(self): + raise NotImplementedError( + "Current version of premai does not support async calls." + ) diff --git a/llama-index-integrations/llms/llama-index-llms-premai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-premai/pyproject.toml new file mode 100644 index 0000000000..3ec50f7f84 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.llms.premai" + +[tool.llamahub.class_authors] +PremAI = "premai" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["PremAI"] +description = "llama-index llms Prem AI integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-llms-premai" +readme = "README.md" +version = "0.1.4" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.1" +premai = "^0.3.20" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/llms/llama-index-llms-premai/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-premai/tests/BUILD new file mode 100644 index 0000000000..dabf212d7e --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/tests/BUILD 
@@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/llms/llama-index-llms-premai/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-premai/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama-index-integrations/llms/llama-index-llms-premai/tests/test_llms_prem.py b/llama-index-integrations/llms/llama-index-llms-premai/tests/test_llms_prem.py new file mode 100644 index 0000000000..92992299cf --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-premai/tests/test_llms_prem.py @@ -0,0 +1,7 @@ +from llama_index.core.base.llms.base import BaseLLM +from llama_index.llms.premai import PremAI + + +def test_llm_class(): + names_of_base_classes = [b.__name__ for b in PremAI.__mro__] + assert BaseLLM.__name__ in names_of_base_classes -- GitLab