From bc0c28e895d964565bb7a7ef61d14b28e939bb0e Mon Sep 17 00:00:00 2001 From: Ulan Yisaev <55792896+ulan-yisaev@users.noreply.github.com> Date: Fri, 22 Mar 2024 05:51:40 +0200 Subject: [PATCH] Aleph alpha llm (#12149) * Added Aleph Alpha integration * formatted and linted * - fixed llm notebook; - added additional request params like log_probs; - added more tests * how to access additional information such as log probabilities * used built-in logprobs --------- Co-authored-by: Ulan.Yisaev <ulan.yisaev@nortal.com> --- docs/examples/embeddings/alephalpha.ipynb | 164 +++++++++ docs/examples/llm/alephalpha.ipynb | 318 ++++++++++++++++++ .../.gitignore | 153 +++++++++ .../llama-index-embeddings-alephalpha/BUILD | 3 + .../Makefile | 17 + .../README.md | 49 +++ .../llama_index/embeddings/alephalpha/BUILD | 1 + .../embeddings/alephalpha/__init__.py | 3 + .../llama_index/embeddings/alephalpha/base.py | 224 ++++++++++++ .../pyproject.toml | 63 ++++ .../tests/BUILD | 3 + .../tests/__init__.py | 0 .../tests/test_embeddings_alephalpha.py | 7 + .../llama-index-llms-alephalpha/.gitignore | 153 +++++++++ .../llms/llama-index-llms-alephalpha/BUILD | 3 + .../llms/llama-index-llms-alephalpha/Makefile | 17 + .../llama-index-llms-alephalpha/README.md | 61 ++++ .../llama_index/llms/alephalpha/BUILD | 1 + .../llama_index/llms/alephalpha/__init__.py | 3 + .../llama_index/llms/alephalpha/base.py | 292 ++++++++++++++++ .../llama_index/llms/alephalpha/utils.py | 79 +++++ .../pyproject.toml | 66 ++++ .../llama-index-llms-alephalpha/tests/BUILD | 1 + .../tests/__init__.py | 0 .../tests/test_llms_alephalpha.py | 65 ++++ 25 files changed, 1746 insertions(+) create mode 100644 docs/examples/embeddings/alephalpha.ipynb create mode 100644 docs/examples/llm/alephalpha.ipynb create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/.gitignore create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/Makefile create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/README.md create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/base.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/pyproject.toml create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/test_embeddings_alephalpha.py create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/.gitignore create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/BUILD create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/Makefile create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/README.md create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/BUILD create mode 100644 
llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/base.py create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/utils.py create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/pyproject.toml create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/tests/BUILD create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/tests/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-alephalpha/tests/test_llms_alephalpha.py diff --git a/docs/examples/embeddings/alephalpha.ipynb b/docs/examples/embeddings/alephalpha.ipynb new file mode 100644 index 0000000000..2ba14c0d3d --- /dev/null +++ b/docs/examples/embeddings/alephalpha.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a href=\"https://colab.research.google.com/github/ulan-yisaev/llama_index/blob/main/docs/examples/embeddings/alephalpha.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Aleph Alpha Embeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-embeddings-alephalpha" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install llama-index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialise with your AA token\n", + "import os\n", + "\n", + "os.environ[\"AA_TOKEN\"] = \"your_token_here\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### With `luminous-base` embeddings.\n", + "\n", + "- representation=\"Document\": Use this for texts (documents) you want to store in your vector database\n", + "- representation=\"Query\": Use this for search queries to find the most relevant documents in your vector database\n", + "- representation=\"Symmetric\": Use this for clustering, classification, anomaly detection or visualisation tasks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "representation_enum: SemanticRepresentation.Query\n", + "\n", + "\n", + "5120\n", + "[0.14257812, 2.59375, 0.33203125, -0.33789062, -0.94140625]\n" + ] + } + ], + "source": [ + "from llama_index.embeddings.alephalpha import AlephAlphaEmbedding\n", + "\n", + "# To customize your token, do this\n", + "# otherwise it will lookup AA_TOKEN from your env variable\n", + "# embed_model = AlephAlpha(token=\"<aa_token>\")\n", + "\n", + "# with representation='query'\n", + "embed_model = AlephAlphaEmbedding(\n", + " model=\"luminous-base\",\n", + " representation=\"Query\",\n", + ")\n", + "\n", + "embeddings = embed_model.get_text_embedding(\"Hello Aleph Alpha!\")\n", + "\n", + "print(len(embeddings))\n", + "print(embeddings[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "representation_enum: SemanticRepresentation.Document\n", + "\n", + "\n", + "5120\n", + "[0.14257812, 2.59375, 0.33203125, -0.33789062, -0.94140625]\n" + ] + } + ], + "source": [ + "# with representation='Document'\n", + "embed_model = AlephAlphaEmbedding(\n", + " model=\"luminous-base\",\n", + " representation=\"Document\",\n", + ")\n", + "\n", + "embeddings = embed_model.get_text_embedding(\"Hello Aleph Alpha!\")\n", + "\n", + "print(len(embeddings))\n", + "print(embeddings[:5])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + }, + "vscode": { + "interpreter": { + "hash": "64bcadabe4cd61f3d117ba0da9d14bf2f8e35582ff79e821f2e71056f2723d1e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/llm/alephalpha.ipynb b/docs/examples/llm/alephalpha.ipynb new file mode 100644 index 0000000000..8425edb618 --- /dev/null +++ b/docs/examples/llm/alephalpha.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6453d3d5", + "metadata": {}, + "source": [ + "<a href=\"https://colab.research.google.com/github/ulan-yisaev/llama_index/blob/main/docs/examples/llm/alephalpha.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "markdown", + "id": "731d44db9e0d236c", + "metadata": {}, + "source": [ + "# Aleph Alpha\n", + "\n", + "Aleph Alpha is a powerful language model that can generate human-like text. Aleph Alpha is capable of generating text in multiple languages and styles, and can be fine-tuned to generate text in specific domains." + ] + }, + { + "cell_type": "markdown", + "id": "c78b172f", + "metadata": {}, + "source": [ + "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e31874a", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-llms-alephalpha" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50fc1a30", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install llama-index" + ] + }, + { + "cell_type": "markdown", + "id": "4f2a12560669efa6", + "metadata": {}, + "source": [ + "#### Set your Aleph Alpha token\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85fbba23", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"AA_TOKEN\"] = \"your_token_here\"" + ] + }, + { + "cell_type": "markdown", + "id": "b81a3ef6-2ee5-460d-9aa4-f73708774014", + "metadata": {}, + "source": [ + "#### Call `complete` with a prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "910b50ad-c55e-487e-8808-5905dfaa78b4", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.alephalpha import AlephAlpha\n", + "\n", + "# To customize your token, do this\n", + "# otherwise it will lookup AA_TOKEN from your env variable\n", + "# llm = AlephAlpha(token=\"<aa_token>\")\n", + "llm = AlephAlpha(model=\"luminous-base-control\")\n", + "\n", + "resp = llm.complete(\"Paul Graham is \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfda925e-89c5-47a6-9311-16916ab08b66", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " a well-known computer scientist and entrepreneur. He is the co-founder of PayPal and a co-founder of the Y Combinator startup accelerator. He has also co-authored the book \"Programming the Web\". Paul Graham is also a frequent speaker and writer on topics related to computer science, entrepreneurship, and startups. He has written several blog posts on the topic of \"Why Startups Fail\". In this post, I will summarize some of the key points from Paul Graham's blog post on why startups fail.\n", + "\n", + "1. Lack of a clear vision: Startups often lack a clear vision of what they\n" + ] + } + ], + "source": [ + "print(resp)" + ] + }, + { + "cell_type": "markdown", + "id": "413fbe0a60a8e449", + "metadata": {}, + "source": [ + "#### Additional Response Details\n", + "To access detailed response information such as log probabilities, ensure your AlephAlpha instance is initialized with the `log_probs` parameter. The `logprobs` attribute of the `CompletionResponse` will contain this data. 
Other details like the model version and raw completion text can be accessed directly if they're part of the response or via `additional_kwargs`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de81c4b2e2c36894", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Log Probabilities:\n", + "Token: a, LogProb: -0.95955\n", + "Token: well, LogProb: -1.9219251\n", + "Token: -, LogProb: -0.1312752\n", + "Token: known, LogProb: -0.022855662\n", + "Token: computer, LogProb: -0.9569155\n", + "Token: scientist, LogProb: -0.06721641\n", + "Token: and, LogProb: -0.56296504\n", + "Token: entrepreneur, LogProb: -0.65574974\n", + "Token: ., LogProb: -0.5926046\n", + "Token: He, LogProb: -0.1885516\n", + "Token: is, LogProb: -0.3927348\n", + "Token: the, LogProb: -0.46820825\n", + "Token: co, LogProb: -0.465878\n", + "Token: -, LogProb: -0.024082167\n", + "Token: founder, LogProb: -0.009869587\n", + "Token: of, LogProb: -0.31641242\n", + "Token: PayPal, LogProb: -1.0825713\n", + "Token: and, LogProb: -0.39408743\n", + "Token: a, LogProb: -1.45493\n", + "Token: co, LogProb: -1.0837904\n", + "Token: -, LogProb: -0.0011430404\n", + "Token: founder, LogProb: -0.074010715\n", + "Token: of, LogProb: -0.038962167\n", + "Token: the, LogProb: -1.7761776\n", + "Token: Y, LogProb: -0.41853565\n", + "Token: Combin, LogProb: -0.17868777\n", + "Token: ator, LogProb: -2.0265374e-05\n", + "Token: startup, LogProb: -0.24595682\n", + "Token: acceler, LogProb: -0.5855012\n", + "Token: ator, LogProb: -6.675698e-06\n", + "Token: ., LogProb: -0.022597663\n", + "Token: He, LogProb: -0.8310143\n", + "Token: has, LogProb: -1.5842702\n", + "Token: also, LogProb: -0.5774656\n", + "Token: been, LogProb: -1.3938092\n", + "Token: a, LogProb: -0.67207164\n", + "Token: professor, LogProb: -1.0511048\n", + "Token: at, LogProb: -0.13273911\n", + "Token: the, LogProb: -0.7993539\n", + "Token: MIT, LogProb: -1.2281163\n", + "Token: Media, LogProb: -0.7707413\n", + "Token: Lab, LogProb: -0.06716257\n", + "Token: ., LogProb: -0.9140582\n", + "Token: Paul, LogProb: -0.8244309\n", + "Token: Graham, LogProb: -0.15202633\n", + "Token: has, LogProb: -1.3735206\n", + "Token: written, LogProb: -0.77148163\n", + "Token: several, LogProb: -0.7167357\n", + "Token: books, LogProb: -0.24542983\n", + "Token: on, LogProb: -0.77700675\n", + "Token: computer, LogProb: -0.8485363\n", + "Token: science, LogProb: -0.026196867\n", + "Token: and, LogProb: -0.4796574\n", + "Token: entrepreneurs, LogProb: -0.48952234\n", + "Token: hip, LogProb: -1.0847986e-05\n", + "Token: ,, LogProb: -0.1426171\n", + "Token: including, LogProb: -0.10799221\n", + "Token: \", LogProb: -0.4733107\n", + "Token: Program, LogProb: -0.9295699\n", + "Token: ming, LogProb: -0.00090034\n", + "Token: the, LogProb: -1.5219054\n", + "Token: Universe, LogProb: -1.2475122\n", + "Token: \", LogProb: -0.8377396\n", + "Token: and, LogProb: -0.014596111\n", + "Token: \", LogProb: -0.0034322182\n", + "Token: The, LogProb: -0.97810173\n", + "Token: Art, LogProb: -1.4708842\n", + "Token: of, LogProb: -0.0017665509\n", + "Token: Computer, LogProb: -0.027323013\n", + "Token: Programming, LogProb: -0.09090222\n", + "Token: \"., LogProb: -0.2312944\n", + "Token: He, LogProb: -0.9431941\n", + "Token: is, LogProb: -0.52350885\n", + "Token: also, LogProb: -0.8409716\n", + "Token: the, LogProb: -1.2813272\n", + "Token: founder, LogProb: -0.8080497\n", + "Token: of, LogProb: -0.12735468\n", + "Token: the, LogProb: -0.26858208\n", 
+ "Token: startup, LogProb: -1.7183943\n", + "Token: incub, LogProb: -0.71643037\n", + "Token: ator, LogProb: -0.00013922676\n", + "Token: ,, LogProb: -1.6374074\n", + "Token: Y, LogProb: -1.3464186\n", + "Token: Combin, LogProb: -0.043204635\n", + "Token: ator, LogProb: -1.490105e-05\n", + "Token: ., LogProb: -0.48073012\n", + "Token: <|endoftext|>, LogProb: -0.30235213\n", + "\n", + "Model Version:\n", + "20240215\n", + "\n", + "Raw Completion:\n", + " a well-known computer scientist and entrepreneur. He is the co-founder of PayPal and a co-founder of the Y Combinator startup accelerator. He has also been a professor at the MIT Media Lab. Paul Graham has written several books on computer science and entrepreneurship, including \"Programming the Universe\" and \"The Art of Computer Programming\". He is also the founder of the startup incubator, Y Combinator.\n" + ] + } + ], + "source": [ + "from llama_index.llms.alephalpha import AlephAlpha\n", + "\n", + "llm = AlephAlpha(model=\"luminous-base-control\", log_probs=0)\n", + "\n", + "resp = llm.complete(\"Paul Graham is \")\n", + "\n", + "if resp.logprobs is not None:\n", + " print(\"\\nLog Probabilities:\")\n", + " for lp_list in resp.logprobs:\n", + " for lp in lp_list:\n", + " print(f\"Token: {lp.token}, LogProb: {lp.logprob}\")\n", + "\n", + "if \"model_version\" in resp.additional_kwargs:\n", + " print(\"\\nModel Version:\")\n", + " print(resp.additional_kwargs[\"model_version\"])\n", + "\n", + "if \"raw_completion\" in resp.additional_kwargs:\n", + " print(\"\\nRaw Completion:\")\n", + " print(resp.additional_kwargs[\"raw_completion\"])" + ] + }, + { + "cell_type": "markdown", + "id": "5152a2b4-78e6-47a5-933d-f5186ec0f775", + "metadata": {}, + "source": [ + "## Async" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7df04c0e-43ee-4176-9aad-94781d0ed36d", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.alephalpha import AlephAlpha\n", + "\n", + "llm = AlephAlpha(model=\"luminous-base-control\")\n", + "resp = await llm.acomplete(\"Paul Graham is \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be09c52b-604a-4f05-8f93-36e6ea882ff5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " a computer scientist and entrepreneur who is known for his work in the field of artificial intelligence and computer science. He is the co-founder of the company Y Combinator, which is a startup accelerator that helps startups get funding and resources. Paul Graham has also written several books on computer science and entrepreneurship, including \"Programming: Principles and Practice\" and \"The Art of Computer Programming\". 
He is a well-known figure in the computer science community and has made significant contributions to the field.\n" + ] + } + ], + "source": [ + "print(resp)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/.gitignore new file mode 100644 index 0000000000..990c18de22 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/BUILD new file mode 100644 index 0000000000..0896ca890d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/Makefile new file mode 100644 index 0000000000..b9eab05aa3 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/README.md new file mode 100644 index 0000000000..4e99df8d7c --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/README.md @@ -0,0 +1,49 @@ +# LlamaIndex Embeddings Integration: Aleph Alpha + +This README provides an overview of integrating Aleph Alpha's semantic embeddings with LlamaIndex. Aleph Alpha's API enables the generation of semantic embeddings from text, which can be used for downstream tasks such as semantic similarity and models like classifiers. + +## Features + +- **Semantic Embeddings:** Generate embeddings for text prompts using Aleph Alpha models. +- **Model Selection:** Utilize the latest version of specified models for generating embeddings. +- **Representation Types:** Choose from `symmetric`, `document`, and `query` embeddings based on your use case. +- **Compression:** Option to compress embeddings to 128 dimensions for faster comparison. +- **Normalization:** Retrieve normalized embeddings to optimize cosine similarity calculations. + +## Installation + +```bash +pip install llama-index-embeddings-alephalpha +``` + +## Usage + +```python +from llama_index.embeddings.alephalpha import AlephAlphaEmbedding +``` + +1. **Request Parameters:** + + - `model`: Model name (e.g., `luminous-base`). The latest model version is used. + - `representation`: Type of embedding (`symmetric`, `document`, `query`). 
+ - `prompt`: Text or multimodal prompt to embed. Supports text strings or an array of multimodal items. + - `compress_to_size`: Optional compression to 128 dimensions. + - `normalize`: Set to `true` for normalized embeddings. + +2. **Advanced Parameters:** + - `hosting`: Datacenter processing option (`aleph-alpha` for maximal data privacy). + - `contextual_control_threshold`, `control_log_additive`: Control attention parameters for advanced use cases. + +## Response Structure + +- `model_version`: Model name and version used for inference. +- `embedding`: List of floats representing the generated embedding. +- `num_tokens_prompt_total`: Total number of tokens in the input prompt. + +## Example + +See the [example notebook](../../../docs/examples/embeddings/alephalpha.ipynb) for a detailed walkthrough of using Aleph Alpha embeddings with LlamaIndex. + +## API Documentation + +For more detailed API documentation and available models, visit [Aleph Alpha's API Docs](https://docs.aleph-alpha.com/api/semantic-embed/). diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/BUILD new file mode 100644 index 0000000000..db46e8d6c9 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/__init__.py new file mode 100644 index 0000000000..4c1399a7ca --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/__init__.py @@ -0,0 +1,3 @@ +from llama_index.embeddings.alephalpha.base import AlephAlphaEmbedding + +__all__ = ["AlephAlphaEmbedding"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/base.py new file mode 100644 index 0000000000..40da8ec5e5 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/base.py @@ -0,0 +1,224 @@ +from typing import Any, List, Optional, Dict + +from aleph_alpha_client import ( + Client, + AsyncClient, + Prompt, + SemanticEmbeddingRequest, + SemanticRepresentation, + BatchSemanticEmbeddingRequest, + BatchSemanticEmbeddingResponse, +) +from llama_index.core.base.embeddings.base import ( + BaseEmbedding, +) +from llama_index.core.base.llms.generic_utils import get_from_param_or_env +from llama_index.core.bridge.pydantic import Field, PrivateAttr + +DEFAULT_ALEPHALPHA_MODEL = "luminous-base" +DEFAULT_ALEPHALPHA_HOST = "https://api.aleph-alpha.com" + +VALID_REPRESENTATION_TYPES = [ + None, + SemanticRepresentation.Symmetric, + SemanticRepresentation.Query, + SemanticRepresentation.Document, +] + + +class AlephAlphaEmbedding(BaseEmbedding): + """AlephAlphaEmbedding uses the Aleph Alpha API to generate embeddings for text.""" + + model: str = Field( + default=DEFAULT_ALEPHALPHA_MODEL, description="The Aleph Alpha model to use." 
+ ) + token: str = Field(default=None, description="The Aleph Alpha API token.") + representation: Optional[str] = Field( + default=SemanticRepresentation.Query, + description="The representation type to use for generating embeddings.", + ) + compress_to_size: Optional[int] = Field( + default=None, + description="The size to compress the embeddings to.", + gt=0, + ) + base_url: Optional[str] = Field( + default=DEFAULT_ALEPHALPHA_HOST, description="The hostname of the API base_url." + ) + timeout: Optional[float] = Field( + default=None, description="The timeout to use in seconds.", gte=0 + ) + max_retries: int = Field( + default=10, description="The maximum number of API retries.", gte=0 + ) + normalize: Optional[bool] = Field( + default=False, description="Return normalized embeddings." + ) + hosting: Optional[str] = Field(default=None, description="The hosting to use.") + nice: bool = Field(default=False, description="Whether to be nice to the API.") + verify_ssl: bool = Field(default=True, description="Whether to verify SSL.") + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, description="Additional kwargs for the Aleph Alpha API." + ) + + # Instance variables initialized via Pydantic's mechanism + _client: Any = PrivateAttr() + _aclient: Any = PrivateAttr() + + def __init__( + self, + model: str = DEFAULT_ALEPHALPHA_MODEL, + token: Optional[str] = None, + representation: Optional[str] = None, + base_url: Optional[str] = DEFAULT_ALEPHALPHA_HOST, + hosting: Optional[str] = None, + timeout: Optional[float] = None, + max_retries: int = 10, + nice: bool = False, + verify_ssl: bool = True, + additional_kwargs: Optional[Dict[str, Any]] = None, + ): + """ + A class representation for generating embeddings using the AlephAlpha API. + + Args: + token: The token to use for the AlephAlpha API. + model: The model to use for generating embeddings. + base_url: The base URL of the AlephAlpha API. + nice: Whether to use the "nice" mode for the AlephAlpha API. + additional_kwargs: Additional kwargs for the AlephAlpha API. + + """ + additional_kwargs = additional_kwargs or {} + + super().__init__( + model=model, + representation=representation, + base_url=base_url, + token=token, + nice=nice, + additional_kwargs=additional_kwargs, + ) + + self.token = get_from_param_or_env("aa_token", token, "AA_TOKEN", "") + + if representation is not None and isinstance(representation, str): + try: + representation_enum = SemanticRepresentation[ + representation.capitalize() + ] + except KeyError: + raise ValueError( + f"{representation} is not a valid representation type. 
Available types are: {list(SemanticRepresentation.__members__.keys())}" + ) + self.representation = representation_enum + else: + self.representation = representation + + self._client = None + self._aclient = None + + @classmethod + def class_name(cls) -> str: + return "AlephAlphaEmbedding" + + def _get_credential_kwargs(self) -> Dict[str, Any]: + return { + "token": self.token, + "host": self.base_url, + "hosting": self.hosting, + "request_timeout_seconds": self.timeout, + "total_retries": self.max_retries, + "nice": self.nice, + "verify_ssl": self.verify_ssl, + } + + def _get_client(self) -> Client: + if self._client is None: + self._client = Client(**self._get_credential_kwargs()) + return self._client + + def _get_aclient(self) -> AsyncClient: + if self._aclient is None: + self._aclient = AsyncClient(**self._get_credential_kwargs()) + return self._aclient + + def _get_embedding(self, text: str, representation: str) -> List[float]: + """Embed sentence using AlephAlpha.""" + client = self._get_client() + request = SemanticEmbeddingRequest( + prompt=Prompt.from_text(text), + representation=representation or self.representation, + compress_to_size=self.compress_to_size, + normalize=self.normalize, + ) + result = client.semantic_embed(request=request, model=self.model) + return result.embedding + + async def _aget_embedding(self, text: str, representation: str) -> List[float]: + """Get embedding async.""" + aclient = self._get_aclient() + request = SemanticEmbeddingRequest( + prompt=Prompt.from_text(text), + representation=representation or self.representation, + compress_to_size=self.compress_to_size, + normalize=self.normalize, + ) + result = await aclient.semantic_embed(request=request, model=self.model) + return result.embedding + + def _get_embeddings( + self, texts: List[str], representation: str + ) -> List[List[float]]: + """Embed sentences using AlephAlpha.""" + client = self._get_client() + request = BatchSemanticEmbeddingRequest( + prompts=[Prompt.from_text(text) for text in texts], + representation=representation or self.representation, + compress_to_size=self.compress_to_size, + normalize=self.normalize, + ) + result: BatchSemanticEmbeddingResponse = client.batch_semantic_embed( + request=request, model=self.model + ) + return result.embeddings + + async def _aget_embeddings( + self, texts: List[str], representation: str + ) -> List[List[float]]: + """Get embeddings async.""" + aclient = self._get_aclient() + request = BatchSemanticEmbeddingRequest( + prompts=[Prompt.from_text(text) for text in texts], + representation=representation or self.representation, + compress_to_size=self.compress_to_size, + normalize=self.normalize, + ) + result: BatchSemanticEmbeddingResponse = await aclient.batch_semantic_embed( + request=request, model=self.model + ) + return result.embeddings + + def _get_query_embedding(self, query: str) -> List[float]: + """Get query embedding. For query embeddings, representation='query'.""" + return self._get_embedding(query, SemanticRepresentation.Query) + + async def _aget_query_embedding(self, query: str) -> List[float]: + """Get query embedding async. For query embeddings, representation='query'.""" + return self._aget_embedding(query, SemanticRepresentation.Query) + + def _get_text_embedding(self, text: str) -> List[float]: + """Get text embedding. 
For text embeddings, representation='document'.""" + return self._get_embedding(text, SemanticRepresentation.Document) + + async def _aget_text_embedding(self, text: str) -> List[float]: + """Get text embedding async.""" + return self._aget_embedding(text, SemanticRepresentation.Document) + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + """Get text embeddings.""" + return self._get_embeddings(texts, SemanticRepresentation.Document) + + async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]: + """Get text embeddings async.""" + return self._aget_embeddings(texts, SemanticRepresentation.Document) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/pyproject.toml new file mode 100644 index 0000000000..9ee80c068d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = true +import_path = "llama_index.embeddings.alephalpha" + +[tool.llamahub.class_authors] +AlephAlphaEmbedding = "ulan-yisaev" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Your Name <you@example.com>"] +description = "llama-index embeddings alephalpha integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-embeddings-alephalpha" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.1" +aleph-alpha-client = "^7.0.1" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/BUILD new file mode 100644 index 0000000000..8cf27481fd --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/BUILD @@ -0,0 +1,3 @@ +python_tests( + dependencies=["llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/llama_index/embeddings/alephalpha/base.py"] +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/test_embeddings_alephalpha.py b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/test_embeddings_alephalpha.py new file mode 100644 
index 0000000000..9504c9b4ae --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-alephalpha/tests/test_embeddings_alephalpha.py @@ -0,0 +1,7 @@ +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.embeddings.alephalpha import AlephAlphaEmbedding + + +def test_anyscale_class(): + emb = AlephAlphaEmbedding(token="fake_token", model="luminous-base") + assert isinstance(emb, BaseEmbedding) diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/.gitignore b/llama-index-integrations/llms/llama-index-llms-alephalpha/.gitignore new file mode 100644 index 0000000000..990c18de22 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/BUILD b/llama-index-integrations/llms/llama-index-llms-alephalpha/BUILD new file mode 100644 index 0000000000..0896ca890d --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/Makefile b/llama-index-integrations/llms/llama-index-llms-alephalpha/Makefile new file mode 100644 index 0000000000..b9eab05aa3 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/README.md b/llama-index-integrations/llms/llama-index-llms-alephalpha/README.md new file mode 100644 index 0000000000..b6f6f83d27 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/README.md @@ -0,0 +1,61 @@ +# LlamaIndex LLM Integration: Aleph Alpha + +This README details the process of integrating Aleph Alpha's Large Language Models (LLMs) with LlamaIndex. Utilizing Aleph Alpha's API, users can generate completions, facilitate question-answering, and perform a variety of other natural language processing tasks directly within the LlamaIndex framework. + +## Features + +- **Text Completion:** Use Aleph Alpha LLMs to generate text completions for prompts. +- **Model Selection:** Access the latest Aleph Alpha models, including the Luminous model family, to generate responses. +- **Advanced Sampling Controls:** Customize the response generation with parameters like temperature, top_k, top_p, presence_penalty, and more, to fine-tune the creativity and relevance of the generated text. +- **Control Parameters:** Apply attention control parameters for advanced use cases, affecting how the model focuses on different parts of the input. + +## Installation + +```bash +pip install llama-index-llms-alephalpha +``` + +## Usage + +```python +from llama_index.llms.alephalpha import AlephAlpha +``` + +1. **Request Parameters:** + + - `model`: Specify the model name (e.g., `luminous-base-control`). The latest model version is always used. + - `prompt`: The text prompt for the model to complete. 
+ - `maximum_tokens`: The maximum number of tokens to generate. + - `temperature`: Adjusts the randomness of the completions. + - `top_k`: Limits the sampled tokens to the top k probabilities. + - `top_p`: Limits the sampled tokens to the cumulative probability of the top tokens. + - `log_probs`: Set to `true` to return the log probabilities of the tokens. + - `echo`: Set to `true` to return the input prompt along with the completion. + - `penalty_exceptions`: A list of tokens that should not be penalized. + - `n`: Number of completions to generate. + +2. **Advanced Sampling Parameters:** (Optional) + + - `presence_penalty` & `frequency_penalty`: Adjust to discourage repetition. + - `sequence_penalty`: Reduces likelihood of repeating token sequences. + - `hosting`: Option to process the request in Aleph Alpha's own datacenters for enhanced data privacy. + +## Response Structure + + * `model_version`: The name and version of the model used. + * `completions`: A list containing the generated text completion(s) and optional metadata: + * `completion`: The generated text completion. + * `log_probs`: Log probabilities of the tokens in the completion. + * `raw_completion`: The raw completion without any post-processing. + * `completion_tokens`: Completion split into tokens. + * `finish_reason`: Reason for completion termination. + * `num_tokens_prompt_total`: Total number of tokens in the input prompt. + * `num_tokens_generated`: Number of tokens generated in the completion. + +## Example + +Refer to the [example notebook](../../../docs/examples/llm/alephalpha.ipynb) for a comprehensive guide on generating text completions with Aleph Alpha models in LlamaIndex. + +## API Documentation + +For further details on the API and available models, please consult [Aleph Alpha's API Documentation](https://docs.aleph-alpha.com/api/complete/). 
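As a quick illustration of the request parameters listed above, here is a minimal completion sketch that mirrors the example notebook added in this patch. It assumes the `llama-index-llms-alephalpha` package is installed and that `AA_TOKEN` holds a valid Aleph Alpha token; the token value, prompt, and parameter values below are placeholders, not recommendations.

```python
import os

from llama_index.llms.alephalpha import AlephAlpha

os.environ["AA_TOKEN"] = "your_token_here"  # placeholder; read from the environment if not passed

llm = AlephAlpha(
    model="luminous-base-control",  # the latest version of the named model is used
    max_tokens=128,                 # sent to the API as `maximum_tokens`
    temperature=0.0,
    log_probs=0,                    # request log probabilities for the sampled tokens
)

resp = llm.complete("Paul Graham is ")
print(resp.text)

# Log probabilities and extra metadata are exposed on the CompletionResponse.
if resp.logprobs is not None:
    for lp_list in resp.logprobs:
        for lp in lp_list:
            print(f"{lp.token}: {lp.logprob}")
print(resp.additional_kwargs.get("model_version"))
```

Async completion follows the same shape via `await llm.acomplete("Paul Graham is ")`, as shown in the example notebook.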
diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/BUILD b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/BUILD new file mode 100644 index 0000000000..db46e8d6c9 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/__init__.py b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/__init__.py new file mode 100644 index 0000000000..9ca49d33e4 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/__init__.py @@ -0,0 +1,3 @@ +from llama_index.llms.alephalpha.base import AlephAlpha + +__all__ = ["AlephAlpha"] diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/base.py b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/base.py new file mode 100644 index 0000000000..652cc588c4 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/base.py @@ -0,0 +1,292 @@ +from typing import Any, Dict, Optional, Sequence, List + +from aleph_alpha_client import Prompt, CompletionRequest, Client, AsyncClient +from llama_index.core.base.llms.generic_utils import get_from_param_or_env +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + CompletionResponse, + CompletionResponseGen, + LLMMetadata, + ChatResponseGen, + CompletionResponseAsyncGen, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.constants import DEFAULT_TEMPERATURE +from llama_index.core.llms.callbacks import ( + llm_chat_callback, + llm_completion_callback, +) +from llama_index.core.llms.llm import LLM +from llama_index.core.utils import Tokenizer + +from llama_index.llms.alephalpha.utils import ( + alephalpha_modelname_to_contextsize, + process_response, +) + +DEFAULT_ALEPHALPHA_MODEL = "luminous-supreme-control" +DEFAULT_ALEPHALPHA_MAX_TOKENS = 128 +DEFAULT_ALEPHALPHA_HOST = "https://api.aleph-alpha.com" + + +class AlephAlpha(LLM): + """Aleph Alpha LLMs.""" + + model: str = Field( + default=DEFAULT_ALEPHALPHA_MODEL, description="The Aleph Alpha model to use." + ) + token: str = Field(default=None, description="The Aleph Alpha API token.") + temperature: float = Field( + default=DEFAULT_TEMPERATURE, + description="The temperature to use for sampling.", + gte=0.0, + lte=1.0, + ) + max_tokens: int = Field( + default=DEFAULT_ALEPHALPHA_MAX_TOKENS, + description="The maximum number of tokens to generate.", + gt=0, + ) + base_url: Optional[str] = Field( + default=DEFAULT_ALEPHALPHA_HOST, description="The hostname of the API base_url." + ) + timeout: Optional[float] = Field( + default=None, description="The timeout to use in seconds.", gte=0 + ) + max_retries: int = Field( + default=10, description="The maximum number of API retries.", gte=0 + ) + hosting: Optional[str] = Field(default=None, description="The hosting to use.") + nice: bool = Field(default=False, description="Whether to be nice to the API.") + verify_ssl: bool = Field(default=True, description="Whether to verify SSL.") + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, description="Additional kwargs for the Aleph Alpha API." 
+ ) + repetition_penalties_include_prompt = Field( + default=True, + description="Whether presence penalty or frequency penalty are updated from the prompt", + ) + repetition_penalties_include_completion = Field( + default=True, + description="Whether presence penalty or frequency penalty are updated from the completion.", + ) + sequence_penalty = Field( + default=0.7, + description="The sequence penalty to use. Increasing the sequence penalty reduces the likelihood of reproducing token sequences that already appear in the prompt", + gte=0.0, + lte=1.0, + ) + sequence_penalty_min_length = Field( + default=3, + description="Minimal number of tokens to be considered as sequence. Must be greater or equal 2.", + gte=2, + ) + stop_sequences = Field(default=["\n\n"], description="The stop sequences to use.") + log_probs: Optional[int] = Field( + default=None, + description="Number of top log probabilities to return for each token generated.", + ge=0, + ) + top_p: Optional[float] = Field( + default=None, + description="Nucleus sampling parameter controlling the cumulative probability threshold.", + ge=0.0, + le=1.0, + ) + echo: Optional[bool] = Field( + default=False, description="Echo the prompt in the completion." + ) + penalty_exceptions: Optional[List[str]] = Field( + default=None, + description="List of strings that may be generated without penalty, regardless of other penalty settings.", + ) + n: Optional[int] = Field( + default=1, + description="The number of completions to return. Useful for generating multiple alternatives.", + ) + + _client: Optional[Client] = PrivateAttr() + _aclient: Optional[AsyncClient] = PrivateAttr() + + def __init__( + self, + model: str = DEFAULT_ALEPHALPHA_MODEL, + temperature: float = DEFAULT_TEMPERATURE, + max_tokens: int = DEFAULT_ALEPHALPHA_MAX_TOKENS, + base_url: Optional[str] = DEFAULT_ALEPHALPHA_HOST, + timeout: Optional[float] = None, + max_retries: int = 10, + token: Optional[str] = None, + hosting: Optional[str] = None, + nice: bool = False, + verify_ssl: bool = True, + log_probs: Optional[int] = None, + top_p: Optional[float] = None, + echo: Optional[bool] = False, + penalty_exceptions: Optional[List[str]] = None, + n: Optional[int] = 1, + additional_kwargs: Optional[Dict[str, Any]] = None, + ) -> None: + additional_kwargs = additional_kwargs or {} + + super().__init__( + model=model, + temperature=temperature, + max_tokens=max_tokens, + additional_kwargs=additional_kwargs, + base_url=base_url, + timeout=timeout, + max_retries=max_retries, + hosting=hosting, + nice=nice, + verify_ssl=verify_ssl, + ) + + self.token = get_from_param_or_env("aa_token", token, "AA_TOKEN", "") + + self.log_probs = log_probs + self.top_p = top_p + self.echo = echo + self.penalty_exceptions = penalty_exceptions + self.n = n + + self._client = None + self._aclient = None + + @classmethod + def class_name(cls) -> str: + return "AlephAlpha" + + @property + def metadata(self) -> LLMMetadata: + return LLMMetadata( + context_window=alephalpha_modelname_to_contextsize(self.model), + num_output=self.max_tokens, + is_chat_model=False, # The Aleph Alpha API does not support chat yet + model_name=self.model, + ) + + @property + def tokenizer(self) -> Tokenizer: + client = self._get_client() + return client.tokenizer(model=self.model) + + @property + def _model_kwargs(self) -> Dict[str, Any]: + base_kwargs = { + "model": self.model, + "temperature": self.temperature, + "maximum_tokens": self.max_tokens, + } + return { + **base_kwargs, + **self.additional_kwargs, + } + + @property + def 
_completion_kwargs(self) -> Dict[str, Any]: + completion_kwargs = { + "maximum_tokens": self.max_tokens, + "temperature": self.temperature, + "log_probs": self.log_probs, + "top_p": self.top_p, + "echo": self.echo, + "penalty_exceptions": self.penalty_exceptions, + "n": self.n, + "repetition_penalties_include_prompt": self.repetition_penalties_include_prompt, + "repetition_penalties_include_completion": self.repetition_penalties_include_completion, + "sequence_penalty": self.sequence_penalty, + "sequence_penalty_min_length": self.sequence_penalty_min_length, + "stop_sequences": self.stop_sequences, + } + + return {k: v for k, v in completion_kwargs.items() if v is not None} + + def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + return { + **self._model_kwargs, + **kwargs, + } + + def _get_credential_kwargs(self) -> Dict[str, Any]: + return { + "token": self.token, + "host": self.base_url, + "hosting": self.hosting, + "request_timeout_seconds": self.timeout, + "total_retries": self.max_retries, + "nice": self.nice, + "verify_ssl": self.verify_ssl, + } + + def _get_client(self) -> Client: + if self._client is None: + self._client = Client(**self._get_credential_kwargs()) + return self._client + + def _get_aclient(self) -> AsyncClient: + if self._aclient is None: + self._aclient = AsyncClient(**self._get_credential_kwargs()) + return self._aclient + + @llm_chat_callback() + def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + raise NotImplementedError("Aleph Alpha does not currently support chat.") + + @llm_completion_callback() + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + client = self._get_client() + all_kwargs = { + "prompt": Prompt.from_text(prompt), + **self._completion_kwargs, + **kwargs, + } + request = CompletionRequest(**all_kwargs) + response = client.complete(request=request, model=self.model) + completion = response.completions[0].completion if response.completions else "" + return process_response(response, completion) + + @llm_completion_callback() + async def acomplete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + client = self._get_aclient() + all_kwargs = { + "prompt": Prompt.from_text(prompt), + **self._completion_kwargs, + **kwargs, + } + request = CompletionRequest(**all_kwargs) + async with client as aclient: + response = await aclient.complete(request=request, model=self.model) + completion = ( + response.completions[0].completion if response.completions else "" + ) + return process_response(response, completion) + + @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: + raise NotImplementedError("Aleph Alpha does not currently support streaming.") + + def stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + raise NotImplementedError("Aleph Alpha does not currently support chat.") + + def achat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + raise NotImplementedError("Aleph Alpha does not currently support chat.") + + def astream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponse: + raise NotImplementedError("Aleph Alpha does not currently support chat.") + + def astream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseAsyncGen: + raise NotImplementedError("Aleph Alpha does not currently support 
streaming.") diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/utils.py b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/utils.py new file mode 100644 index 0000000000..fca085aa20 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/llama_index/llms/alephalpha/utils.py @@ -0,0 +1,79 @@ +from typing import Dict, Any + +from llama_index.core.base.llms.types import LogProb, CompletionResponse + +HUMAN_PREFIX = "\n\nHuman:" +ASSISTANT_PREFIX = "\n\nAssistant:" + +LUMINOUS_MODELS: Dict[str, int] = { + "luminous-base": 2048, + "luminous-extended": 2048, + "luminous-supreme": 2048, + "luminous-base-control": 2048, + "luminous-extended-control": 2048, + "luminous-supreme-control": 2048, +} + + +def alephalpha_modelname_to_contextsize(modelname: str) -> int: + """ + Converts an Aleph Alpha model name to the corresponding context size. + + :param modelname: The name of the Aleph Alpha model. + :return: The context size for the model. + """ + if modelname not in LUMINOUS_MODELS: + raise ValueError( + f"Unknown model: {modelname}. Please provide a valid AlephAlpha model name." + "Known models are: " + ", ".join(LUMINOUS_MODELS.keys()) + ) + + return LUMINOUS_MODELS[modelname] + + +def extract_additional_info_from_response(response) -> Dict[str, Any]: + """ + Extracts additional information from the Aleph Alpha completion response. + + :param response: The response object from Aleph Alpha API. + :return: A dictionary with extracted information. + """ + completion = response.completions[0] if response.completions else {} + + additional_info = { + "model_version": getattr(response, "model_version", None), + "log_probs": getattr(completion, "log_probs", None), + "raw_completion": getattr(completion, "raw_completion", None), + "finish_reason": getattr(completion, "finish_reason", None), + } + + return {k: v for k, v in additional_info.items() if v is not None} + + +def process_response(response: Any, completion: str) -> CompletionResponse: + """ + Processes the response from Aleph Alpha API. + + :param response: The response object from Aleph Alpha API. + :param completion: The completion text. + + :return: A CompletionResponse object. 
+ """ + log_probs_formatted = [] + + if response.completions and hasattr(response.completions[0], "log_probs"): + log_probs_extracted = response.completions[0].log_probs or [] + + for lp_dict in log_probs_extracted: + if isinstance(lp_dict, dict): + for token, log_prob in lp_dict.items(): + log_probs_formatted.append(LogProb(token=token, logprob=log_prob)) + + additional_info = extract_additional_info_from_response(response) + + return CompletionResponse( + text=completion, + raw=response.to_json(), + logprobs=[log_probs_formatted], + additional_kwargs=additional_info, + ) diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-alephalpha/pyproject.toml new file mode 100644 index 0000000000..d2209d1904 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/pyproject.toml @@ -0,0 +1,66 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = true +import_path = "llama_index.llms.alephalpha" + +[tool.llamahub.class_authors] +AlephAlpha = "ulan-yisaev" + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Ulan Yisaev <Ulan.Yisaev@nortal.com>"] +description = "llama-index llms Aleph Alpha integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-llms-alephalpha" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.1" +aleph-alpha-client = "^7.0.1" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/BUILD new file mode 100644 index 0000000000..dabf212d7e --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/test_llms_alephalpha.py b/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/test_llms_alephalpha.py new file mode 100644 index 0000000000..eb29ae0f4e --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-alephalpha/tests/test_llms_alephalpha.py @@ -0,0 +1,65 @@ +from 
unittest.mock import MagicMock, patch + +from llama_index.llms.alephalpha.base import AlephAlpha +from llama_index.llms.alephalpha.utils import extract_additional_info_from_response + + +def test_alephalpha_instantiation(): + model = "luminous-base" + token = "test_token" + aleph_alpha_instance = AlephAlpha(model=model, token=token) + + assert aleph_alpha_instance.model == model + assert aleph_alpha_instance.token == token + assert ( + aleph_alpha_instance.temperature == AlephAlpha.__fields__["temperature"].default + ) + + +def test_complete_method(): + mock_completion = MagicMock() + mock_completion.completion = "Test completion" + + mock_response_json = { + "completion": "Test completion", + } + + mock_response = MagicMock() + mock_response.completions = [mock_completion] + mock_response.to_json.return_value = mock_response_json + + with patch( + "llama_index.llms.alephalpha.base.AlephAlpha._get_client" + ) as mock_get_client: + mock_client = MagicMock() + mock_client.complete.return_value = mock_response + mock_get_client.return_value = mock_client + + aleph_alpha_instance = AlephAlpha() + response = aleph_alpha_instance.complete("Test prompt") + + assert response.text == "Test completion" + assert response.raw == mock_response_json + + +def test_extract_additional_info_from_response(): + mock_completion = { + "log_probs": [{"token": "test", "log_prob": -0.5}], + "raw_completion": "Raw test completion", + "finish_reason": "length", + } + + mock_response = MagicMock() + mock_response.model_version = "luminous-base-control" + mock_response.completions = [MagicMock(**mock_completion)] + mock_response.json.return_value = { + "model_version": "luminous-base-control", + "completions": [mock_completion], + } + + extracted_info = extract_additional_info_from_response(mock_response) + + assert extracted_info["model_version"] == "luminous-base-control" + assert extracted_info["log_probs"] == [{"token": "test", "log_prob": -0.5}] + assert extracted_info["raw_completion"] == "Raw test completion" + assert extracted_info["finish_reason"] == "length" -- GitLab
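For completeness, a minimal usage sketch of the embeddings integration introduced by this patch, following the example notebook: it assumes the `llama-index-embeddings-alephalpha` package is installed and that `AA_TOKEN` holds a valid token; the token value and sample strings below are placeholders.

```python
import os

from llama_index.embeddings.alephalpha import AlephAlphaEmbedding

os.environ["AA_TOKEN"] = "your_token_here"  # placeholder; read from the environment if not passed

# "Document" embeddings are intended for texts stored in a vector database,
# "Query" embeddings for search queries, "Symmetric" for clustering/classification.
doc_embed_model = AlephAlphaEmbedding(model="luminous-base", representation="Document")
query_embed_model = AlephAlphaEmbedding(model="luminous-base", representation="Query")

doc_vector = doc_embed_model.get_text_embedding("Hello Aleph Alpha!")
query_vector = query_embed_model.get_query_embedding("What does Aleph Alpha offer?")

# The example notebook shows 5120-dimensional vectors for luminous-base.
print(len(doc_vector), doc_vector[:5])
print(len(query_vector), query_vector[:5])
```

Batched embedding via the base class method `get_text_embedding_batch` should also work here, since `base.py` routes batches through `BatchSemanticEmbeddingRequest`.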