Unverified commit bacded36 authored by Shivam Kalra, committed by GitHub

Feat/lmstudio integration (#13557)

parent 92d31778
Showing with 748 additions and 0 deletions
%% Cell type:markdown id: tags:
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/llm/lmstudio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
%% Cell type:markdown id: tags:
# LM Studio
%% Cell type:markdown id: tags:
## Setup
%% Cell type:markdown id: tags:
1. Download and Install LM Studio
2. Follow the steps mentioned in the [README](https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/llms/llama-index-llms-lmstudio/README.md).
%% Cell type:markdown id: tags:
If not already installed in Colab, install *llama-index* and the *lmstudio* integration.
%% Cell type:code id: tags:
``` python
%pip install llama-index-core llama-index llama-index-llms-lmstudio
```
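%% Cell type:markdown id: tags:
Optionally, check that the LM Studio local server started in the setup steps above is reachable before running the cells below. This is a minimal sketch that assumes the server is running on LM Studio's default address (`http://localhost:1234`) and exposes the OpenAI-compatible `/v1/models` endpoint:
%% Cell type:code id: tags:
``` python
import httpx

# Assumes LM Studio's local server is running on its default address.
# /v1/models lists the models currently loaded in LM Studio.
print(httpx.get("http://localhost:1234/v1/models", timeout=10.0).json())
```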
%% Cell type:markdown id: tags:
Fix for "RuntimeError: This event loop is already running"
%% Cell type:code id: tags:
``` python
import nest_asyncio
nest_asyncio.apply()
```
%% Cell type:code id: tags:
``` python
from llama_index.llms.lmstudio import LMStudio
from llama_index.core.base.llms.types import ChatMessage, MessageRole
```
%% Cell type:code id: tags:
``` python
llm = LMStudio(
    model_name="Hermes-2-Pro-Llama-3-8B",
    base_url="http://localhost:1234/v1",
    temperature=0.7,
)
```
%% Cell type:code id: tags:
``` python
response = llm.complete("Hey there, what is 2+2?")
print(str(response))
```
%% Output
The result of 2 + 2 is 4.
%% Cell type:code id: tags:
``` python
# use llm.stream_complete
response = llm.stream_complete("What is 7+3?")
for r in response:
    print(r.delta, end="")
```
%% Output
The result of 7 + 3 is 10.
%% Cell type:code id: tags:
``` python
messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="You are an expert AI assistant. Help the user with their queries.",
    ),
    ChatMessage(
        role=MessageRole.USER,
        content="What is the significance of the number 42?",
    ),
]
```
%% Cell type:code id: tags:
``` python
response = llm.chat(messages=messages)
print(str(response))
```
%% Output
assistant: The number 42 has been significant in various contexts throughout history and across different cultures, often holding symbolic or philosophical meanings.
1. In mathematics: 42 is a relatively simple but still interesting whole number with no factors other than 1 and itself.
2. In popular culture: Douglas Adams' science fiction series "The Hitchhiker's Guide to the Galaxy" presents the ultimate answer to the meaning of life as 42, which has become a well-known joke and meme since its introduction in the first book published in 1979.
3. In religion and mythology: The number 42 appears in various religious texts or myths with different meanings, such as the Biblical Book of Numbers where Moses spent 42 years tending to his father-in-law's flock before receiving the call from God, or in Norse mythology when Odin spent 42 nights suspended on Yggdrasil (the World Tree) to gain knowledge.
4. In sports: In baseball, a perfect game is considered to be an immaculate game with no hits, errors, or runners allowed to reach base; only 15 players can achieve this in Major League Baseball history, and the number of their names added together equals 42 (6 + 2 = 8, 3 + 4 = 7).
5. In music: The English rock band Coldplay's popular song "42" is about lead singer Chris Martin reflecting on his age during the time it took for the band to gain success.
The significance of the number 42 varies depending on the context and cultural background. It has often been used symbolically or metaphorically, making it a versatile and intriguing number in various aspects of human life.
%% Cell type:code id: tags:
``` python
response = llm.stream_chat(messages=messages)
for r in response:
    print(r.delta, end="")
```
%% Output
The number 42 has various significances in different contexts:
1. In popular culture: The famous "Answer to the Ultimate Question of Life, the Universe and Everything" from Douglas Adams' science fiction series "The Hitchhiker's Guide to the Galaxy" is 42. This has led to widespread recognition of the number as something meaningful or profound.
2. Mathematics: The number 42 is a highly composite number with many divisors (1, 2, 3, 6, 7, 14, 21, and 42). In mathematics, the study of factors and divisors plays an essential role in various concepts such as prime factorization and greatest common denominators.
3. Christianity: According to a story from The Book of Kells (an illuminated manuscript), it is said that St. Patrick used the number 42 to calculate when to begin his mission to convert Ireland to Christianity.
4. Astrology: In astrology, the 42nd day after the Winter Solstice marks the beginning of the new astrological year and the start of a 13-month cycle in some traditions.
5. Literature: The number 42 is mentioned several times throughout William Shakespeare's plays, such as "Hamlet" and "Henry IV." It appears as a coincidence or possibly with symbolic intent in these works.
6. In the field of computer science, the popular programming language 'Python' uses 42 as its "magic number" to represent the start-up code for the interpreter.
Each context assigns a different significance to the number 42, making it multi-faceted and culturally relevant in various ways.
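%% Cell type:markdown id: tags:
The integration also defines async variants (`achat`, `acomplete`, and `astream_complete`). A minimal sketch of `achat`, reusing the `llm` and `messages` from above (`nest_asyncio` was applied earlier so the notebook's event loop can be re-entered):
%% Cell type:code id: tags:
``` python
import asyncio


async def main():
    # achat mirrors chat, but awaits the request to the LM Studio server.
    response = await llm.achat(messages=messages)
    print(str(response))


asyncio.run(main())
```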
llama_index/_static
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
etc/
include/
lib/
lib64/
parts/
sdist/
share/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.ruff_cache
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
notebooks/
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Jetbrains
.idea
modules/
*.swp
# VsCode
.vscode
# pipenv
Pipfile
Pipfile.lock
# pyright
pyrightconfig.json
poetry_requirements(
    name="poetry",
)
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

help:	## Show all Makefile targets.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

format:	## Run code autoformatters (black).
	pre-commit install
	git ls-files | xargs pre-commit run black --files

lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files

test:	## Run tests via pytest.
	pytest tests

watch-docs:	## Build and watch documentation.
	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
# LlamaIndex LLMs Integration: LM Studio
```bash
pip install llama-index-llms-lmstudio
```
## Usage Steps
1. Open LM Studio App and go to the Local Server Tab
2. In the Configuration settings, enable Apply Prompt Formatting
3. Load the model of your choice
4. Start your server
```python
from llama_index.llms.lmstudio import LMStudio
from llama_index.core.base.llms.types import ChatMessage, MessageRole

llm = LMStudio(
    model_name="Hermes-2-Pro-Llama-3-8B",
    base_url="http://localhost:1234/v1",
    temperature=0.7,
)

messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="You are an expert AI assistant. Help the user with their queries.",
    ),
    ChatMessage(
        role=MessageRole.USER,
        content="What is the significance of the number 42?",
    ),
]

response = llm.chat(messages=messages)
print(str(response))
```
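Streaming works the same way. A minimal sketch using `stream_complete` (this assumes the same `llm` instance as above and a running LM Studio server):

```python
# Stream tokens from the loaded model as they are generated.
response = llm.stream_complete("What is 7+3?")
for r in response:
    print(r.delta, end="")
```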
python_sources()
from llama_index.llms.lmstudio.base import LMStudio
__all__ = ["LMStudio"]
import logging
import json
import httpx
from httpx import Timeout
from typing import Any, Dict, Sequence, Tuple
from llama_index.core.bridge.pydantic import Field
from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
from llama_index.core.llms.custom import CustomLLM
from llama_index.core.base.llms.generic_utils import (
    stream_chat_to_completion_decorator,
    chat_to_completion_decorator,
    achat_to_completion_decorator,
    astream_chat_to_completion_decorator,
)
from llama_index.core.constants import (
    DEFAULT_TEMPERATURE,
    DEFAULT_CONTEXT_WINDOW,
    DEFAULT_NUM_OUTPUTS,
)
from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    ChatResponseGen,
    CompletionResponse,
    CompletionResponseAsyncGen,
    CompletionResponseGen,
    LLMMetadata,
    MessageRole,
)


def get_additional_kwargs(
    response: Dict[str, Any], exclude: Tuple[str, ...]
) -> Dict[str, Any]:
    return {k: v for k, v in response.items() if k not in exclude}
logger = logging.getLogger(__name__)
DEFAULT_REQUEST_TIMEOUT = 30.0
class LMStudio(CustomLLM):
    base_url: str = Field(
        default="http://localhost:1234/v1",
        description="Base url the model is hosted under.",
    )
    context_window: int = Field(
        default=DEFAULT_CONTEXT_WINDOW,
        description="The maximum number of context tokens for the model.",
        gt=0,
    )
    model_name: str = Field(description="The model to use.")
    request_timeout: float = Field(
        default=DEFAULT_REQUEST_TIMEOUT,
        description="The timeout for making http request in seconds to LM Studio API server.",
    )
    num_output: int = Field(
        default=DEFAULT_NUM_OUTPUTS,
        description=LLMMetadata.__fields__["num_output"].field_info.description,
    )
    is_chat_model: bool = Field(
        default=True,
        description=(
            "LM Studio API supports chat."
            + LLMMetadata.__fields__["is_chat_model"].field_info.description
        ),
    )
    temperature: float = Field(
        default=DEFAULT_TEMPERATURE,
        description=("The temperature to use for sampling."),
        gte=0.0,
        lte=1.0,
    )
    timeout: float = Field(
        default=120, description=("The timeout to use in seconds."), gte=0
    )
    additional_kwargs: Dict[str, Any] = Field(
        default_factory=dict, description=("Additional kwargs to pass to the model.")
    )
    def _create_payload_from_messages(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> Dict[str, Any]:
        return {
            "model": self.model_name,
            "messages": [
                {
                    "role": message.role.value,
                    "content": message.content,
                    **(
                        message.additional_kwargs
                        if message.additional_kwargs is not None
                        else {}
                    ),
                }
                for message in messages
            ],
            "options": self._model_kwargs,
            "stream": False,
            **kwargs,
        }
    def _create_chat_response_from_http_response(
        self, response: httpx.Response
    ) -> ChatResponse:
        raw = response.json()
        message = raw["choices"][0]["message"]
        return ChatResponse(
            message=ChatMessage(
                content=message.get("content"),
                role=MessageRole(message.get("role")),
                additional_kwargs=get_additional_kwargs(message, ("content", "role")),
            ),
            raw=raw,
            additional_kwargs=get_additional_kwargs(raw, ("choices",)),
        )
    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        payload = self._create_payload_from_messages(messages, **kwargs)

        with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
            response = client.post(
                url=f"{self.base_url}/chat/completions",
                json=payload,
            )
            response.raise_for_status()
            return self._create_chat_response_from_http_response(response)

    @llm_chat_callback()
    async def achat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponse:
        payload = self._create_payload_from_messages(messages, **kwargs)

        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
            response = await client.post(
                url=f"{self.base_url}/chat/completions",
                json=payload,
            )
            response.raise_for_status()
            return self._create_chat_response_from_http_response(response)
    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        complete_fn = chat_to_completion_decorator(self.chat)
        return complete_fn(prompt, **kwargs)

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        acomplete_fn = achat_to_completion_decorator(self.achat)
        return await acomplete_fn(prompt, **kwargs)
    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        payload = self._create_payload_from_messages(messages, stream=True, **kwargs)

        with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
            with client.stream(
                method="POST",
                url=f"{self.base_url}/chat/completions",
                json=payload,
            ) as response:
                response.raise_for_status()
                text = ""
                for line in response.iter_lines():
                    if line:
                        line = line.strip()
                        if isinstance(line, bytes):
                            line = line.decode("utf-8")
                        if line.startswith("data: [DONE]"):
                            break
                        # Slice the line to remove the "data: " prefix
                        chunk = json.loads(line[5:])
                        delta = chunk["choices"][0].get("delta")
                        role = delta.get("role") or MessageRole.ASSISTANT
                        content_delta = delta.get("content") or ""
                        text += content_delta
                        yield ChatResponse(
                            message=ChatMessage(
                                content=text,
                                role=MessageRole(role),
                                additional_kwargs=get_additional_kwargs(
                                    chunk, ("choices",)
                                ),
                            ),
                            delta=content_delta,
                            raw=chunk,
                            additional_kwargs=get_additional_kwargs(
                                chunk, ("choices",)
                            ),
                        )
    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        stream_complete_fn = stream_chat_to_completion_decorator(self.stream_chat)
        return stream_complete_fn(prompt, **kwargs)

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        astream_complete_fn = astream_chat_to_completion_decorator(self.astream_chat)
        return await astream_complete_fn(prompt, **kwargs)

    @property
    def metadata(self) -> LLMMetadata:
        """LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
            is_chat_model=self.is_chat_model,
        )

    @property
    def _model_kwargs(self) -> Dict[str, Any]:
        base_kwargs = {
            "temperature": self.temperature,
            "num_ctx": self.context_window,
        }
        return {
            **base_kwargs,
            **self.additional_kwargs,
        }
[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
[tool.codespell]
check-filenames = true
check-hidden = true
skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
[tool.llamahub]
contains_example = false
import_path = "llama_index.llms.lmstudio"
[tool.llamahub.class_authors]
LMStudio = "shivamklr"
[tool.mypy]
disallow_untyped_defs = true
exclude = ["_static", "build", "examples", "notebooks", "venv"]
ignore_missing_imports = true
python_version = "3.8"
[tool.poetry]
authors = ["Shivam Kalra <dev@kaltq.com>"]
description = "llama-index llms lmstudio integration"
license = "MIT"
name = "llama-index-llms-lmstudio"
packages = [{include = "llama_index/"}]
readme = "README.md"
version = "0.1.0"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = "^0.10.0"
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
codespell = {extras = ["toml"], version = ">=v2.2.6"}
ipython = "8.10.0"
jupyter = "^1.0.0"
mypy = "0.991"
pre-commit = "3.2.0"
pylint = "2.15.10"
pytest = "7.2.1"
pytest-mock = "3.11.1"
ruff = "0.0.292"
tree-sitter-languages = "^1.8.0"
types-Deprecated = ">=0.1.0"
types-PyYAML = "^6.0.12.12"
types-protobuf = "^4.24.0.4"
types-redis = "4.5.5.0"
types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991
types-setuptools = "67.1.0.0"
python_tests()
from llama_index.core.base.llms.base import BaseLLM
from llama_index.llms.lmstudio import LMStudio
def test_embedding_class():
    names_of_base_classes = [b.__name__ for b in LMStudio.__mro__]
    assert BaseLLM.__name__ in names_of_base_classes