diff --git a/docs/examples/llm/modelscope.ipynb b/docs/examples/llm/modelscope.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8f19987a0802aca7399d1cdbe067e9ab30504cb8
--- /dev/null
+++ b/docs/examples/llm/modelscope.ipynb
@@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/llm/modelscope.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ModelScope LLMs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this notebook, we show how to use ModelScope LLMs in LlamaIndex. Check out the [ModelScope site](https://www.modelscope.cn/).\n",
+    "\n",
+    "If you're opening this Notebook on Colab, you will need to install LlamaIndex 🦙 and ModelScope."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install llama-index-llms-modelscope"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic Usage\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from llama_index.llms.modelscope import ModelScopeLLM\n",
+    "\n",
+    "llm = ModelScopeLLM(model_name=\"qwen/Qwen1.5-7B-Chat\", model_revision=\"master\")\n",
+    "\n",
+    "rsp = llm.complete(\"Hello, who are you?\")\n",
+    "print(rsp)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Chat with a list of messages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core.base.llms.types import MessageRole, ChatMessage\n",
+    "\n",
+    "messages = [\n",
+    "    ChatMessage(\n",
+    "        role=MessageRole.SYSTEM, content=\"You are a helpful assistant.\"\n",
+    "    ),\n",
+    "    ChatMessage(role=MessageRole.USER, content=\"How to make cake?\"),\n",
+    "]\n",
+    "resp = llm.chat(messages)\n",
+    "print(resp)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "modelscope.ipynb",
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/module_guides/models/llms/modules.md b/docs/module_guides/models/llms/modules.md
index 07e93054f0a60c64ce198bb0e160cf840ab66723..9e1e0044bea050f479093cc2a686b5308eb8cd89 100644
--- a/docs/module_guides/models/llms/modules.md
+++ b/docs/module_guides/models/llms/modules.md
@@ -378,3 +378,12 @@ maxdepth: 1
 ---
 /examples/llm/xinference_local_deployment.ipynb
 ```
+
+## ModelScope
+
+```{toctree}
+---
+maxdepth: 1
+---
+/examples/llm/modelscope.ipynb
+```
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/.gitignore b/llama-index-integrations/llms/llama-index-llms-modelscope/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..990c18de229088f55c6c514fd0f2d49981d1b0e7
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/.gitignore
@@ -0,0 +1,153 @@
+llama_index/_static
+.DS_Store
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+bin/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+etc/
+include/
+lib/
+lib64/
+parts/
+sdist/
+share/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+.ruff_cache
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+notebooks/
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pyvenv.cfg
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Jetbrains
+.idea
+modules/
+*.swp
+
+# VsCode
+.vscode
+
+# pipenv
+Pipfile
+Pipfile.lock
+
+# pyright
+pyrightconfig.json
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/BUILD b/llama-index-integrations/llms/llama-index-llms-modelscope/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..0896ca890d8bffd60a44fa824f8d57fecd73ee53
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/BUILD
@@ -0,0 +1,3 @@
+poetry_requirements(
+    name="poetry",
+)
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/Makefile b/llama-index-integrations/llms/llama-index-llms-modelscope/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b9eab05aa370629a4a3de75df3ff64cd53887b68
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/Makefile
@@ -0,0 +1,17 @@
+GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
+
+help:	## Show all Makefile targets.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
+
+format:	## Run code autoformatters (black).
+	pre-commit install
+	git ls-files | xargs pre-commit run black --files
+
+lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
+	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
+
+test:	## Run tests via pytest.
+	pytest tests
+
+watch-docs:	## Build and watch documentation.
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/README.md b/llama-index-integrations/llms/llama-index-llms-modelscope/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..58064ad24a95a26cb3a8a67192f68264d88bdb26
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/README.md
@@ -0,0 +1 @@
+# LlamaIndex Llms Integration: ModelScope
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/BUILD b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..db46e8d6c978c67e301dd6c47bee08c1b3fd141c
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/__init__.py b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..513713369f6957e15acde03ce9aca6e4012ef7b6
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/__init__.py
@@ -0,0 +1,3 @@
+from llama_index.llms.modelscope.base import ModelScopeLLM
+
+__all__ = ["ModelScopeLLM"]
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/base.py b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..db26971e732a747727fc12c39655cbe30c7221b6
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/base.py
@@ -0,0 +1,180 @@
+import logging
+from typing import Any, Optional, Sequence
+import torch
+from llama_index.core.base.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseGen,
+    LLMMetadata,
+)
+from llama_index.core.bridge.pydantic import Field, PrivateAttr
+from llama_index.core.callbacks import CallbackManager
+from llama_index.core.constants import (
+    DEFAULT_CONTEXT_WINDOW,
+    DEFAULT_NUM_OUTPUTS,
+)
+from llama_index.core.llms.callbacks import (
+    llm_chat_callback,
+    llm_completion_callback,
+)
+from llama_index.core.llms.custom import CustomLLM
+from llama_index.core.prompts.base import PromptTemplate
+from llama_index.core.types import PydanticProgramMode
+from llama_index.llms.modelscope.utils import (
+    chat_message_to_modelscope_messages,
+    text_to_completion_response,
+    modelscope_message_to_chat_response,
+)
+from modelscope import pipeline
+
+DEFAULT_MODELSCOPE_MODEL = "qwen/Qwen-7B-Chat"
+DEFAULT_MODELSCOPE_MODEL_REVISION = "master"
+DEFAULT_MODELSCOPE_TASK = "chat"
+DEFAULT_MODELSCOPE_DTYPE = "float16"
+logger = logging.getLogger(__name__)
+
+_STR_DTYPE_TO_TORCH_DTYPE = {
+    "half": torch.float16,
+    "float16": torch.float16,
+    "float": torch.float32,
+    "float32": torch.float32,
+    "bfloat16": torch.bfloat16,
+}
+
+
+class ModelScopeLLM(CustomLLM):
+    """ModelScope LLM."""
+
+    model_name: str = Field(
+        default=DEFAULT_MODELSCOPE_MODEL,
+        description=(
+            "The model name to use from ModelScope. "
+            "Unused if `model` is passed in directly."
+        ),
+    )
+    model_revision: str = Field(
+        default=DEFAULT_MODELSCOPE_MODEL_REVISION,
+        description=(
+            "The model revision to use from ModelScope. "
+            "Unused if `model` is passed in directly."
+        ),
+    )
+    task_name: str = Field(
+        default=DEFAULT_MODELSCOPE_TASK,
+        description=("The ModelScope task type; for LLMs the default is 'chat'."),
+    )
+    dtype: str = Field(
+        default=DEFAULT_MODELSCOPE_DTYPE,
+        description=(
+            "The torch dtype to load the model in, e.g. 'float16' or 'bfloat16'."
+        ),
+    )
+    context_window: int = Field(
+        default=DEFAULT_CONTEXT_WINDOW,
+        description="The maximum number of tokens available for input.",
+        gt=0,
+    )
+    max_new_tokens: int = Field(
+        default=DEFAULT_NUM_OUTPUTS,
+        description="The maximum number of tokens to generate.",
+        gt=0,
+    )
+    system_prompt: str = Field(
+        default="",
+        description=(
+            "The system prompt, containing any extra instructions or context. "
+            "The model card on ModelScope should specify if this is needed."
+        ),
+    )
+    query_wrapper_prompt: PromptTemplate = Field(
+        default=PromptTemplate("{query_str}"),
+        description=(
+            "The query wrapper prompt, containing the query placeholder. "
+            "The model card on ModelScope should specify if this is needed. "
+            "Should contain a `{query_str}` placeholder."
+        ),
+    )
+    device_map: str = Field(
+        default="auto", description="The device_map to use. Defaults to 'auto'."
+    )
+    tokenizer_kwargs: dict = Field(
+        default_factory=dict, description="The kwargs to pass to the tokenizer."
+    )
+    model_kwargs: dict = Field(
+        default_factory=dict,
+        description="The kwargs to pass to the model during initialization.",
+    )
+    generate_kwargs: dict = Field(
+        default_factory=dict,
+        description="The kwargs to pass to the model during generation.",
+    )
+
+    _pipeline: Any = PrivateAttr()
+
+    def __init__(
+        self,
+        model_name: str = DEFAULT_MODELSCOPE_MODEL,
+        model_revision: str = DEFAULT_MODELSCOPE_MODEL_REVISION,
+        task_name: str = DEFAULT_MODELSCOPE_TASK,
+        dtype: str = DEFAULT_MODELSCOPE_DTYPE,
+        model: Optional[Any] = None,
+        device_map: Optional[str] = "auto",
+        model_kwargs: Optional[dict] = None,
+        generate_kwargs: Optional[dict] = None,
+        callback_manager: Optional[CallbackManager] = None,
+        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
+    ) -> None:
+        """Initialize params."""
+        model_kwargs = model_kwargs or {}
+        if model:
+            # Reuse a pre-built ModelScope pipeline if one is passed in directly.
+            self._pipeline = model
+        else:
+            self._pipeline = pipeline(
+                task=task_name,
+                model=model_name,
+                model_revision=model_revision,
+                llm_first=True,
+                torch_dtype=_STR_DTYPE_TO_TORCH_DTYPE[dtype],
+                device_map=device_map,
+            )
+
+        super().__init__(
+            model_name=model_name,
+            model_revision=model_revision,
+            task_name=task_name,
+            dtype=dtype,
+            model_kwargs=model_kwargs,
+            generate_kwargs=generate_kwargs or {},
+            callback_manager=callback_manager,
+            pydantic_program_mode=pydantic_program_mode,
+        )
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "ModelScope_LLM"
+
+    @property
+    def metadata(self) -> LLMMetadata:
+        """LLM metadata."""
+        return LLMMetadata(
+            context_window=self.context_window,
+            num_output=self.max_new_tokens,
+            model_name=self.model_name,
+            # The default ModelScope task for this integration is chat.
+            is_chat_model=True,
+        )
+
+    @llm_completion_callback()
+    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
+        return text_to_completion_response(self._pipeline(prompt, **kwargs))
+
+    @llm_completion_callback()
+    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
+        # Token-level streaming is not wired up; yield the final completion once.
+        yield self.complete(prompt, **kwargs)
+
+    @llm_chat_callback()
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        return modelscope_message_to_chat_response(
+            self._pipeline(chat_message_to_modelscope_messages(messages), **kwargs)
+        )
+
+    @llm_chat_callback()
+    def stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        # As with stream_complete, yield the final chat response as a single chunk.
+        yield self.chat(messages, **kwargs)
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/utils.py b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2152822db464da3d9caf2edd16f1902ee0a95491
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/llama_index/llms/modelscope/utils.py
@@ -0,0 +1,30 @@
+from typing import Dict, Sequence
+
+from llama_index.core.base.llms.types import (
+    ChatResponse,
+    CompletionResponse,
+    ChatMessage,
+)
+
+
+def chat_message_to_modelscope_messages(
+    chat_messages: Sequence[ChatMessage],
+) -> Dict:
+    messages = []
+    for msg in chat_messages:
+        messages.append({"role": msg.role.value, "content": msg.content})
+    return {"messages": messages}
+
+
+def text_to_completion_response(output) -> CompletionResponse:
+    return CompletionResponse(text=output["text"], raw=output)
+
+
+def modelscope_message_to_chat_response(output) -> ChatResponse:
+    # output format: {'message': {'role': 'assistant', 'content': ''}}
+    return ChatResponse(
+        message=ChatMessage(
+            role=output["message"]["role"], content=output["message"]["content"]
+        ),
+        raw=output,
+    )
diff --git a/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..248cb8eeb4b1c06d0387305e7c58ba8b351b30e6
--- /dev/null
+++ b/llama-index-integrations/llms/llama-index-llms-modelscope/pyproject.toml
@@ -0,0 +1,68 @@
+[build-system]
+build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core"]
+
+[tool.codespell]
+check-filenames = true
+check-hidden = true
+skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
+
+[tool.llamahub]
+contains_example = false
+import_path = "llama_index.llms.modelscope"
+
+[tool.llamahub.class_authors]
+ModelScopeLLM = "llama-index"
+
+[tool.mypy]
+disallow_untyped_defs = true
+exclude = ["_static", "build", "examples", "notebooks", "venv"]
+ignore_missing_imports = true
+python_version = "3.8"
+
+[tool.poetry]
+authors = ["ModelScope <modelscope@list.alibaba-inc.com>"]
+description = "llama-index llms modelscope integration"
+exclude = ["**/BUILD"]
+license = "MIT"
+name = "llama-index-llms-modelscope"
+readme = "README.md"
+version = "0.1.1"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<3.12"
+llama-index-core = "^0.10.1"
+modelscope = ">=1.12.0"
+torch = "^2.1.2"
+
+[tool.poetry.dependencies.transformers]
+extras = ["torch"]
+version = "^4.37.0"
+
+[tool.poetry.group.dev.dependencies]
+ipython = "8.10.0"
+jupyter = "^1.0.0"
+mypy = "0.991"
+pre-commit = "3.2.0"
+pylint = "2.15.10"
+pytest = "7.2.1"
+pytest-mock = "3.11.1"
+ruff = "0.0.292"
+tree-sitter-languages = "^1.8.0"
+types-Deprecated = ">=0.1.0"
+types-PyYAML = "^6.0.12.12"
+types-protobuf = "^4.24.0.4"
+types-redis = "4.5.5.0"
+types-requests = "2.28.11.8"
+types-setuptools = "67.1.0.0"
+
+[tool.poetry.group.dev.dependencies.black]
+extras = ["jupyter"]
+version = "<=23.9.1,>=23.7.0"
+
+[tool.poetry.group.dev.dependencies.codespell]
+extras = ["toml"]
+version = ">=v2.2.6"
+
+[[tool.poetry.packages]]
+include = "llama_index/"
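For quick reference, here is a small sketch (not part of the diff above) that exercises the conversion helpers added in `utils.py` without downloading a model. The chat history is a made-up example, and the expected pipeline output shapes (`{"text": ...}` for completion, `{"message": ...}` for chat) are taken from the helpers themselves rather than from ModelScope documentation:

```python
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.llms.modelscope.utils import (
    chat_message_to_modelscope_messages,
    modelscope_message_to_chat_response,
    text_to_completion_response,
)

# The chat history is wrapped into the dict format the ModelScope chat pipeline
# expects: {"messages": [{"role": ..., "content": ...}, ...]}.
payload = chat_message_to_modelscope_messages(
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."),
        ChatMessage(role=MessageRole.USER, content="How to make cake?"),
    ]
)
print(payload)
# {'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'},
#               {'role': 'user', 'content': 'How to make cake?'}]}

# Pipeline outputs are mapped back to LlamaIndex response types.
completion = text_to_completion_response({"text": "Hello! I am Qwen."})
print(completion.text)  # -> Hello! I am Qwen.

chat_resp = modelscope_message_to_chat_response(
    {"message": {"role": "assistant", "content": "Mix flour, sugar, and eggs..."}}
)
print(chat_resp.message.content)  # -> Mix flour, sugar, and eggs...
```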