Skip to content
Snippets Groups Projects
Unverified Commit 2ddd7207 authored by Andrei Fajardo's avatar Andrei Fajardo Committed by GitHub
Browse files

The llamaindex cli package (#10723)

* llamaindex cli package

* BUILD to pass tests

* generate pants BUILD files

* add skip tests

* use llamaindex-cli name

* CHANGELOG
parent f2d9472e
No related branches found
No related tags found
No related merge requests found
Showing
with 848 additions and 0 deletions
llama_index/_static
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
etc/
include/
lib/
lib64/
parts/
sdist/
share/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.ruff_cache
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
notebooks/
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Jetbrains
.idea
modules/
*.swp
# VsCode
.vscode
# pipenv
Pipfile
Pipfile.lock
# pyright
pyrightconfig.json
python_sources()
poetry_requirements(
name="poetry",
)
# CHANGELOG
## [0.1.0] - 2024-02-15
- Initial release of cli (ripped out from core)
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

# These targets are commands, not files: declare them phony so a stray file
# named e.g. `test` can never shadow them.
.PHONY: help format lint test watch-docs

help: ## Show all Makefile targets.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

format: ## Run code autoformatters (black).
	pre-commit install
	git ls-files | xargs pre-commit run black --files

lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files

test: ## Run tests via pytest.
	pytest tests

watch-docs: ## Build and watch documentation.
	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
# LlamaIndex CLI
## Installation
```sh
pip install llama-index-cli
```
## Usage
```sh
llamaindex -h
usage: llamaindex [-h] {rag,download-llamapack,download-llamadataset,upgrade,upgrade-file,new-package} ...
LlamaIndex CLI tool.
options:
-h, --help show this help message and exit
commands:
{rag,download-llamapack,download-llamadataset,upgrade,upgrade-file,new-package}
rag Ask a question to a document / a directory of documents.
download-llamapack Download a llama-pack
download-llamadataset
Download a llama-dataset
upgrade Upgrade a directory containing notebooks or python files.
upgrade-file Upgrade a single notebook or python file.
new-package Initialize a new llama-index package
```
python_sources()
import argparse
from typing import Any, Optional
from llama_index.cli.rag.base import RagCLI, default_ragcli_persist_dir
from llama_index.cli.upgrade.base import upgrade_dir, upgrade_file
from llama_index.cli.new_package.base import init_new_package
from llama_index.core.ingestion import IngestionCache, IngestionPipeline
from llama_index.core.llama_dataset.download import (
LLAMA_DATASETS_LFS_URL,
LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL,
LLAMA_HUB_URL,
download_llama_dataset,
)
from llama_index.core.llama_pack.download import (
LLAMA_PACKS_CONTENTS_URL,
download_llama_pack,
)
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.text_splitter import SentenceSplitter
def handle_init_package(
    name: str, kind: str, prefix: Optional[str] = None, **kwargs: Any
):
    """Handle the `new-package` subcommand: scaffold a new integration package.

    Args:
        name: integration name (forwarded as ``integration_name``).
        kind: integration type, e.g. ``llm`` or ``embedding``
            (forwarded as ``integration_type``).
        prefix: optional extra namespace segment for the package path.
        **kwargs: unused extras from argparse's ``vars(args)``
            (e.g. ``func``, ``command``).
    """
    init_new_package(integration_name=name, integration_type=kind, prefix=prefix)
    # Plain string: the original had a pointless `f` prefix with no placeholders.
    print("Successfully initialized package")
def handle_download_llama_pack(
    llama_pack_class: Optional[str] = None,
    download_dir: Optional[str] = None,
    llama_pack_url: str = LLAMA_PACKS_CONTENTS_URL,
    **kwargs: Any,
) -> None:
    """Handle the `download-llamapack` subcommand.

    Downloads ``llama_pack_class`` into ``download_dir`` and prints a success
    message. Extra argparse attributes (``func``, ``command``, and the unused
    ``llama_hub_url`` flag) arrive via ``**kwargs`` and are ignored.
    """
    assert llama_pack_class is not None
    assert download_dir is not None
    # The asserts already reject None, so this fallback only applies to an
    # empty-string download_dir.
    target_dir = download_dir or "./custom_llama_pack"
    download_llama_pack(
        llama_pack_class=llama_pack_class,
        download_dir=target_dir,
        llama_pack_url=llama_pack_url,
    )
    print(f"Successfully downloaded {llama_pack_class} to {download_dir}")
def handle_download_llama_dataset(
    llama_dataset_class: Optional[str] = None,
    download_dir: Optional[str] = None,
    llama_hub_url: str = LLAMA_HUB_URL,
    llama_datasets_lfs_url: str = LLAMA_DATASETS_LFS_URL,
    llama_datasets_source_files_tree_url: str = LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL,
    **kwargs: Any,
) -> None:
    """Handle the `download-llamadataset` subcommand.

    Downloads ``llama_dataset_class`` source files into ``download_dir``
    (without loading documents) and prints a success message. Extra argparse
    attributes (``func``, ``command``) arrive via ``**kwargs`` and are ignored.
    """
    assert llama_dataset_class is not None
    assert download_dir is not None
    dataset_kwargs = dict(
        llama_dataset_class=llama_dataset_class,
        download_dir=download_dir,
        llama_hub_url=llama_hub_url,
        llama_datasets_lfs_url=llama_datasets_lfs_url,
        llama_datasets_source_files_tree_url=llama_datasets_source_files_tree_url,
        show_progress=True,
        load_documents=False,
    )
    download_llama_dataset(**dataset_kwargs)
    print(f"Successfully downloaded {llama_dataset_class} to {download_dir}")
def default_rag_cli() -> Optional[RagCLI]:
    """Build the default RagCLI: a local Chroma vector store plus OpenAI
    embeddings feeding an ingestion pipeline.

    Returns None (after printing an install hint) when the optional
    dependencies (chromadb / openai embedding / chroma vector store packages)
    are not installed.

    BUG FIX: the original first imported chromadb and built the whole pipeline
    *unconditionally* before the try/except fallback below, so a missing
    dependency raised ImportError instead of printing the hint, and the first
    pipeline was discarded. That dead duplicate block is removed.
    """
    try:
        from llama_index.embeddings.openai import OpenAIEmbedding  # pants: no-infer-dep
    except ImportError:
        OpenAIEmbedding = None

    try:
        import chromadb  # pants: no-infer-dep
        from llama_index.vector_stores.chroma import (
            ChromaVectorStore,
        )  # pants: no-infer-dep
    except ImportError:
        ChromaVectorStore = None

    if OpenAIEmbedding and ChromaVectorStore:
        persist_dir = default_ragcli_persist_dir()
        chroma_client = chromadb.PersistentClient(path=persist_dir)
        chroma_collection = chroma_client.create_collection(
            "default", get_or_create=True
        )
        vector_store = ChromaVectorStore(
            chroma_collection=chroma_collection, persist_dir=persist_dir
        )
        docstore = SimpleDocumentStore()

        ingestion_pipeline = IngestionPipeline(
            transformations=[SentenceSplitter(), OpenAIEmbedding()],
            vector_store=vector_store,
            docstore=docstore,
            cache=IngestionCache(),
        )
        try:
            # Resume any previously persisted ingestion state.
            ingestion_pipeline.load(persist_dir=persist_dir)
        except FileNotFoundError:
            # First run: nothing persisted yet.
            pass

        return RagCLI(
            ingestion_pipeline=ingestion_pipeline,
            verbose=False,
            persist_dir=persist_dir,
        )
    else:
        print(
            "Default RagCLI was not built. There are packages missing. Please"
            " install required dependencies by running "
            "`pip install llama-index-embeddings-openai llama-index-llms-openai chromadb llama-index-vector-stores-chroma`"
        )
        return None
def main() -> None:
    """Entry point for the `llamaindex` CLI.

    Builds the argparse command tree (rag, download-llamapack,
    download-llamadataset, upgrade, upgrade-file, new-package) and dispatches
    to the handler each subparser registered via `set_defaults(func=...)`.
    """
    parser = argparse.ArgumentParser(description="LlamaIndex CLI tool.")

    # Subparsers for the main commands
    subparsers = parser.add_subparsers(title="commands", dest="command", required=True)

    # llama rag command
    llamarag_parser = subparsers.add_parser(
        "rag", help="Ask a question to a document or a directory of documents."
    )
    RagCLI.add_parser_args(llamarag_parser, default_rag_cli)

    # download llamapacks command
    llamapack_parser = subparsers.add_parser(
        "download-llamapack", help="Download a llama-pack"
    )
    llamapack_parser.add_argument(
        "llama_pack_class",
        type=str,
        help=(
            "The name of the llama-pack class you want to download, "
            "such as `GmailOpenAIAgentPack`."
        ),
    )
    llamapack_parser.add_argument(
        "-d",
        "--download-dir",
        type=str,
        default="./llama_packs",
        help="Custom dirpath to download the pack into.",
    )
    # NOTE(review): handle_download_llama_pack takes `llama_pack_url`, not
    # `llama_hub_url`, so this flag is currently swallowed by its **kwargs and
    # has no effect — kept as-is for CLI backward compatibility; confirm
    # intended behavior upstream.
    llamapack_parser.add_argument(
        "--llama-hub-url",
        type=str,
        default=LLAMA_HUB_URL,
        help="URL to llama hub.",
    )
    llamapack_parser.set_defaults(
        func=lambda args: handle_download_llama_pack(**vars(args))
    )

    # download llamadatasets command
    llamadataset_parser = subparsers.add_parser(
        "download-llamadataset", help="Download a llama-dataset"
    )
    llamadataset_parser.add_argument(
        "llama_dataset_class",
        type=str,
        help=(
            "The name of the llama-dataset class you want to download, "
            "such as `PaulGrahamEssayDataset`."
        ),
    )
    llamadataset_parser.add_argument(
        "-d",
        "--download-dir",
        type=str,
        default="./llama_datasets",
        # Fixed copy-paste: this command downloads a dataset, not a pack.
        help="Custom dirpath to download the dataset into.",
    )
    llamadataset_parser.add_argument(
        "--llama-hub-url",
        type=str,
        default=LLAMA_HUB_URL,
        help="URL to llama hub.",
    )
    llamadataset_parser.add_argument(
        "--llama-datasets-lfs-url",
        type=str,
        default=LLAMA_DATASETS_LFS_URL,
        help="URL to llama datasets.",
    )
    llamadataset_parser.set_defaults(
        func=lambda args: handle_download_llama_dataset(**vars(args))
    )

    # upgrade (directory) command
    upgrade_parser = subparsers.add_parser(
        "upgrade", help="Upgrade a directory containing notebooks or python files."
    )
    upgrade_parser.add_argument(
        "directory",
        type=str,
        help="The directory to upgrade. Will run on only .ipynb or .py files.",
    )
    upgrade_parser.set_defaults(func=lambda args: upgrade_dir(args.directory))

    # upgrade-file (single file) command
    upgrade_file_parser = subparsers.add_parser(
        "upgrade-file", help="Upgrade a single notebook or python file."
    )
    upgrade_file_parser.add_argument(
        "path",
        type=str,
        # Fixed copy-paste: this argument is a single file, not a directory.
        help="The file to upgrade. Should be a .ipynb or .py file.",
    )
    upgrade_file_parser.set_defaults(func=lambda args: upgrade_file(args.path))

    # init package command
    new_package_parser = subparsers.add_parser(
        "new-package", help="Initialize a new llama-index package"
    )
    new_package_parser.add_argument(
        "-k",
        "--kind",
        type=str,
        help="Kind of package, e.g., llm, embedding, pack, etc.",
    )
    new_package_parser.add_argument(
        "-n",
        "--name",
        type=str,
        help="Name of python package",
    )
    new_package_parser.add_argument(
        "-p",
        "--prefix",
        type=str,
        required=False,
        help="Name of prefix package",
    )
    new_package_parser.set_defaults(func=lambda args: handle_init_package(**vars(args)))

    # Parse the command-line arguments
    args = parser.parse_args()

    # Call the appropriate function based on the command
    args.func(args)
# Script entry point: dispatch to the CLI when this module is run directly.
if __name__ == "__main__":
    main()
python_sources()
import os
import shutil
from pathlib import Path
from llama_index.cli.new_package.templates import (
pyproject_str,
readme_str,
init_str,
init_with_prefix_str,
)
from typing import Optional
def _create_init_file(dir: str):
# create __init__.py
Path(dir + "/__init__.py").touch()
def _create_test_file(filename: str):
Path(filename).touch()
def _makedirs(dir: str):
try:
os.makedirs(dir)
except FileExistsError as e:
pass
def init_new_package(
    integration_type: str,
    integration_name: str,
    prefix: Optional[str] = None,
):
    """Scaffold a new llama-index integration package in the current directory.

    Creates ``llama-index-[<prefix>-]<type>-<name>/`` containing ``tests/``,
    ``examples/``, the nested ``llama_index/...`` source directory, a rendered
    ``__init__.py``, ``pyproject.toml``, ``README.md``, an empty test file, and
    copies of the shared ``.gitignore`` / ``Makefile`` / ``BUILD`` files.

    Args:
        integration_type: kind of integration, e.g. ``llm`` or ``embedding``.
        integration_name: name of the integration.
        prefix: optional extra namespace segment inserted between
            ``llama_index`` and the type.
    """
    # Distribution name, e.g. `llama-index-llms-openai` (kebab-case).
    name_parts = ["llama-index"]
    if prefix is not None:
        name_parts.append(prefix)
    name_parts.extend([integration_type, integration_name])
    pkg_name = "-".join(name_parts).replace(" ", "-").replace("_", "-").lower()

    pkg_path = os.path.join(os.getcwd(), pkg_name)
    tests_path = os.path.join(pkg_path, "tests")
    examples_path = os.path.join(pkg_path, "examples")

    # Nested import path, e.g. `llama_index/llms/openai` (snake_case).
    src_parts = ["llama_index"]
    if prefix is not None:
        src_parts.append(prefix)
    src_parts.extend([integration_type, integration_name])
    pkg_src_dir = os.path.join(
        pkg_path, "/".join(src_parts).replace(" ", "_").lower()
    )

    # make dirs
    _makedirs(pkg_path)
    _makedirs(tests_path)
    _makedirs(examples_path)
    _makedirs(pkg_src_dir)

    # create init files
    _create_init_file(tests_path)
    with open(pkg_src_dir + "/__init__.py", "w") as f:
        if prefix is None:
            init_string = init_str.format(
                TYPE=integration_type.replace(" ", "_").lower(),
                NAME=integration_name.replace(" ", "_").lower(),
            )
        else:
            # BUG FIX: the original used `.replace(" ", "-")` for TYPE/NAME
            # here, producing hyphens inside a Python import path (invalid
            # module names). Use underscores like the no-prefix branch.
            init_string = init_with_prefix_str.format(
                TYPE=integration_type.replace(" ", "_").lower(),
                NAME=integration_name.replace(" ", "_").lower(),
                PREFIX=prefix.replace(" ", "_").lower(),
            )
        f.write(init_string)

    # create pyproject.toml
    with open(pkg_path + "/pyproject.toml", "w") as f:
        f.write(
            pyproject_str.format(
                PACKAGE_NAME=pkg_name,
                TYPE=integration_type.lower(),
                NAME=integration_name.lower(),
            )
        )

    # create readme
    with open(pkg_path + "/README.md", "w") as f:
        f.write(
            readme_str.format(
                PACKAGE_NAME=pkg_name,
                TYPE=integration_type.lower().title(),
                NAME=integration_name.lower().title(),
            )
        )

    # create an empty test file, e.g. tests/test_llms_openai.py
    test_parts = ["test"]
    if prefix is not None:
        test_parts.append(prefix)
    test_parts.extend([integration_type, integration_name])
    test_file_name = tests_path + (
        "/" + "_".join(test_parts).replace(" ", "_").lower() + ".py"
    )
    _create_test_file(test_file_name)

    # copy the shared scaffolding files shipped next to this module
    script_path = Path(__file__).parent.resolve()
    common_path = os.path.join(script_path, "common")
    shutil.copyfile(common_path + "/.gitignore", pkg_path + "/.gitignore")
    shutil.copyfile(common_path + "/Makefile", pkg_path + "/Makefile")
    shutil.copyfile(common_path + "/BUILD", pkg_path + "/BUILD")
llama_index/_static
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
etc/
include/
lib/
lib64/
parts/
sdist/
share/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.ruff_cache
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
notebooks/
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Jetbrains
.idea
modules/
*.swp
# VsCode
.vscode
# pipenv
Pipfile
Pipfile.lock
# pyright
pyrightconfig.json
python_sources()
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

# These targets are commands, not files: declare them phony so a stray file
# named e.g. `test` can never shadow them.
.PHONY: help format lint test watch-docs

help: ## Show all Makefile targets.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

format: ## Run code autoformatters (black).
	pre-commit install
	git ls-files | xargs pre-commit run black --files

lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files

test: ## Run tests via pytest.
	pytest tests

watch-docs: ## Build and watch documentation.
	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
python_sources()
# Re-export the scaffolding template strings so callers can simply do
# `from llama_index.cli.new_package.templates import pyproject_str, ...`.
from llama_index.cli.new_package.templates.pyproject import pyproject_str
from llama_index.cli.new_package.templates.readme import readme_str
from llama_index.cli.new_package.templates.init import (
    init_str,
    init_with_prefix_str,
)

__all__ = ["pyproject_str", "readme_str", "init_str", "init_with_prefix_str"]
init_str = """from llama_index.{TYPE}.{NAME}.base import <FILL>
__all__ = ["<FILL>"]
"""
init_with_prefix_str = """from llama_index.{PREFIX}.{TYPE}.{NAME}.base import <FILL>
__all__ = ["<FILL>"]
"""
pyproject_str = """[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.codespell]
check-filenames = true
check-hidden = true
# Feel free to un-skip examples, and experimental, you will just need to
# work through many typos (--write-changes and --interactive will help)
skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
[tool.mypy]
disallow_untyped_defs = true
# Remove venv skip when integrated with pre-commit
exclude = ["_static", "build", "examples", "notebooks", "venv"]
ignore_missing_imports = true
python_version = "3.8"
[tool.poetry]
name = "{PACKAGE_NAME}"
version = "0.1.0"
description = "llama-index {TYPE} {NAME} integration"
authors = ["Your Name <you@example.com>"]
license = "MIT"
readme = "README.md"
packages = [{{include = "llama_index/"}}]
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = "^0.10.0"
[tool.poetry.group.dev.dependencies]
black = {{extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}}
codespell = {{extras = ["toml"], version = ">=v2.2.6"}}
ipython = "8.10.0"
jupyter = "^1.0.0"
mypy = "0.991"
pre-commit = "3.2.0"
pylint = "2.15.10"
pytest = "7.2.1"
pytest-mock = "3.11.1"
ruff = "0.0.292"
tree-sitter-languages = "^1.8.0"
types-Deprecated = ">=0.1.0"
types-PyYAML = "^6.0.12.12"
types-protobuf = "^4.24.0.4"
types-redis = "4.5.5.0"
types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991
types-setuptools = "67.1.0.0"
"""
readme_str = """# LlamaIndex {TYPE} Integration: {NAME}
"""
python_sources()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment