Commit 3f04971b authored by iaalm, committed by GitHub

[experimental] Add command line tool for llama_index (#1068)

parent 6356e797
.gitignore:

@@ -138,4 +138,5 @@ dmypy.json
 # Jetbrains
 .idea
-modules/
\ No newline at end of file
+modules/
+*.swp
experimental/cli/README.md:

Command line interface (experimental)
========
This module provides a way to interact with llama\_index directly from the shell.
Currently supported commands:
```shell
# create a local config file in local dir
python -m experimental.cli init
# add file to index
python -m experimental.cli add ../data/
# query
python -m experimental.cli query "Some question?"
```
The tool keeps two files in the current directory:
- `config.ini` stores the embedding/predictor model setup along with its parameters
- `index.json` stores the index itself
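
For reference, here is a sketch of the `config.ini` that `init` writes when nothing is customized. It mirrors `DEFAULT_CONFIG` in `configuration.py` below; the exact serialization may differ slightly:

```ini
[store]
type = json

[index]
type = default

[embed_model]
type = default

[llm_predictor]
type = default
```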
experimental/cli/__main__.py:

import logging
from argparse import ArgumentParser, Namespace

from .cli_init import register_init_cli
from .cli_add import register_add_cli
from .cli_query import register_query_cli

logger = logging.getLogger(__name__)


def main() -> None:
    parser = ArgumentParser(description=None)
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version="%(prog)s " + "1.0",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Set verbosity.",
    )

    def print_help(kwargs: Namespace) -> None:
        parser.print_help()

    # Each subcommand module registers its own subparser and handler.
    subparsers = parser.add_subparsers()
    register_init_cli(subparsers)
    register_add_cli(subparsers)
    register_query_cli(subparsers)
    # With no subcommand given, fall back to printing help.
    parser.set_defaults(func=print_help)

    args = parser.parse_args()
    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)
    args.func(args)


if __name__ == "__main__":
    main()
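
Because each handler is attached via `set_defaults(func=...)`, `args.func(args)` dispatches to whichever subcommand was parsed, and repeated `-v` flags raise the log level. For example:

```shell
# show version
python -m experimental.cli -V
# run a query with INFO-level logging
python -m experimental.cli -v query "Some question?"
# run a query with DEBUG-level logging
python -m experimental.cli -vv query "Some question?"
```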
experimental/cli/cli_add.py:

import os
from argparse import Namespace, _SubParsersAction

from llama_index import SimpleDirectoryReader

from .configuration import load_index, save_index


def add_cli(args: Namespace) -> None:
    """Handle subcommand "add"."""
    index = load_index()
    for p in args.files:
        if not os.path.exists(p):
            raise FileNotFoundError(p)
        if os.path.isdir(p):
            # Read every file under the directory.
            documents = SimpleDirectoryReader(p).load_data()
        else:
            # Read the single file.
            documents = SimpleDirectoryReader(input_files=[p]).load_data()
        for document in documents:
            index.insert(document)
    save_index(index)


def register_add_cli(subparsers: _SubParsersAction) -> None:
    """Register subcommand "add" to ArgumentParser."""
    parser = subparsers.add_parser("add")
    parser.add_argument(
        "files",
        default=".",
        nargs="+",
        help="Files to add",
    )
    parser.set_defaults(func=add_cli)
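
`add` accepts one or more paths: directories are expanded by `SimpleDirectoryReader`, while single files are passed through `input_files`. The paths below are illustrative:

```shell
# index every file in a directory plus one standalone file
python -m experimental.cli add ../data/ ./notes.txt
```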
experimental/cli/cli_init.py:

from argparse import Namespace, _SubParsersAction

from .configuration import load_config, save_config


def init_cli(args: Namespace) -> None:
    """Handle subcommand "init"."""
    # Loading merges any existing config.ini over the defaults,
    # so saving it back fills in missing sections without
    # clobbering user edits.
    config = load_config(args.directory)
    save_config(config, args.directory)


def register_init_cli(subparsers: _SubParsersAction) -> None:
    """Register subcommand "init" to ArgumentParser."""
    parser = subparsers.add_parser("init")
    parser.add_argument(
        "directory",
        default=".",
        nargs="?",
        help="Directory to init",
    )
    parser.set_defaults(func=init_cli)
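
Since `load_config` layers an existing `config.ini` over `DEFAULT_CONFIG`, running `init` repeatedly is safe. The target directory below is a placeholder:

```shell
# create config.ini in the current directory
python -m experimental.cli init
# or initialize another directory (the positional argument defaults to ".")
python -m experimental.cli init ./my_project
```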
experimental/cli/cli_query.py:

from argparse import Namespace, _SubParsersAction

from .configuration import load_index


def query_cli(args: Namespace) -> None:
    """Handle subcommand "query"."""
    index = load_index()
    print(index.query(args.query))


def register_query_cli(subparsers: _SubParsersAction) -> None:
    """Register subcommand "query" to ArgumentParser."""
    parser = subparsers.add_parser("query")
    parser.add_argument(
        "query",
        help="Query",
    )
    parser.set_defaults(func=query_cli)
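
The same flow is usable from Python. A minimal sketch, assuming this commit's package layout and that `config.ini`/`index.json` live in the working directory:

```python
# Hypothetical programmatic use of this module (not part of the commit).
from experimental.cli.configuration import load_index

index = load_index()  # reads config.ini and index.json from "."
print(index.query("Some question?"))
```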
experimental/cli/configuration.py:

import os
from configparser import ConfigParser
from typing import Any

from langchain import OpenAI
from llama_index import GPTSimpleVectorIndex, ServiceContext, LLMPredictor
from llama_index.data_structs.data_structs_v2 import SimpleIndexDict
from llama_index.embeddings.base import BaseEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.indices.base import BaseGPTIndex

CONFIG_FILE_NAME = "config.ini"
JSON_INDEX_FILE_NAME = "index.json"
DEFAULT_CONFIG = {
    "store": {"type": "json"},
    "index": {"type": "default"},
    "embed_model": {"type": "default"},
    "llm_predictor": {"type": "default"},
}


def load_config(root: str = ".") -> ConfigParser:
    """Load configuration from file, falling back to DEFAULT_CONFIG."""
    config = ConfigParser()
    config.read_dict(DEFAULT_CONFIG)
    config.read(os.path.join(root, CONFIG_FILE_NAME))
    return config


def save_config(config: ConfigParser, root: str = ".") -> None:
    """Save configuration to file."""
    with open(os.path.join(root, CONFIG_FILE_NAME), "w") as fd:
        config.write(fd)


def load_index(root: str = ".") -> BaseGPTIndex[Any]:
    """Load an existing index file, or create an empty index."""
    config = load_config(root)
    service_context = _load_service_context(config)
    if config["store"]["type"] == "json":
        index_file = os.path.join(root, JSON_INDEX_FILE_NAME)
    else:
        raise KeyError(f"Unknown store.type {config['store']['type']}")
    if os.path.exists(index_file):
        return GPTSimpleVectorIndex.load_from_disk(
            index_file, service_context=service_context
        )
    else:
        return GPTSimpleVectorIndex(
            index_struct=SimpleIndexDict(), service_context=service_context
        )


def save_index(index: BaseGPTIndex[Any], root: str = ".") -> None:
    """Save index to file."""
    config = load_config(root)
    if config["store"]["type"] == "json":
        index_file = os.path.join(root, JSON_INDEX_FILE_NAME)
    else:
        raise KeyError(f"Unknown store.type {config['store']['type']}")
    index.save_to_disk(index_file)


def _load_service_context(config: ConfigParser) -> ServiceContext:
    """Internal function to load service context based on configuration."""
    embed_model = _load_embed_model(config)
    llm_predictor = _load_llm_predictor(config)
    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embed_model
    )


def _load_llm_predictor(config: ConfigParser) -> LLMPredictor:
    """Internal function to load LLM predictor based on configuration."""
    model_type = config["llm_predictor"]["type"].lower()
    if model_type == "default":
        return LLMPredictor()
    elif model_type == "azure":
        engine = config["llm_predictor"]["engine"]
        return LLMPredictor(llm=OpenAI(engine=engine))
    else:
        raise KeyError(f"Unknown llm_predictor.type {model_type}")


def _load_embed_model(config: ConfigParser) -> BaseEmbedding:
    """Internal function to load embedding model based on configuration."""
    model_type = config["embed_model"]["type"]
    if model_type == "default":
        return OpenAIEmbedding()
    else:
        raise KeyError(f"Unknown embed_model.type {model_type}")
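
`_load_llm_predictor` reads only `type` and, for Azure, `engine`, so switching predictors is a pure config change. A hedged example of the corresponding `config.ini` section (the deployment name is a placeholder):

```ini
[llm_predictor]
type = azure
; "engine" names your Azure OpenAI deployment (placeholder value)
engine = my-deployment
```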