diff --git a/.gitignore b/.gitignore
index 0900a937e6bbf43d8d39a2952ee76c38bf8a1b42..1ef6f088dad1cb8e3730347fa6f9b8bf40fb20e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,4 +138,5 @@ dmypy.json
 
 # Jetbrains
 .idea
-modules/
\ No newline at end of file
+modules/
+*.swp
diff --git a/experimental/cli/README.md b/experimental/cli/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3ae04e4cc9bec7c34a49da331c6b08571e3fa0b9
--- /dev/null
+++ b/experimental/cli/README.md
@@ -0,0 +1,23 @@
+Command line interface (experimental)
+========
+
+This module provides a way to interact with llama\_index directly from the shell.
+
+Currently supported commands:
+
+```shell
+# create a local config file in the current directory
+python -m experimental.cli init
+
+# add files to the index
+python -m experimental.cli add ../data/
+
+# query the index
+python -m experimental.cli query "Some question?"
+```
+
+Two files are written to the current directory:
+
+- config.ini stores the embedding/predictor model setup along with its parameters
+- index.json is the index file
+
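The `init` subcommand simply loads the configuration and saves it back, so with nothing overridden the config.ini it writes is just `DEFAULT_CONFIG` (from configuration.py below) in `ConfigParser` form. A sketch of the expected file contents, not captured from an actual run:

```ini
[store]
type = json

[index]
type = default

[embed_model]
type = default

[llm_predictor]
type = default
```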
help="Files to add", + ) + + parser.set_defaults(func=add_cli) diff --git a/experimental/cli/cli_init.py b/experimental/cli/cli_init.py new file mode 100644 index 0000000000000000000000000000000000000000..153b8154775eed45ab2b16fe3d77daaa41850eed --- /dev/null +++ b/experimental/cli/cli_init.py @@ -0,0 +1,21 @@ +from .configuration import load_config, save_config +from argparse import Namespace, _SubParsersAction + + +def init_cli(args: Namespace) -> None: + """Handle subcommand "init" """ + config = load_config(args.directory) + save_config(config, args.directory) + + +def register_init_cli(subparsers: _SubParsersAction) -> None: + """Register subcommand "init" to ArgumentParser""" + parser = subparsers.add_parser("init") + parser.add_argument( + "directory", + default=".", + nargs="?", + help="Directory to init", + ) + + parser.set_defaults(func=init_cli) diff --git a/experimental/cli/cli_query.py b/experimental/cli/cli_query.py new file mode 100644 index 0000000000000000000000000000000000000000..384ea5d5a3600120ad906967d0eb54c11e0aa2b3 --- /dev/null +++ b/experimental/cli/cli_query.py @@ -0,0 +1,19 @@ +from .configuration import load_index +from argparse import Namespace, _SubParsersAction + + +def query_cli(args: Namespace) -> None: + """Handle subcommand "query" """ + index = load_index() + print(index.query(args.query)) + + +def register_query_cli(subparsers: _SubParsersAction) -> None: + """Register subcommand "query" to ArgumentParser""" + parser = subparsers.add_parser("query") + parser.add_argument( + "query", + help="Query", + ) + + parser.set_defaults(func=query_cli) diff --git a/experimental/cli/configuration.py b/experimental/cli/configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..c995e620b3b056f603eff4779b6cc0c4acdad74f --- /dev/null +++ b/experimental/cli/configuration.py @@ -0,0 +1,91 @@ +import os +from configparser import ConfigParser +from typing import Any +from llama_index.embeddings.openai import OpenAIEmbedding +from langchain import OpenAI +from llama_index.indices.base import BaseGPTIndex +from llama_index.embeddings.base import BaseEmbedding +from llama_index import GPTSimpleVectorIndex, ServiceContext, LLMPredictor +from llama_index.data_structs.data_structs_v2 import SimpleIndexDict + + +CONFIG_FILE_NAME = "config.ini" +JSON_INDEX_FILE_NAME = "index.json" +DEFAULT_CONFIG = { + "store": {"type": "json"}, + "index": {"type": "default"}, + "embed_model": {"type": "default"}, + "llm_predictor": {"type": "default"}, +} + + +def load_config(root: str = ".") -> ConfigParser: + """Load configuration from file""" + config = ConfigParser() + config.read_dict(DEFAULT_CONFIG) + config.read(os.path.join(root, CONFIG_FILE_NAME)) + return config + + +def save_config(config: ConfigParser, root: str = ".") -> None: + """Load configuration to file""" + with open(os.path.join(root, CONFIG_FILE_NAME), "w") as fd: + config.write(fd) + + +def load_index(root: str = ".") -> BaseGPTIndex[Any]: + """Load existing index file""" + config = load_config(root) + service_context = _load_service_context(config) + if config["store"]["type"] == "json": + index_file = os.path.join(root, JSON_INDEX_FILE_NAME) + else: + raise KeyError(f"Unknown index.type {config['index']['type']}") + if os.path.exists(index_file): + return GPTSimpleVectorIndex.load_from_disk( + index_file, service_context=service_context + ) + else: + return GPTSimpleVectorIndex( + index_struct=SimpleIndexDict(), service_context=service_context + ) + + +def save_index(index: BaseGPTIndex[Any], 
root: str = ".") -> None: + """Save index to file""" + config = load_config(root) + if config["store"]["type"] == "json": + index_file = os.path.join(root, JSON_INDEX_FILE_NAME) + else: + raise KeyError(f"Unknown index.type {config['index']['type']}") + index.save_to_disk(index_file) + + +def _load_service_context(config: ConfigParser) -> ServiceContext: + """Internal function to load service context based on configuration""" + embed_model = _load_embed_model(config) + llm_predictor = _load_llm_predictor(config) + return ServiceContext.from_defaults( + llm_predictor=llm_predictor, embed_model=embed_model + ) + + +def _load_llm_predictor(config: ConfigParser) -> LLMPredictor: + """Internal function to load LLM predictor based on configuration""" + model_type = config["llm_predictor"]["type"].lower() + if model_type == "default": + return LLMPredictor() + if model_type == "azure": + engine = config["llm_predictor"]["engine"] + return LLMPredictor(llm=OpenAI(engine=engine)) + else: + raise KeyError("llm_predictor.type") + + +def _load_embed_model(config: ConfigParser) -> BaseEmbedding: + """Internal function to load embedding model based on configuration""" + model_type = config["embed_model"]["type"] + if model_type == "default": + return OpenAIEmbedding() + else: + raise KeyError("embed_model.type")