diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 531933cf10f601fce96c40825be825fbb6b98152..3e6c55d5d5f30a1b10d506adf918bd08013515f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,13 +26,26 @@ repos: hooks: - id: black-jupyter name: black-src + alias: black exclude: docs/ - repo: https://github.com/psf/black-pre-commit-mirror rev: 23.10.1 hooks: - id: black-jupyter - name: black-docs + name: black-docs-py + alias: black files: docs/ + # Using PEP 8's line length in docs prevents excess left/right scrolling + args: [--line-length=79] + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.16.0 + hooks: + - id: blacken-docs + name: black-docs-text + alias: black + types_or: [rst, markdown, tex] + additional_dependencies: [black==23.10.1] + # Using PEP 8's line length in docs prevents excess left/right scrolling args: [--line-length=79] - repo: https://github.com/pre-commit/mirrors-prettier rev: v3.0.3 @@ -53,8 +66,3 @@ repos: hooks: - id: toml-sort-fix exclude: poetry.lock - - repo: https://github.com/adamchainz/blacken-docs - rev: "1.16.0" - hooks: - - id: blacken-docs - additional_dependencies: [black==23.10.1] diff --git a/Makefile b/Makefile index b2b8b95da58beb3dde48b7cbed590e5b250ae617..d35d30f7481e41106e2fa64387e77ac24c67dc7e 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ help: ## Show all Makefile targets. format: ## Run code autoformatters (black). pre-commit install - pre-commit run black-jupyter --all-files + pre-commit run black --all-files lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy pre-commit install && pre-commit run --all-files --show-diff-on-failure diff --git a/README.md b/README.md index a42ec47e76f6a9a24d1d8bbabbe7a7960e753dd3..89df1f3030314a58958facf0c3a9bbbaf0c0eb86 100644 --- a/README.md +++ b/README.md @@ -99,12 +99,16 @@ from llama_index.embeddings import HuggingFaceEmbedding from llama_index import ServiceContext embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") -service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model) +service_context = ServiceContext.from_defaults( + llm=llm, embed_model=embed_model +) from llama_index import VectorStoreIndex, SimpleDirectoryReader documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data() -index = VectorStoreIndex.from_documents(documents, service_context=service_context) +index = VectorStoreIndex.from_documents( + documents, service_context=service_context +) ``` To query: diff --git a/docs/community/integrations/graphsignal.md b/docs/community/integrations/graphsignal.md index d6ad8ddd2a71ec2f60011d1f7a87a59f97b4469e..51fd3ad759e55dda2ab917c0de14aa3a85241481 100644 --- a/docs/community/integrations/graphsignal.md +++ b/docs/community/integrations/graphsignal.md @@ -18,7 +18,9 @@ pip install graphsignal import graphsignal # Provide an API key directly or via GRAPHSIGNAL_API_KEY environment variable -graphsignal.configure(api_key="my-api-key", deployment="my-llama-index-app-prod") +graphsignal.configure( + api_key="my-api-key", deployment="my-llama-index-app-prod" +) ``` You can get an API key [here](https://app.graphsignal.com/). 
diff --git a/docs/community/integrations/guidance.md b/docs/community/integrations/guidance.md index 4004c8a56d97018708d25f3c82b527c3e4bde0be..130173d7adebde4331025cdb51c6ea560fa8c2f5 100644 --- a/docs/community/integrations/guidance.md +++ b/docs/community/integrations/guidance.md @@ -77,7 +77,9 @@ intermediate response has the expected structure (so that they can be parsed cor As an example, we implement a `GuidanceQuestionGenerator` that can be plugged into a `SubQuestionQueryEngine` to make it more robust than using the default setting. ```python -from llama_index.question_gen.guidance_generator import GuidanceQuestionGenerator +from llama_index.question_gen.guidance_generator import ( + GuidanceQuestionGenerator, +) from guidance.llms import OpenAI as GuidanceOpenAI # define guidance based question generator diff --git a/docs/community/integrations/lmformatenforcer.md b/docs/community/integrations/lmformatenforcer.md index 24306b1b0572a72497e3a1a619b84d3e4d48651a..b7083d1cfe3e6fd0efa697b0aee872d68f366ec7 100644 --- a/docs/community/integrations/lmformatenforcer.md +++ b/docs/community/integrations/lmformatenforcer.md @@ -53,7 +53,10 @@ Album( artist="The Shining Choir", songs=[ Song(title="Redrum", length_seconds=300), - Song(title="All Work and No Play Makes Jack a Dull Boy", length_seconds=240), + Song( + title="All Work and No Play Makes Jack a Dull Boy", + length_seconds=240, + ), Song(title="Heeeeere's Johnny!", length_seconds=180), ], ) diff --git a/docs/community/integrations/using_with_langchain.md b/docs/community/integrations/using_with_langchain.md index 11ce40d7f6022a5b0372dd7bc1f0473e3117bd2d..dcb4d3d3466fad2224477bbde28817d67b7418de 100644 --- a/docs/community/integrations/using_with_langchain.md +++ b/docs/community/integrations/using_with_langchain.md @@ -21,7 +21,10 @@ LlamaIndex provides Tool abstractions so that you can use a LlamaIndex query eng For instance, you can choose to create a "Tool" from an `QueryEngine` directly as follows: ```python -from llama_index.langchain_helpers.agents import IndexToolConfig, LlamaIndexTool +from llama_index.langchain_helpers.agents import ( + IndexToolConfig, + LlamaIndexTool, +) tool_config = IndexToolConfig( query_engine=query_engine, diff --git a/docs/community/integrations/vector_stores.md b/docs/community/integrations/vector_stores.md index 47df4538e8cf58fa1b8273fb8ba2f3b828b2c8be..d857285a216697c641c38559f9c49133393a2d67 100644 --- a/docs/community/integrations/vector_stores.md +++ b/docs/community/integrations/vector_stores.md @@ -74,7 +74,9 @@ storage_context = StorageContext.from_defaults( # Load documents and build index documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() -index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context +) # Query index query_engine = index.as_query_engine() @@ -129,7 +131,9 @@ Then connect and use Redis as a vector database with LlamaIndex from llama_index.vector_stores import RedisVectorStore vector_store = RedisVectorStore( - index_name="llm-project", redis_url="redis://localhost:6379", overwrite=True + index_name="llm-project", + redis_url="redis://localhost:6379", + overwrite=True, ) ``` @@ -183,7 +187,8 @@ resource_owner_config = weaviate.AuthClientPassword( password="<password>", ) client = weaviate.Client( - "https://<cluster-id>.semi.network/", auth_client_secret=resource_owner_config + "https://<cluster-id>.semi.network/", + 
auth_client_secret=resource_owner_config, ) # construct vector store @@ -206,10 +211,14 @@ vector_store = ZepVectorStore( storage_context = StorageContext.from_defaults(vector_store=vector_store) -index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context +) # Query index using both a text query and metadata filters -filters = MetadataFilters(filters=[ExactMatchFilter(key="theme", value="Mafia")]) +filters = MetadataFilters( + filters=[ExactMatchFilter(key="theme", value="Mafia")] +) retriever = index.as_retriever(filters=filters) result = retriever.retrieve("What is inception about?") ``` @@ -223,7 +232,9 @@ from llama_index.vector_stores import PineconeVectorStore # Creating a Pinecone index api_key = "api_key" pinecone.init(api_key=api_key, environment="us-west1-gcp") -pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1") +pinecone.create_index( + "quickstart", dimension=1536, metric="euclidean", pod_type="p1" +) index = pinecone.Index("quickstart") # can define filters specific to this vector index (so you can @@ -271,7 +282,9 @@ cluster = Cluster(["127.0.0.1"]) cassio.init(session=cluster.connect(), keyspace="my_keyspace") # After the above `cassio.init(...)`, create a vector store: -vector_store = CassandraVectorStore(table="cass_v_table", embedding_dimension=1536) +vector_store = CassandraVectorStore( + table="cass_v_table", embedding_dimension=1536 +) ``` **Chroma** @@ -319,7 +332,9 @@ import pymilvus from llama_index.vector_stores import MilvusVectorStore # construct vector store -vector_store = MilvusVectorStore(uri="https://localhost:19530", overwrite="True") +vector_store = MilvusVectorStore( + uri="https://localhost:19530", overwrite="True" +) ``` **Note**: `MilvusVectorStore` depends on the `pymilvus` library. @@ -339,7 +354,9 @@ from llama_index.vector_stores import MilvusVectorStore # construct vector store vector_store = MilvusVectorStore( - uri="foo.vectordb.zillizcloud.com", token="your_token_here", overwrite="True" + uri="foo.vectordb.zillizcloud.com", + token="your_token_here", + overwrite="True", ) ``` @@ -423,16 +440,22 @@ from llama_index.storage.storage_context import StorageContext from llama_index.readers.file.base import SimpleDirectoryReader # mongo_uri = os.environ["MONGO_URI"] -mongo_uri = "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority" +mongo_uri = ( + "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority" +) mongodb_client = pymongo.MongoClient(mongo_uri) # construct store store = MongoDBAtlasVectorSearch(mongodb_client) storage_context = StorageContext.from_defaults(vector_store=store) -uber_docs = SimpleDirectoryReader(input_files=["../data/10k/uber_2021.pdf"]).load_data() +uber_docs = SimpleDirectoryReader( + input_files=["../data/10k/uber_2021.pdf"] +).load_data() # construct index -index = VectorStoreIndex.from_documents(uber_docs, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + uber_docs, storage_context=storage_context +) ``` **Neo4j** @@ -517,7 +540,9 @@ reader = ChromaReader( query_vector = [n1, n2, n3, ...] -documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5) +documents = reader.load_data( + collection_name="demo", query_vector=query_vector, limit=5 +) index = SummaryIndex.from_documents(documents) query_engine = index.as_query_engine() @@ -542,7 +567,9 @@ query_vector = [n1, n2, n3, ...] 
# See the Python client: https://github.com/qdrant/qdrant_client # for more details -documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5) +documents = reader.load_data( + collection_name="demo", query_vector=query_vector, limit=5 +) ``` NOTE: Since Weaviate can store a hybrid of document and vector objects, the user may either choose to explicitly specify `class_name` and `properties` in order to query documents, or they may choose to specify a raw GraphQL query. See below for usage. diff --git a/docs/getting_started/customization.rst b/docs/getting_started/customization.rst index 3ca772a365a3336a9a53e31dfe2cdd45f980032e..ed92118447a21d46a8ab90900787a5ce98edcf7e 100644 --- a/docs/getting_started/customization.rst +++ b/docs/getting_started/customization.rst @@ -33,7 +33,9 @@ The `ServiceContext <../module_guides/supporting_modules/service_context.html>`_ from llama_index import VectorStoreIndex, SimpleDirectoryReader documents = SimpleDirectoryReader("data").load_data() - index = VectorStoreIndex.from_documents(documents, service_context=service_context) + index = VectorStoreIndex.from_documents( + documents, service_context=service_context + ) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") print(response) @@ -61,7 +63,9 @@ The `ServiceContext <../module_guides/supporting_modules/service_context.html>`_ from llama_index import VectorStoreIndex, SimpleDirectoryReader documents = SimpleDirectoryReader("data").load_data() - index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) + index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context + ) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") print(response) diff --git a/docs/module_guides/deploying/agents/tools/usage_pattern.md b/docs/module_guides/deploying/agents/tools/usage_pattern.md index a7f687848a3a5191f7a9884521ee26e681cb9865..03091a1e58f0c52f4aeebbfb506fb950945c100f 100644 --- a/docs/module_guides/deploying/agents/tools/usage_pattern.md +++ b/docs/module_guides/deploying/agents/tools/usage_pattern.md @@ -48,6 +48,9 @@ langchain_tools = [t.to_langchain_tool() for t in tools] from langchain.agents import initialize_agent agent_executor = initialize_agent( - langchain_tools, llm, agent="conversational-react-description", memory=memory + langchain_tools, + llm, + agent="conversational-react-description", + memory=memory, ) ``` diff --git a/docs/module_guides/deploying/agents/usage_pattern.md b/docs/module_guides/deploying/agents/usage_pattern.md index 3b77b84770539916adcc66bfef883832a220b6f9..d41c1281f67312e520f66b55a27b91c2386faaf3 100644 --- a/docs/module_guides/deploying/agents/usage_pattern.md +++ b/docs/module_guides/deploying/agents/usage_pattern.md @@ -87,7 +87,8 @@ query_engine_tools = [ QueryEngineTool( query_engine=gmail_agent, metadata=ToolMetadata( - name="gmail_agent", description="Tool that can send emails on Gmail."
+ name="gmail_agent", + description="Tool that can send emails on Gmail.", ), ), ] @@ -129,7 +130,9 @@ We then define our `FnRetrieverOpenAIAgent`: ```python from llama_index.agent import FnRetrieverOpenAIAgent -agent = FnRetrieverOpenAIAgent.from_retriever(obj_index.as_retriever(), verbose=True) +agent = FnRetrieverOpenAIAgent.from_retriever( + obj_index.as_retriever(), verbose=True +) ``` ### Context Retrieval Agents @@ -155,7 +158,9 @@ context_index = VectorStoreIndex.from_documents(docs) # add context agent context_agent = ContextRetrieverOpenAIAgent.from_tools_and_retriever( - query_engine_tools, context_index.as_retriever(similarity_top_k=1), verbose=True + query_engine_tools, + context_index.as_retriever(similarity_top_k=1), + verbose=True, ) response = context_agent.chat("What is the YZ of March 2022?") ``` @@ -171,7 +176,9 @@ plan over a set of subtools. from llama_index.tools import QueryPlanTool from llama_index import get_response_synthesizer -response_synthesizer = get_response_synthesizer(service_context=service_context) +response_synthesizer = get_response_synthesizer( + service_context=service_context +) query_plan_tool = QueryPlanTool.from_defaults( query_engine_tools=[query_tool_sept, query_tool_june, query_tool_march], response_synthesizer=response_synthesizer, @@ -186,5 +193,7 @@ agent = OpenAIAgent.from_tools( ) # should output a query plan to call march, june, and september tools -response = agent.query("Analyze Uber revenue growth in March, June, and September") +response = agent.query( + "Analyze Uber revenue growth in March, June, and September" +) ``` diff --git a/docs/module_guides/deploying/chat_engines/usage_pattern.md b/docs/module_guides/deploying/chat_engines/usage_pattern.md index 29f3f4ec02297fd7b1031e7082ae8ff0d1e467b5..ec4d20ce258f8a0fd6f6027016a2ca14406d6a61 100644 --- a/docs/module_guides/deploying/chat_engines/usage_pattern.md +++ b/docs/module_guides/deploying/chat_engines/usage_pattern.md @@ -71,7 +71,9 @@ Here's an example where we configure the following: ```python from llama_index.prompts import PromptTemplate from llama_index.llms import ChatMessage, MessageRole -from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine +from llama_index.chat_engine.condense_question import ( + CondenseQuestionChatEngine, +) custom_prompt = PromptTemplate( """\ diff --git a/docs/module_guides/deploying/query_engine/usage_pattern.md b/docs/module_guides/deploying/query_engine/usage_pattern.md index a106e749872539f67cef888ba559d282390c05c3..1b46cab76eed72b88298e0f081f321d88394ba87 100644 --- a/docs/module_guides/deploying/query_engine/usage_pattern.md +++ b/docs/module_guides/deploying/query_engine/usage_pattern.md @@ -108,7 +108,10 @@ You can also define a custom query engine. 
Simply subclass the `CustomQueryEngin ```python from llama_index.query_engine import CustomQueryEngine from llama_index.retrievers import BaseRetriever -from llama_index.response_synthesizers import get_response_synthesizer, BaseSynthesizer +from llama_index.response_synthesizers import ( + get_response_synthesizer, + BaseSynthesizer, +) class RAGQueryEngine(CustomQueryEngine): diff --git a/docs/module_guides/evaluating/usage_pattern.md b/docs/module_guides/evaluating/usage_pattern.md index 3c031af42f232dc1479f37690973e2f93e5dbe86..8b6dae3ab2447cc85ca0b15b9f9b3358c514691a 100644 --- a/docs/module_guides/evaluating/usage_pattern.md +++ b/docs/module_guides/evaluating/usage_pattern.md @@ -159,7 +159,9 @@ response = query_engine.query(query) response_str = response.response for source_node in response.source_nodes: eval_result = evaluator.evaluate( - query=query, response=response_str, contexts=[source_node.get_content()] + query=query, + response=response_str, + contexts=[source_node.get_content()], ) print(str(eval_result.passing)) ``` diff --git a/docs/module_guides/evaluating/usage_pattern_retrieval.md b/docs/module_guides/evaluating/usage_pattern_retrieval.md index 6600f6d4892e58d62efe3bc0fab88958a490ae6a..3696d05d50985855f353589f5eb91cb7acc7dcdb 100644 --- a/docs/module_guides/evaluating/usage_pattern_retrieval.md +++ b/docs/module_guides/evaluating/usage_pattern_retrieval.md @@ -17,7 +17,9 @@ retriever_evaluator = RetrieverEvaluator.from_metric_names( ["mrr", "hit_rate"], retriever=retriever ) -retriever_evaluator.evaluate(query="query", expected_ids=["node_id1", "node_id2"]) +retriever_evaluator.evaluate( + query="query", expected_ids=["node_id1", "node_id2"] +) ``` ## Building an Evaluation Dataset @@ -27,7 +29,9 @@ You can manually curate a retrieval evaluation dataset of questions + node id's. ```python from llama_index.evaluation import generate_question_context_pairs -qa_dataset = generate_question_context_pairs(nodes, llm=llm, num_questions_per_chunk=2) +qa_dataset = generate_question_context_pairs( + nodes, llm=llm, num_questions_per_chunk=2 +) ``` The returned result is a `EmbeddingQAFinetuneDataset` object (containing `queries`, `relevant_docs`, and `corpus`). 
diff --git a/docs/module_guides/indexing/composability.md b/docs/module_guides/indexing/composability.md index 3f2afe255c11c4169ddc4139e2277b676a2d9077..c5e43b290bf87ed770b4140aecf1b73a25d139c5 100644 --- a/docs/module_guides/indexing/composability.md +++ b/docs/module_guides/indexing/composability.md @@ -49,7 +49,9 @@ You may choose to manually specify the summary text, or use LlamaIndex itself to a summary, for instance with the following: ```python -summary = index1.query("What is a summary of this document?", retriever_mode="all_leaf") +summary = index1.query( + "What is a summary of this document?", retriever_mode="all_leaf" +) index1_summary = str(summary) ``` diff --git a/docs/module_guides/indexing/document_management.md b/docs/module_guides/indexing/document_management.md index 0abfc8898a86ea76c896439d558f9978e93e6696..acbf217122aefb2436a66d70b98341f0183b7d43 100644 --- a/docs/module_guides/indexing/document_management.md +++ b/docs/module_guides/indexing/document_management.md @@ -48,7 +48,8 @@ If a Document is already present within an index, you can "update" a Document wi # NOTE: the document has a `doc_id` specified doc_chunks[0].text = "Brand new document text" index.update_ref_doc( - doc_chunks[0], update_kwargs={"delete_kwargs": {"delete_from_docstore": True}} + doc_chunks[0], + update_kwargs={"delete_kwargs": {"delete_from_docstore": True}}, ) ``` @@ -68,7 +69,10 @@ doc_chunks[0] = Document(text="Super new document text", id_="doc_id_0") # add a new document doc_chunks.append( - Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3") + Document( + text="This isn't in the index yet, but it will be soon!", + id_="doc_id_3", + ) ) # refresh the index diff --git a/docs/module_guides/loading/documents_and_nodes/usage_documents.md b/docs/module_guides/loading/documents_and_nodes/usage_documents.md index 41195db50412b547fa966ae2922fe0c496b5663c..28ac753489c923b7e7ec59e9866413569b3dad83 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_documents.md +++ b/docs/module_guides/loading/documents_and_nodes/usage_documents.md @@ -43,7 +43,8 @@ There are a few ways to set up this dictionary: ```python document = Document( - text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"} + text="text", + metadata={"filename": "<doc_file_name>", "category": "<category>"}, ) ``` @@ -61,7 +62,9 @@ from llama_index import SimpleDirectoryReader filename_fn = lambda filename: {"file_name": filename} # automatically sets the metadata of each document according to filename_fn -documents = SimpleDirectoryReader("./data", file_metadata=filename_fn).load_data() +documents = SimpleDirectoryReader( + "./data", file_metadata=filename_fn +).load_data() ``` ### Customizing the id @@ -158,7 +161,10 @@ document = Document( text_template="Metadata: {metadata_str}\n-----\nContent: {content}", ) -print("The LLM sees this: \n", document.get_content(metadata_mode=MetadataMode.LLM)) +print( + "The LLM sees this: \n", + document.get_content(metadata_mode=MetadataMode.LLM), +) print( "The Embedding model sees this: \n", document.get_content(metadata_mode=MetadataMode.EMBED), diff --git a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md index 9bb6cb7c85ede8b0a1e8bb984ea44b5d3973c024..17372550df69432ad36d3e70f9949abab59458f2 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md +++ 
b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md @@ -19,7 +19,9 @@ from llama_index.node_parser.extractors import ( ) from llama_index.text_splitter import TokenTextSplitter -text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128) +text_splitter = TokenTextSplitter( + separator=" ", chunk_size=512, chunk_overlap=128 +) metadata_extractor = MetadataExtractor( extractors=[ TitleExtractor(nodes=5), diff --git a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md index 6bebdce04ed147a37cd7008d8425cdcc48a37185..643267b291bc87276e985d84ae51261342730573 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md +++ b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md @@ -23,8 +23,12 @@ from llama_index.schema import TextNode, NodeRelationship, RelatedNodeInfo node1 = TextNode(text="<text_chunk>", id_="<node_id>") node2 = TextNode(text="<text_chunk>", id_="<node_id>") # set relationships -node1.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(node_id=node2.node_id) -node2.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(node_id=node1.node_id) +node1.relationships[NodeRelationship.NEXT] = RelatedNodeInfo( + node_id=node2.node_id +) +node2.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo( + node_id=node1.node_id +) nodes = [node1, node2] ``` diff --git a/docs/module_guides/loading/node_parsers/root.md b/docs/module_guides/loading/node_parsers/root.md index 6a04db518c629d0fdce8190cd70c3f240d5a460c..946db9a2b498c99d0909eb9eb3bded6e546f2edb 100644 --- a/docs/module_guides/loading/node_parsers/root.md +++ b/docs/module_guides/loading/node_parsers/root.md @@ -42,7 +42,9 @@ documents = SimpleDirectoryReader("./data").load_data() node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20) service_context = ServiceContext.from_defaults(node_parser=node_parser) -index = VectorStoreIndex.from_documents(documents, service_context=service_context) +index = VectorStoreIndex.from_documents( + documents, service_context=service_context +) ``` ## Customization diff --git a/docs/module_guides/models/embeddings.md b/docs/module_guides/models/embeddings.md index dcafee735f67ffb93973817bb08642e3ff11e905..63af416cf030d06e16724bad91640c9e496ae6f9 100644 --- a/docs/module_guides/models/embeddings.md +++ b/docs/module_guides/models/embeddings.md @@ -91,7 +91,9 @@ To configure the model used (from Hugging Face hub), add the model name separate ```python from llama_index import ServiceContext -service_context = ServiceContext.from_defaults(embed_model="local:BAAI/bge-large-en") +service_context = ServiceContext.from_defaults( + embed_model="local:BAAI/bge-large-en" +) ``` ### HuggingFace Optimum ONNX Embeddings @@ -109,7 +111,9 @@ Creation with specifying the model and output path: ```python from llama_index.embeddings import OptimumEmbedding -OptimumEmbedding.create_and_save_optimum_model("BAAI/bge-small-en-v1.5", "./bge_onnx") +OptimumEmbedding.create_and_save_optimum_model( + "BAAI/bge-small-en-v1.5", "./bge_onnx" +) ``` And then usage: @@ -179,7 +183,9 @@ class InstructorEmbeddings(BaseEmbedding): You can also use embeddings as a standalone module for your project, existing application, or general testing and exploration. ```python -embeddings = embed_model.get_text_embedding("It is raining cats and dogs here!") +embeddings = embed_model.get_text_embedding( + "It is raining cats and dogs here!" 
+) ``` ## Modules diff --git a/docs/module_guides/models/llms/usage_custom.md b/docs/module_guides/models/llms/usage_custom.md index 50437883201029f08a1bb34f0dcfe164e94b3339..5bc619cb2aab53d2bdcbbc2813a0311462aa17f3 100644 --- a/docs/module_guides/models/llms/usage_custom.md +++ b/docs/module_guides/models/llms/usage_custom.md @@ -39,11 +39,15 @@ llm = OpenAI(temperature=0.1, model="gpt-4") service_context = ServiceContext.from_defaults(llm=llm) # build index -index = KeywordTableIndex.from_documents(documents, service_context=service_context) +index = KeywordTableIndex.from_documents( + documents, service_context=service_context +) # get response from query query_engine = index.as_query_engine() -response = query_engine.query("What did the author do after his time at Y Combinator?") +response = query_engine.query( + "What did the author do after his time at Y Combinator?" +) ``` ## Example: Changing the number of output tokens (for OpenAI, Cohere, AI21) @@ -55,7 +59,11 @@ For OpenAI, Cohere, AI21, you just need to set the `max_tokens` parameter (or maxTokens for AI21). We will handle text chunking/calculations under the hood. ```python -from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext +from llama_index import ( + KeywordTableIndex, + SimpleDirectoryReader, + ServiceContext, +) from llama_index.llms import OpenAI documents = SimpleDirectoryReader("data").load_data() @@ -70,7 +78,11 @@ service_context = ServiceContext.from_defaults(llm=llm) If you are using other LLM classes from langchain, you may need to explicitly configure the `context_window` and `num_output` via the `ServiceContext` since the information is not available by default. ```python -from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext +from llama_index import ( + KeywordTableIndex, + SimpleDirectoryReader, + ServiceContext, +) from llama_index.llms import OpenAI # alternatively @@ -203,20 +215,26 @@ class OurLLM(CustomLLM): def metadata(self) -> LLMMetadata: """Get LLM metadata.""" return LLMMetadata( - context_window=context_window, num_output=num_output, model_name=model_name + context_window=context_window, + num_output=num_output, + model_name=model_name, ) @llm_completion_callback() def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: prompt_length = len(prompt) - response = pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"] + response = pipeline(prompt, max_new_tokens=num_output)[0][ + "generated_text" + ] # only return newly generated tokens text = response[prompt_length:] return CompletionResponse(text=text) @llm_completion_callback() - def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + def stream_complete( + self, prompt: str, **kwargs: Any + ) -> CompletionResponseGen: raise NotImplementedError() diff --git a/docs/module_guides/models/llms/usage_standalone.md b/docs/module_guides/models/llms/usage_standalone.md index 9f07b3348aced3e8c6365de935e41ae0c1228edd..2aed9aed88a25f10f3d9a016c083a9b97886bded 100644 --- a/docs/module_guides/models/llms/usage_standalone.md +++ b/docs/module_guides/models/llms/usage_standalone.md @@ -26,7 +26,9 @@ for delta in resp: from llama_index.llms import ChatMessage, OpenAI messages = [ - ChatMessage(role="system", content="You are a pirate with a colorful personality"), + ChatMessage( + role="system", content="You are a pirate with a colorful personality" + ), ChatMessage(role="user", content="What is your name"), ] resp = OpenAI().chat(messages) diff --git 
a/docs/module_guides/models/prompts/usage_pattern.md b/docs/module_guides/models/prompts/usage_pattern.md index 56f85e3762d9acb67f93e97253ea15c04c1fdfbc..f532e03d6ed36ed617d2df629af36455742ee0d0 100644 --- a/docs/module_guides/models/prompts/usage_pattern.md +++ b/docs/module_guides/models/prompts/usage_pattern.md @@ -102,7 +102,9 @@ qa_prompt_tmpl_str = ( ) qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str) -query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl}) +query_engine.update_prompts( + {"response_synthesizer:text_qa_template": qa_prompt_tmpl} +) ``` #### Modify prompts used in query engine diff --git a/docs/module_guides/observability/callbacks/token_counting_migration.md b/docs/module_guides/observability/callbacks/token_counting_migration.md index d2b03abb02821185298bd4cec3a2d67ff4d96f5a..de7df75239d79db6737ddf7206a5f487692b73ce 100644 --- a/docs/module_guides/observability/callbacks/token_counting_migration.md +++ b/docs/module_guides/observability/callbacks/token_counting_migration.md @@ -25,12 +25,16 @@ token_counter = TokenCountingHandler( callback_manager = CallbackManager([token_counter]) -service_context = ServiceContext.from_defaults(callback_manager=callback_manager) +service_context = ServiceContext.from_defaults( + callback_manager=callback_manager +) document = SimpleDirectoryReader("./data").load_data() # if verbose is turned on, you will see embedding token usage printed -index = VectorStoreIndex.from_documents(documents, service_context=service_context) +index = VectorStoreIndex.from_documents( + documents, service_context=service_context +) # otherwise, you can access the count directly print(token_counter.total_embedding_token_count) diff --git a/docs/module_guides/querying/node_postprocessors/root.md b/docs/module_guides/querying/node_postprocessors/root.md index c0c40b79ee9016bc06314db1af727a805951ce49..84bc86b596413d42d2689802e1f9740b595a2722 100644 --- a/docs/module_guides/querying/node_postprocessors/root.md +++ b/docs/module_guides/querying/node_postprocessors/root.md @@ -46,7 +46,9 @@ index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine( node_postprocessors=[ - TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=1) + TimeWeightedPostprocessor( + time_decay=0.5, time_access_refresh=False, top_k=1 + ) ] ) diff --git a/docs/module_guides/querying/output_parser.md b/docs/module_guides/querying/output_parser.md index 5cc242a6a070a47a767e17ce001f2bb394429015..60fc812f637f77c3d7d8e5d1389373049b0fd2ff 100644 --- a/docs/module_guides/querying/output_parser.md +++ b/docs/module_guides/querying/output_parser.md @@ -115,12 +115,15 @@ response_schemas = [ description="Describes the author's educational experience/background.", ), ResponseSchema( - name="Work", description="Describes the author's work experience/background." 
+ name="Work", + description="Describes the author's work experience/background.", ), ] # define output parser -lc_output_parser = StructuredOutputParser.from_response_schemas(response_schemas) +lc_output_parser = StructuredOutputParser.from_response_schemas( + response_schemas +) output_parser = LangchainOutputParser(lc_output_parser) # format each prompt with output parser instructions diff --git a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md index 66275a3d94bd489d038bf1999b8869472182d370..b75cdb3f090666c5934742abe66f2f47a3528fb0 100644 --- a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md +++ b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md @@ -32,14 +32,20 @@ response_synthesizer = get_response_synthesizer( response = response_synthesizer.synthesize( "query string", nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], - additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], + additional_source_nodes=[ + NodeWithScore(node=Node(text="text"), score=1.0), + ..., + ], ) # asynchronous response = await response_synthesizer.asynthesize( "query string", nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], - additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], + additional_source_nodes=[ + NodeWithScore(node=Node(text="text"), score=1.0), + ..., + ], ) ``` diff --git a/docs/module_guides/querying/response_synthesizers/root.md b/docs/module_guides/querying/response_synthesizers/root.md index d777736320c94d58d529499a7fd3475166c5b76f..8ac2b388dfeec9c599e90618cad9eb1bc889a6b3 100644 --- a/docs/module_guides/querying/response_synthesizers/root.md +++ b/docs/module_guides/querying/response_synthesizers/root.md @@ -18,11 +18,18 @@ Use a response synthesizer on it's own: ```python from llama_index.schema import Node -from llama_index.response_synthesizers import ResponseMode, get_response_synthesizer +from llama_index.response_synthesizers import ( + ResponseMode, + get_response_synthesizer, +) -response_synthesizer = get_response_synthesizer(response_mode=ResponseMode.COMPACT) +response_synthesizer = get_response_synthesizer( + response_mode=ResponseMode.COMPACT +) -response = response_synthesizer.synthesize("query text", nodes=[Node(text="text"), ...]) +response = response_synthesizer.synthesize( + "query text", nodes=[Node(text="text"), ...] +) ``` Or in a query engine after you've created an index: @@ -136,7 +143,9 @@ class BaseSynthesizer(ABC): streaming: bool = False, ) -> None: """Init params.""" - self._service_context = service_context or ServiceContext.from_defaults() + self._service_context = ( + service_context or ServiceContext.from_defaults() + ) self._callback_manager = self._service_context.callback_manager self._streaming = streaming diff --git a/docs/module_guides/querying/router/root.md b/docs/module_guides/querying/router/root.md index 689b6ccb5df70fc9f03b4fa5b75f6c64aacd48f9..b06737d7ea5c57592b4d8bfa6291cd42765e213a 100644 --- a/docs/module_guides/querying/router/root.md +++ b/docs/module_guides/querying/router/root.md @@ -63,7 +63,10 @@ We also highlight using our router as a standalone module. 
Some examples are given below with LLM and Pydantic based single/multi selectors: ```python -from llama_index.selectors.llm_selectors import LLMSingleSelector, LLMMultiSelector +from llama_index.selectors.llm_selectors import ( + LLMSingleSelector, + LLMMultiSelector, +) from llama_index.selectors.pydantic_selectors import ( PydanticMultiSelector, PydanticSingleSelector, @@ -88,7 +91,10 @@ A `RouterQueryEngine` is composed on top of other query engines as tools. ```python from llama_index.query_engine.router_query_engine import RouterQueryEngine -from llama_index.selectors.pydantic_selectors import PydanticSingleSelector, Pydantic +from llama_index.selectors.pydantic_selectors import ( + PydanticSingleSelector, + Pydantic, +) from llama_index.tools.query_engine import QueryEngineTool from llama_index import ( VectorStoreIndex, @@ -171,7 +177,10 @@ choices = [ ] # choices as a list of strings -choices = ["choice 1 - description for choice 1", "choice 2: description for choice 2"] +choices = [ + "choice 1 - description for choice 1", + "choice 2: description for choice 2", +] selector = LLMSingleSelector.from_defaults() selector_result = selector.select( diff --git a/docs/module_guides/storing/customization.md b/docs/module_guides/storing/customization.md index 3ff9c7a8d818795e742223863cb1466145a3872d..ac6e1ad677ffc547cc43cf3c5d73889fd5adcbb1 100644 --- a/docs/module_guides/storing/customization.md +++ b/docs/module_guides/storing/customization.md @@ -111,7 +111,9 @@ from llama_index.vector_stores import PineconeVectorStore # Creating a Pinecone index api_key = "api_key" pinecone.init(api_key=api_key, environment="us-west1-gcp") -pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1") +pinecone.create_index( + "quickstart", dimension=1536, metric="euclidean", pod_type="p1" +) index = pinecone.Index("quickstart") # construct vector store @@ -124,7 +126,9 @@ storage_context = StorageContext.from_defaults(vector_store=vector_store) documents = SimpleDirectoryReader("./data").load_data() # create index, which will insert documents/vectors to pinecone -index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context +) ``` If you have an existing vector store with data already loaded in, diff --git a/docs/module_guides/storing/save_load.md b/docs/module_guides/storing/save_load.md index da5c6f23b2300289645b1e6c679bd55222049cbd..0868cc7f480e42f947ae16f54333ed17845ffdef 100644 --- a/docs/module_guides/storing/save_load.md +++ b/docs/module_guides/storing/save_load.md @@ -22,7 +22,9 @@ To load data, user simply needs to re-create the storage context using the same ```python storage_context = StorageContext.from_defaults( docstore=SimpleDocumentStore.from_persist_dir(persist_dir="<persist_dir>"), - vector_store=SimpleVectorStore.from_persist_dir(persist_dir="<persist_dir>"), + vector_store=SimpleVectorStore.from_persist_dir( + persist_dir="<persist_dir>" + ), index_store=SimpleIndexStore.from_persist_dir(persist_dir="<persist_dir>"), ) ``` diff --git a/docs/module_guides/storing/storing.md b/docs/module_guides/storing/storing.md index 32275d18d7fbf65a873607eb161f2d36b2b3d7e3..df43986e703a2bf84f5d0605898221012b11a9ed 100644 --- a/docs/module_guides/storing/storing.md +++ b/docs/module_guides/storing/storing.md @@ -35,7 +35,9 @@ from llama_index.vector_stores import DeepLakeVectorStore vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>") storage_context = 
StorageContext.from_defaults(vector_store=vector_store) # Load documents and build index -index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context +) ## reload an existing one diff --git a/docs/module_guides/supporting_modules/service_context.md b/docs/module_guides/supporting_modules/service_context.md index 6017f52ff6cdbbca16b4670229c906ccde37b493..13c788abb62c98cb7e38aef5ffef31c77b545c29 100644 --- a/docs/module_guides/supporting_modules/service_context.md +++ b/docs/module_guides/supporting_modules/service_context.md @@ -67,7 +67,12 @@ for convenience (so you don't have to manually construct them). Here's a complete example that sets up all objects using their default settings: ```python -from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper +from llama_index import ( + ServiceContext, + LLMPredictor, + OpenAIEmbedding, + PromptHelper, +) from llama_index.llms import OpenAI from llama_index.text_splitter import TokenTextSplitter from llama_index.node_parser import SimpleNodeParser @@ -78,7 +83,10 @@ node_parser = SimpleNodeParser.from_defaults( text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20) ) prompt_helper = PromptHelper( - context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None + context_window=4096, + num_output=256, + chunk_overlap_ratio=0.1, + chunk_size_limit=None, ) service_context = ServiceContext.from_defaults( diff --git a/docs/optimizing/advanced_retrieval/query_transformations.md b/docs/optimizing/advanced_retrieval/query_transformations.md index 68e6512d4ef0671d0f9771af662fe22680b2d1fc..c559fe2fad55e1c67af3436c1e1b97e11c1ffc9f 100644 --- a/docs/optimizing/advanced_retrieval/query_transformations.md +++ b/docs/optimizing/advanced_retrieval/query_transformations.md @@ -28,7 +28,9 @@ To use HyDE, an example code snippet is shown below. ```python from llama_index import VectorStoreIndex, SimpleDirectoryReader from llama_index.indices.query.query_transform.base import HyDEQueryTransform -from llama_index.query_engine.transform_query_engine import TransformQueryEngine +from llama_index.query_engine.transform_query_engine import ( + TransformQueryEngine, +) # load documents, build index documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() @@ -65,9 +67,13 @@ Here's a corresponding example code snippet over a composed graph. ```python # Setting: a summary index composed over multiple vector indices # llm_predictor_chatgpt corresponds to the ChatGPT LLM interface -from llama_index.indices.query.query_transform.base import DecomposeQueryTransform +from llama_index.indices.query.query_transform.base import ( + DecomposeQueryTransform, +) -decompose_transform = DecomposeQueryTransform(llm_predictor_chatgpt, verbose=True) +decompose_transform = DecomposeQueryTransform( + llm_predictor_chatgpt, verbose=True +) # initialize indexes and graph ... @@ -83,7 +89,9 @@ vector_query_engine = TransformQueryEngine( custom_query_engines = {vector_index.index_id: vector_query_engine} # query -query_str = "Compare and contrast the airports in Seattle, Houston, and Toronto. " +query_str = ( + "Compare and contrast the airports in Seattle, Houston, and Toronto. " +) query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines) response = query_engine.query(query_str) ``` @@ -104,10 +112,14 @@ An example image is shown below. Here's a corresponding example code snippet. 
```python -from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform +from llama_index.indices.query.query_transform.base import ( + StepDecomposeQueryTransform, +) # gpt-4 -step_decompose_transform = StepDecomposeQueryTransform(llm_predictor, verbose=True) +step_decompose_transform = StepDecomposeQueryTransform( + llm_predictor, verbose=True +) query_engine = index.as_query_engine() query_engine = MultiStepQueryEngine( diff --git a/docs/optimizing/basic_strategies/basic_strategies.md b/docs/optimizing/basic_strategies/basic_strategies.md index c73b23963c84fd4275059ad6ae0c9e335790ebb1..0ebeb3b049daeb8ce7847f6036ccb31fac2886d8 100644 --- a/docs/optimizing/basic_strategies/basic_strategies.md +++ b/docs/optimizing/basic_strategies/basic_strategies.md @@ -119,7 +119,9 @@ documents = [ Document(text="text", metadata={"author": "John Doe"}), ] -filters = MetadataFilters(filters=[ExactMatchFilter(key="author", value="John Doe")]) +filters = MetadataFilters( + filters=[ExactMatchFilter(key="author", value="John Doe")] +) index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine(filters=filters) diff --git a/docs/understanding/evaluating/cost_analysis/usage_pattern.md b/docs/understanding/evaluating/cost_analysis/usage_pattern.md index 4abc5d09b8fe909f8d598241dacb07ff1f31df5e..9d77ad34f46771aadb78688605fabb039462609b 100644 --- a/docs/understanding/evaluating/cost_analysis/usage_pattern.md +++ b/docs/understanding/evaluating/cost_analysis/usage_pattern.md @@ -44,7 +44,9 @@ set_global_service_context( ```python from llama_index import VectorStoreIndex, SimpleDirectoryReader -documents = SimpleDirectoryReader("./docs/examples/data/paul_graham").load_data() +documents = SimpleDirectoryReader( + "./docs/examples/data/paul_graham" +).load_data() index = VectorStoreIndex.from_documents(documents) ``` diff --git a/docs/understanding/loading/loading.md b/docs/understanding/loading/loading.md index 4d9ce537b7e998b5c6c77c261bb4e6bf6f107d37..5086c03345d12e16cca33a7f264bd22dc436437c 100644 --- a/docs/understanding/loading/loading.md +++ b/docs/understanding/loading/loading.md @@ -59,7 +59,9 @@ documents = SimpleDirectoryReader("./data").load_data() node_parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=10) service_context = ServiceContext.from_defaults(node_parser=node_parser) -index = VectorStoreIndex.from_documents(documents, service_context=service_context) +index = VectorStoreIndex.from_documents( + documents, service_context=service_context +) ``` ```{tip} @@ -87,7 +89,8 @@ When creating documents, you can also attach useful metadata that can be used at ```python document = Document( - text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"} + text="text", + metadata={"filename": "<doc_file_name>", "category": "<category>"}, ) ``` diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md index f5a899b14943298a04194f2ac82d5160a17f0c04..a6bfa25154fa5ab326a56e4524a280488e64f9c0 100644 --- a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md +++ b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md @@ -83,7 +83,10 @@ def query_index(): global index query_text = request.args.get("text", None) if query_text is None: - return "No text found, please include a ?text=blah parameter in the URL", 400 + return ( + "No text found, please include a ?text=blah parameter in the URL", + 
400, + ) query_engine = index.as_query_engine() response = query_engine.query(query_text) return str(response), 200 @@ -177,7 +180,10 @@ def query_index(): global index query_text = request.args.get("text", None) if query_text is None: - return "No text found, please include a ?text=blah parameter in the URL", 400 + return ( + "No text found, please include a ?text=blah parameter in the URL", + 400, + ) response = manager.query_index(query_text)._getvalue() return str(response), 200 diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md index 2b8f1c6ad51a37a4d453454b9195b91898e1dacd..1ddea915affb43da414829cd07e35f9e5bfed5a6 100644 --- a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md +++ b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md @@ -157,7 +157,9 @@ async def create_collection( response=CollectionQueryOutput, summary="Ask a question of a document collection", ) -def query_collection_view(request: HttpRequest, query_input: CollectionQueryInput): +def query_collection_view( + request: HttpRequest, query_input: CollectionQueryInput +): collection_id = query_input.collection_id query_str = query_input.query_str response = query_collection(collection_id, query_str) @@ -307,7 +309,9 @@ async def receive(self, text_data): markdown_response = f"## Response\n\n{response}\n\n" if response.source_nodes: - markdown_sources = f"## Sources\n\n{response.get_formatted_sources()}" + markdown_sources = ( + f"## Sources\n\n{response.get_formatted_sources()}" + ) else: markdown_sources = "" @@ -316,7 +320,9 @@ async def receive(self, text_data): await self.send(json.dumps({"response": formatted_response}, indent=4)) else: await self.send( - json.dumps({"error": "No index loaded for this connection."}, indent=4) + json.dumps( + {"error": "No index loaded for this connection."}, indent=4 + ) ) ``` @@ -358,7 +364,9 @@ async def load_collection_model(collection_id: str | int) -> VectorStoreIndex: if not cache_file_path.exists(): cache_dir.mkdir(parents=True, exist_ok=True) with collection.model.open("rb") as model_file: - with cache_file_path.open("w+", encoding="utf-8") as cache_file: + with cache_file_path.open( + "w+", encoding="utf-8" + ) as cache_file: cache_file.write(model_file.read().decode("utf-8")) # define LLM diff --git a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md index 3e1ac4c2da11b7e12f970009b07a88081a5506be..1c7f58118773aeffb64bd9e34593faceb0a49ed4 100644 --- a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md +++ b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md @@ -98,7 +98,9 @@ from llama_index import load_index_from_storage index_set = {} for year in years: - storage_context = StorageContext.from_defaults(persist_dir=f"./storage/{year}") + storage_context = StorageContext.from_defaults( + persist_dir=f"./storage/{year}" + ) cur_index = load_index_from_storage( storage_context, service_context=service_context ) @@ -189,7 +191,9 @@ If we test it with a query regarding the 10-k of a given year, the agent will us the relevant vector index Tool. ```python -response = agent.chat("What were some of the biggest risk factors in 2020 for Uber?") +response = agent.chat( + "What were some of the biggest risk factors in 2020 for Uber?" 
+) print(str(response)) ``` diff --git a/docs/understanding/putting_it_all_together/q_and_a.md b/docs/understanding/putting_it_all_together/q_and_a.md index 8413aefb6a64f34d403f8c3a322011e629014ce6..ac6bee7ffcaa872e493c4acc2f48e3239030a00b 100644 --- a/docs/understanding/putting_it_all_together/q_and_a.md +++ b/docs/understanding/putting_it_all_together/q_and_a.md @@ -116,9 +116,13 @@ By default, this uses a `LLMSingleSelector` as the router, which uses the LLM to ```python from llama_index.query_engine import RouterQueryEngine -query_engine = RouterQueryEngine.from_defaults(query_engine_tools=[tool1, tool2]) +query_engine = RouterQueryEngine.from_defaults( + query_engine_tools=[tool1, tool2] +) -response = query_engine.query("In Notion, give me a summary of the product roadmap.") +response = query_engine.query( + "In Notion, give me a summary of the product roadmap." +) ``` **Guides** @@ -131,7 +135,9 @@ response = query_engine.query("In Notion, give me a summary of the product roadm You can explicitly perform compare/contrast queries with a **query transformation** module within a ComposableGraph. ```python -from llama_index.indices.query.query_transform.base import DecomposeQueryTransform +from llama_index.indices.query.query_transform.base import ( + DecomposeQueryTransform, +) decompose_transform = DecomposeQueryTransform( service_context.llm_predictor, verbose=True diff --git a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md index a5e7570009502659b3068e56db4627a8d6dfb42a..36f4789d3fd7947620e54615dc17d030ec23962e 100644 --- a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md +++ b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md @@ -60,7 +60,8 @@ with setup_tab: "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1 ) term_extract_str = st.text_area( - "The query to extract terms and definitions with.", value=DEFAULT_TERM_STR + "The query to extract terms and definitions with.", + value=DEFAULT_TERM_STR, ) with upload_tab: @@ -95,15 +96,21 @@ from llama_index.llms import OpenAI def get_llm(llm_name, model_temperature, api_key, max_tokens=256): os.environ["OPENAI_API_KEY"] = api_key - return OpenAI(temperature=model_temperature, model=llm_name, max_tokens=max_tokens) + return OpenAI( + temperature=model_temperature, model=llm_name, max_tokens=max_tokens + ) -def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_key): +def extract_terms( + documents, term_extract_str, llm_name, model_temperature, api_key +): llm = get_llm(llm_name, model_temperature, api_key, max_tokens=1024) service_context = ServiceContext.from_defaults(llm=llm, chunk_size=1024) - temp_index = SummaryIndex.from_documents(documents, service_context=service_context) + temp_index = SummaryIndex.from_documents( + documents, service_context=service_context + ) query_engine = temp_index.as_query_engine(response_mode="tree_summarize") terms_definitions = str(query_engine.query(term_extract_str)) terms_definitions = [ @@ -471,7 +478,8 @@ with upload_tab: "Either upload an image/screenshot of a document, or enter the text manually." 
) uploaded_file = st.file_uploader( - "Upload an image/screenshot of a document:", type=["png", "jpg", "jpeg"] + "Upload an image/screenshot of a document:", + type=["png", "jpg", "jpeg"], ) document_text = st.text_area("Or enter raw text") if st.button("Extract Terms and Definitions") and ( diff --git a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md index c1cd7896f0609c496a793bde8bdd224a6c8f3d3b..8de57a374640ad66a0d2997bc15b89f18c2a469b 100644 --- a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md +++ b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md @@ -135,7 +135,9 @@ graph = ComposableGraph.from_indices( ) # get root index -root_index = graph.get_index(graph.index_struct.root_id, SimpleKeywordTableIndex) +root_index = graph.get_index( + graph.index_struct.root_id, SimpleKeywordTableIndex +) # set id of root index root_index.set_index_id("compare_contrast") root_summary = ( @@ -150,12 +152,18 @@ An example is shown below. ```python # define decompose_transform from llama_index import LLMPredictor -from llama_index.indices.query.query_transform.base import DecomposeQueryTransform +from llama_index.indices.query.query_transform.base import ( + DecomposeQueryTransform, +) -decompose_transform = DecomposeQueryTransform(LLMPredictor(llm=llm_gpt4), verbose=True) +decompose_transform = DecomposeQueryTransform( + LLMPredictor(llm=llm_gpt4), verbose=True +) # define custom query engines -from llama_index.query_engine.transform_query_engine import TransformQueryEngine +from llama_index.query_engine.transform_query_engine import ( + TransformQueryEngine, +) custom_query_engines = {} for index in vector_indices.values(): @@ -209,7 +217,9 @@ for wiki_title in wiki_titles: summary = index_summaries[wiki_title] query_engine = index.as_query_engine(service_context=service_context) - vector_tool = QueryEngineTool.from_defaults(query_engine, description=summary) + vector_tool = QueryEngineTool.from_defaults( + query_engine, description=summary + ) query_engine_tools.append(vector_tool) diff --git a/docs/understanding/putting_it_all_together/structured_data.md b/docs/understanding/putting_it_all_together/structured_data.md index 0e38a4b78ca0cfb70e2ba6e59963533c1797290e..c82f97cb39f75319c53fd37dc9ff1274448c6b48 100644 --- a/docs/understanding/putting_it_all_together/structured_data.md +++ b/docs/understanding/putting_it_all_together/structured_data.md @@ -114,7 +114,11 @@ SQLDatabase and produces a Node object for each SQLTableSchema object passed into the ObjectIndex constructor. 
```python -from llama_index.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema +from llama_index.objects import ( + SQLTableNodeMapping, + ObjectIndex, + SQLTableSchema, +) table_node_mapping = SQLTableNodeMapping(sql_database) table_schema_objs = [ diff --git a/docs/understanding/querying/querying.md b/docs/understanding/querying/querying.md index 70ff0a2605fb5c28535bf293f24780dd7f14c6ac..4790578915e3ecbc8bbca9ef5441e4af94934edd 100644 --- a/docs/understanding/querying/querying.md +++ b/docs/understanding/querying/querying.md @@ -118,7 +118,9 @@ After a retriever fetches relevant nodes, a `BaseSynthesizer` synthesizes the fi You can configure it via ```python -query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=response_mode) +query_engine = RetrieverQueryEngine.from_args( + retriever, response_mode=response_mode +) ``` Right now, we support the following options: diff --git a/docs/understanding/storing/storing.md b/docs/understanding/storing/storing.md index 69b870742c2f3e32204bb3cdded3bd30237e387c..e344c7929b5fc7a97887ae0782abaf24942033f1 100644 --- a/docs/understanding/storing/storing.md +++ b/docs/understanding/storing/storing.md @@ -75,7 +75,9 @@ vector_store = ChromaVectorStore(chroma_collection=chroma_collection) storage_context = StorageContext.from_defaults(vector_store=vector_store) # create your index -index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context +) # create a query engine and query query_engine = index.as_query_engine() diff --git a/docs/understanding/using_llms/using_llms.md b/docs/understanding/using_llms/using_llms.md index a6d4343f631c1a300dc115ab9a550fb6ca34f5a9..2b89039848e148233ce6276c39516a159c1dd1d9 100644 --- a/docs/understanding/using_llms/using_llms.md +++ b/docs/understanding/using_llms/using_llms.md @@ -32,7 +32,9 @@ llm = OpenAI(temperature=0.1, model="gpt-4") service_context = ServiceContext.from_defaults(llm=llm) documents = SimpleDirectoryReader("data").load_data() -index = VectorStoreIndex.from_documents(documents, service_context=service_context) +index = VectorStoreIndex.from_documents( + documents, service_context=service_context +) ``` In this case, you've instantiated OpenAI and customized it to use the `gpt-4` model instead of the default `gpt-3.5-turbo`, and also modified the `temperature`. The `VectorStoreIndex` will now use gpt-4 to encode or `embed` your documents for later querying.