From f847235d4a1f8fac694ef73e94a16bdd832bf046 Mon Sep 17 00:00:00 2001 From: Logan <logan.markewich@live.com> Date: Tue, 22 Aug 2023 20:05:04 -0600 Subject: [PATCH] update simple node parser usage (#7370) --- docs/core_modules/data_modules/documents_and_nodes/root.md | 2 +- .../documents_and_nodes/usage_metadata_extractor.md | 2 +- .../data_modules/documents_and_nodes/usage_nodes.md | 2 +- docs/core_modules/data_modules/index/metadata_extraction.md | 2 +- .../core_modules/data_modules/node_parsers/usage_pattern.md | 6 +++--- docs/core_modules/data_modules/storage/customization.md | 2 +- docs/core_modules/data_modules/storage/docstores.md | 6 +++--- docs/core_modules/supporting_modules/service_context.md | 2 +- docs/examples/agent/openai_agent_query_cookbook.ipynb | 2 +- docs/examples/callbacks/OpenInferenceCallback.ipynb | 2 +- docs/examples/docstore/DocstoreDemo.ipynb | 2 +- docs/examples/docstore/FirestoreDemo.ipynb | 2 +- docs/examples/docstore/MongoDocstoreDemo.ipynb | 2 +- docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb | 2 +- .../index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb | 2 +- .../index_structs/knowledge_graph/KuzuGraphDemo.ipynb | 2 +- .../knowledge_graph/NebulaGraphKGIndexDemo.ipynb | 2 +- .../index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb | 2 +- .../metadata_extraction/MetadataExtractionSEC.ipynb | 2 +- docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb | 2 +- docs/examples/query_engine/SQLJoinQueryEngine.ipynb | 2 +- examples/paul_graham_essay/SentenceSplittingDemo.ipynb | 2 +- tests/node_parser/metadata_extractor.py | 2 +- 23 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/core_modules/data_modules/documents_and_nodes/root.md b/docs/core_modules/data_modules/documents_and_nodes/root.md index d88e7c941f..2e23d3959e 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/root.md +++ b/docs/core_modules/data_modules/documents_and_nodes/root.md @@ -41,7 +41,7 @@ from llama_index.node_parser import SimpleNodeParser ... # parse nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # build index diff --git a/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md b/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md index 07122b5c5b..9e89b558d8 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md +++ b/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md @@ -26,7 +26,7 @@ metadata_extractor = MetadataExtractor( ], ) -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( text_splitter=text_splitter, metadata_extractor=metadata_extractor, ) diff --git a/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md b/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md index 39154812d5..de51ba708e 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md +++ b/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md @@ -10,7 +10,7 @@ For instance, you can do ```python from llama_index.node_parser import SimpleNodeParser -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) ``` diff --git a/docs/core_modules/data_modules/index/metadata_extraction.md b/docs/core_modules/data_modules/index/metadata_extraction.md index 02ead1afa7..ee73fa5bd9 100644 --- a/docs/core_modules/data_modules/index/metadata_extraction.md +++ b/docs/core_modules/data_modules/index/metadata_extraction.md @@ -34,7 +34,7 @@ metadata_extractor = MetadataExtractor( ], ) -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( metadata_extractor=metadata_extractor, ) ``` diff --git a/docs/core_modules/data_modules/node_parsers/usage_pattern.md b/docs/core_modules/data_modules/node_parsers/usage_pattern.md index ac379fee46..959043b9ea 100644 --- a/docs/core_modules/data_modules/node_parsers/usage_pattern.md +++ b/docs/core_modules/data_modules/node_parsers/usage_pattern.md @@ -63,7 +63,7 @@ text_splitter = SentenceSplitter( tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` `TokenTextSplitter` default configuration: @@ -80,7 +80,7 @@ text_splitter = TokenTextSplitter( tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` `CodeSplitter` configuration: @@ -95,7 +95,7 @@ text_splitter = CodeSplitter( max_chars=1500, ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` ## SentenceWindowNodeParser diff --git a/docs/core_modules/data_modules/storage/customization.md b/docs/core_modules/data_modules/storage/customization.md index 646bed49ae..7b2fec1376 100644 --- a/docs/core_modules/data_modules/storage/customization.md +++ b/docs/core_modules/data_modules/storage/customization.md @@ -28,7 +28,7 @@ from llama_index.vector_stores import SimpleVectorStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create storage context using default stores diff --git a/docs/core_modules/data_modules/storage/docstores.md b/docs/core_modules/data_modules/storage/docstores.md index 266151435f..5029286744 100644 --- a/docs/core_modules/data_modules/storage/docstores.md +++ b/docs/core_modules/data_modules/storage/docstores.md @@ -17,7 +17,7 @@ from llama_index.storage.docstore import MongoDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes @@ -50,7 +50,7 @@ from llama_index.storage.docstore import RedisDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes @@ -84,7 +84,7 @@ from llama_index.storage.docstore import FirestoreDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes diff --git a/docs/core_modules/supporting_modules/service_context.md b/docs/core_modules/supporting_modules/service_context.md index 84753facfa..44dba472d4 100644 --- a/docs/core_modules/supporting_modules/service_context.md +++ b/docs/core_modules/supporting_modules/service_context.md @@ -71,7 +71,7 @@ from llama_index.node_parser import SimpleNodeParser llm = OpenAI(model='text-davinci-003', temperature=0, max_tokens=256) embed_model = OpenAIEmbedding() -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20) ) prompt_helper = PromptHelper( diff --git a/docs/examples/agent/openai_agent_query_cookbook.ipynb b/docs/examples/agent/openai_agent_query_cookbook.ipynb index dcc7e88c72..4f7dd82da9 100644 --- a/docs/examples/agent/openai_agent_query_cookbook.ipynb +++ b/docs/examples/agent/openai_agent_query_cookbook.ipynb @@ -674,7 +674,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\")\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# define pinecone vector index\n", "vector_store = PineconeVectorStore(\n", diff --git a/docs/examples/callbacks/OpenInferenceCallback.ipynb b/docs/examples/callbacks/OpenInferenceCallback.ipynb index 03eb618292..b57abdacd8 100644 --- a/docs/examples/callbacks/OpenInferenceCallback.ipynb +++ b/docs/examples/callbacks/OpenInferenceCallback.ipynb @@ -503,7 +503,7 @@ } ], "source": [ - "parser = SimpleNodeParser()\n", + "parser = SimpleNodeParser.from_defaults()\n", "nodes = parser.get_nodes_from_documents(documents)\n", "print(nodes[0].text)" ] diff --git a/docs/examples/docstore/DocstoreDemo.ipynb b/docs/examples/docstore/DocstoreDemo.ipynb index cf1df96745..261e7847c8 100644 --- a/docs/examples/docstore/DocstoreDemo.ipynb +++ b/docs/examples/docstore/DocstoreDemo.ipynb @@ -87,7 +87,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/FirestoreDemo.ipynb b/docs/examples/docstore/FirestoreDemo.ipynb index 098b9855c6..26f7266158 100644 --- a/docs/examples/docstore/FirestoreDemo.ipynb +++ b/docs/examples/docstore/FirestoreDemo.ipynb @@ -74,7 +74,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/MongoDocstoreDemo.ipynb b/docs/examples/docstore/MongoDocstoreDemo.ipynb index 6d2d6a3544..0b7adc0b1d 100644 --- a/docs/examples/docstore/MongoDocstoreDemo.ipynb +++ b/docs/examples/docstore/MongoDocstoreDemo.ipynb @@ -99,7 +99,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb index fed62ad97c..79201dff76 100644 --- a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb +++ b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb @@ -127,7 +127,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb index 0089c7acbf..6b60e21bd4 100644 --- a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb @@ -437,7 +437,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb index c844c535d7..5b949f9157 100644 --- a/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb @@ -561,7 +561,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb index 733dc9f093..8a0d986987 100644 --- a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb @@ -1006,7 +1006,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb index 2c4db17479..2af1b709a5 100644 --- a/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb @@ -486,7 +486,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb b/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb index 415573fb35..bd8fb38ea0 100644 --- a/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb +++ b/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb @@ -121,7 +121,7 @@ " ],\n", ")\n", "\n", - "node_parser = SimpleNodeParser(\n", + "node_parser = SimpleNodeParser.from_defaults(\n", " text_splitter=text_splitter,\n", " metadata_extractor=metadata_extractor,\n", ")" diff --git a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb index 28406d80a5..78bd02b3d7 100644 --- a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb +++ b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb @@ -169,7 +169,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# define pinecone vector index\n", "vector_store = PineconeVectorStore(\n", diff --git a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb index a0bf80ed9b..96ff368e30 100644 --- a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb +++ b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb @@ -136,7 +136,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# # define pinecone vector index\n", "# vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='wiki_cities')\n", diff --git a/examples/paul_graham_essay/SentenceSplittingDemo.ipynb b/examples/paul_graham_essay/SentenceSplittingDemo.ipynb index 5471cc7f54..cc5011cf88 100644 --- a/examples/paul_graham_essay/SentenceSplittingDemo.ipynb +++ b/examples/paul_graham_essay/SentenceSplittingDemo.ipynb @@ -130,7 +130,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser(text_splitter=sentence_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=sentence_splitter)\n", "service_context = ServiceContext.from_defaults(node_parser=node_parser)\n", "documents = []\n", "documents.append(Document(text=page))\n", diff --git a/tests/node_parser/metadata_extractor.py b/tests/node_parser/metadata_extractor.py index cc6ec5b826..7c0878bb5b 100644 --- a/tests/node_parser/metadata_extractor.py +++ b/tests/node_parser/metadata_extractor.py @@ -20,7 +20,7 @@ def test_metadata_extractor(mock_service_context: ServiceContext) -> None: ], ) - node_parser = SimpleNodeParser( + node_parser = SimpleNodeParser.from_defaults( metadata_extractor=metadata_extractor, ) -- GitLab