diff --git a/docs/core_modules/data_modules/documents_and_nodes/root.md b/docs/core_modules/data_modules/documents_and_nodes/root.md index d88e7c941f16ccb1d66eaca5f5be692391f94faf..2e23d3959e82ba207ff9430d50b00606f09c38cf 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/root.md +++ b/docs/core_modules/data_modules/documents_and_nodes/root.md @@ -41,7 +41,7 @@ from llama_index.node_parser import SimpleNodeParser ... # parse nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # build index diff --git a/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md b/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md index 07122b5c5ba93337b55246cd9426014ca2114613..9e89b558d8f37b0cbd0f32622aa81beb836a6a60 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md +++ b/docs/core_modules/data_modules/documents_and_nodes/usage_metadata_extractor.md @@ -26,7 +26,7 @@ metadata_extractor = MetadataExtractor( ], ) -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( text_splitter=text_splitter, metadata_extractor=metadata_extractor, ) diff --git a/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md b/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md index 39154812d595d47529d7a27638ab25c3746243c5..de51ba708e41de164662b52dafd4d217dbfba250 100644 --- a/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md +++ b/docs/core_modules/data_modules/documents_and_nodes/usage_nodes.md @@ -10,7 +10,7 @@ For instance, you can do ```python from llama_index.node_parser import SimpleNodeParser -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) ``` diff --git a/docs/core_modules/data_modules/index/metadata_extraction.md b/docs/core_modules/data_modules/index/metadata_extraction.md index 02ead1afa7c90625ae8e76a85ad70fb8a7680882..ee73fa5bd93f4243e01056b1de61b3008d468363 100644 --- a/docs/core_modules/data_modules/index/metadata_extraction.md +++ b/docs/core_modules/data_modules/index/metadata_extraction.md @@ -34,7 +34,7 @@ metadata_extractor = MetadataExtractor( ], ) -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( metadata_extractor=metadata_extractor, ) ``` diff --git a/docs/core_modules/data_modules/node_parsers/usage_pattern.md b/docs/core_modules/data_modules/node_parsers/usage_pattern.md index ac379fee464338719d54db30628dd0255a534ca3..959043b9ea13ce947637c8838704ba205ea07dfc 100644 --- a/docs/core_modules/data_modules/node_parsers/usage_pattern.md +++ b/docs/core_modules/data_modules/node_parsers/usage_pattern.md @@ -63,7 +63,7 @@ text_splitter = SentenceSplitter( tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` `TokenTextSplitter` default configuration: @@ -80,7 +80,7 @@ text_splitter = TokenTextSplitter( tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` `CodeSplitter` configuration: @@ -95,7 +95,7 @@ text_splitter = CodeSplitter( max_chars=1500, ) -node_parser = SimpleNodeParser(text_splitter=text_splitter) +node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) ``` ## SentenceWindowNodeParser diff --git a/docs/core_modules/data_modules/storage/customization.md b/docs/core_modules/data_modules/storage/customization.md index 646bed49ae37ffdceb029bf50797af379730a312..7b2fec137660ceb86da5816dce9c3fb535897871 100644 --- a/docs/core_modules/data_modules/storage/customization.md +++ b/docs/core_modules/data_modules/storage/customization.md @@ -28,7 +28,7 @@ from llama_index.vector_stores import SimpleVectorStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create storage context using default stores diff --git a/docs/core_modules/data_modules/storage/docstores.md b/docs/core_modules/data_modules/storage/docstores.md index 266151435f2cbb800f57d11ff931dc684558af12..5029286744cf4d84add453002b699319fcfe1eba 100644 --- a/docs/core_modules/data_modules/storage/docstores.md +++ b/docs/core_modules/data_modules/storage/docstores.md @@ -17,7 +17,7 @@ from llama_index.storage.docstore import MongoDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes @@ -50,7 +50,7 @@ from llama_index.storage.docstore import RedisDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes @@ -84,7 +84,7 @@ from llama_index.storage.docstore import FirestoreDocumentStore from llama_index.node_parser import SimpleNodeParser # create parser and parse document into nodes -parser = SimpleNodeParser() +parser = SimpleNodeParser.from_defaults() nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes diff --git a/docs/core_modules/supporting_modules/service_context.md b/docs/core_modules/supporting_modules/service_context.md index 84753facfa72a357f12b11142e3c1fc73ab6d7b3..44dba472d4169443c3401d14c5631465eab0725f 100644 --- a/docs/core_modules/supporting_modules/service_context.md +++ b/docs/core_modules/supporting_modules/service_context.md @@ -71,7 +71,7 @@ from llama_index.node_parser import SimpleNodeParser llm = OpenAI(model='text-davinci-003', temperature=0, max_tokens=256) embed_model = OpenAIEmbedding() -node_parser = SimpleNodeParser( +node_parser = SimpleNodeParser.from_defaults( text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20) ) prompt_helper = PromptHelper( diff --git a/docs/examples/agent/openai_agent_query_cookbook.ipynb b/docs/examples/agent/openai_agent_query_cookbook.ipynb index dcc7e88c72584d45e486450e834071b3d9e907fe..4f7dd82da921086f777a2b61bf4d35474688c582 100644 --- a/docs/examples/agent/openai_agent_query_cookbook.ipynb +++ b/docs/examples/agent/openai_agent_query_cookbook.ipynb @@ -674,7 +674,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\")\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# define pinecone vector index\n", "vector_store = PineconeVectorStore(\n", diff --git a/docs/examples/callbacks/OpenInferenceCallback.ipynb b/docs/examples/callbacks/OpenInferenceCallback.ipynb index 03eb61829297ba22ece8d24c99b13153331080ed..b57abdacd8e214e378c7a4e2b73aa7cb38451728 100644 --- a/docs/examples/callbacks/OpenInferenceCallback.ipynb +++ b/docs/examples/callbacks/OpenInferenceCallback.ipynb @@ -503,7 +503,7 @@ } ], "source": [ - "parser = SimpleNodeParser()\n", + "parser = SimpleNodeParser.from_defaults()\n", "nodes = parser.get_nodes_from_documents(documents)\n", "print(nodes[0].text)" ] diff --git a/docs/examples/docstore/DocstoreDemo.ipynb b/docs/examples/docstore/DocstoreDemo.ipynb index cf1df96745af8236f6cd62b23d20ec1beade904c..261e7847c87bddec568aadf2c96de16ffd57da4f 100644 --- a/docs/examples/docstore/DocstoreDemo.ipynb +++ b/docs/examples/docstore/DocstoreDemo.ipynb @@ -87,7 +87,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/FirestoreDemo.ipynb b/docs/examples/docstore/FirestoreDemo.ipynb index 098b9855c6e02d34fb69f29a9075876f9573ff2e..26f7266158aec3bf2e7d6d016eaafaa859969ac2 100644 --- a/docs/examples/docstore/FirestoreDemo.ipynb +++ b/docs/examples/docstore/FirestoreDemo.ipynb @@ -74,7 +74,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/MongoDocstoreDemo.ipynb b/docs/examples/docstore/MongoDocstoreDemo.ipynb index 6d2d6a3544c64ab532c2700101ea3ed14e715af1..0b7adc0b1d824e0a014081b00ecaddd4e49c1c4a 100644 --- a/docs/examples/docstore/MongoDocstoreDemo.ipynb +++ b/docs/examples/docstore/MongoDocstoreDemo.ipynb @@ -99,7 +99,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb index fed62ad97c8bf7a31b202f3b7d213d24b68b4593..79201dff76f694793d5207c49a8819a5a9d45a3f 100644 --- a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb +++ b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb @@ -127,7 +127,7 @@ "source": [ "from llama_index.node_parser import SimpleNodeParser\n", "\n", - "nodes = SimpleNodeParser().get_nodes_from_documents(documents)" + "nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb index 0089c7acbfb9f430c419a4fab93cb4c64dcf6829..6b60e21bd40dc4c35ce61589c8db02bef8dfa34a 100644 --- a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb @@ -437,7 +437,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb index c844c535d7a245f713726a89b6c2e087bfe31c73..5b949f91579bae5915536127c49dad123511e183 100644 --- a/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/KuzuGraphDemo.ipynb @@ -561,7 +561,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb index 733dc9f09390e17cc1327a3bfa2edba6bbf5472a..8a0d986987078967f3664d7f794ae41c1d7fcd94 100644 --- a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb @@ -1006,7 +1006,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb index 2c4db17479d5c78603bae5cfb9e26e9b01f83100..2af1b709a5d08a9d1eed696e8cf3a9bff1c3a45f 100644 --- a/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb +++ b/docs/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.ipynb @@ -486,7 +486,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser()" + "node_parser = SimpleNodeParser.from_defaults()" ] }, { diff --git a/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb b/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb index 415573fb355e552299984c711e00cebfef9d30c6..bd8fb38ea0115b3f73228603060b5065fa80a697 100644 --- a/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb +++ b/docs/examples/metadata_extraction/MetadataExtractionSEC.ipynb @@ -121,7 +121,7 @@ " ],\n", ")\n", "\n", - "node_parser = SimpleNodeParser(\n", + "node_parser = SimpleNodeParser.from_defaults(\n", " text_splitter=text_splitter,\n", " metadata_extractor=metadata_extractor,\n", ")" diff --git a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb index 28406d80a5acb02e137b54489efb06016c8fe907..78bd02b3d7bb1282a6da6a14ed45e100c835ccb8 100644 --- a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb +++ b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb @@ -169,7 +169,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# define pinecone vector index\n", "vector_store = PineconeVectorStore(\n", diff --git a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb index a0bf80ed9b4bfa798db8040ef288ea1612540c85..96ff368e3029da3ac5eabd98822f049f8896d85a 100644 --- a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb +++ b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb @@ -136,7 +136,7 @@ "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n", "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n", "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n", - "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)\n", "\n", "# # define pinecone vector index\n", "# vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='wiki_cities')\n", diff --git a/examples/paul_graham_essay/SentenceSplittingDemo.ipynb b/examples/paul_graham_essay/SentenceSplittingDemo.ipynb index 5471cc7f549525ab7648601f9076923d13df7239..cc5011cf88e8198c6c4e8842b0f5211420884ac8 100644 --- a/examples/paul_graham_essay/SentenceSplittingDemo.ipynb +++ b/examples/paul_graham_essay/SentenceSplittingDemo.ipynb @@ -130,7 +130,7 @@ "metadata": {}, "outputs": [], "source": [ - "node_parser = SimpleNodeParser(text_splitter=sentence_splitter)\n", + "node_parser = SimpleNodeParser.from_defaults(text_splitter=sentence_splitter)\n", "service_context = ServiceContext.from_defaults(node_parser=node_parser)\n", "documents = []\n", "documents.append(Document(text=page))\n", diff --git a/tests/node_parser/metadata_extractor.py b/tests/node_parser/metadata_extractor.py index cc6ec5b8266a5f777593f3a695393420fcb32500..7c0878bb5bdd778e15df8d8526dba7bf034c53f3 100644 --- a/tests/node_parser/metadata_extractor.py +++ b/tests/node_parser/metadata_extractor.py @@ -20,7 +20,7 @@ def test_metadata_extractor(mock_service_context: ServiceContext) -> None: ], ) - node_parser = SimpleNodeParser( + node_parser = SimpleNodeParser.from_defaults( metadata_extractor=metadata_extractor, )