diff --git a/docs/examples/agent/openai_retrieval_benchmark.ipynb b/docs/examples/agent/openai_retrieval_benchmark.ipynb index 020f7ee9381516bd7274642ae648b5d9af42ca26..3b2e8f8ecfe57d67e64c6ecb418f9ed29423af2e 100644 --- a/docs/examples/agent/openai_retrieval_benchmark.ipynb +++ b/docs/examples/agent/openai_retrieval_benchmark.ipynb @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-agent-openai\n", "%pip install llama-index-llms-openai" ] @@ -74,8 +74,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 141KB/s in 1m 48s \n", "\n", - "2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], @@ -198,8 +197,7 @@ "\n", "data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.02s \n", "\n", - "2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n", - "\n" + "2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n" ] } ], diff --git a/docs/examples/finetuning/gradient/gradient_structured.ipynb b/docs/examples/finetuning/gradient/gradient_structured.ipynb index 9478b6fa29ff59c452698c03b554ad920a7ae402..70b585d36e5893ea23cb0b79911f394f1a52bbaa 100644 --- a/docs/examples/finetuning/gradient/gradient_structured.ipynb +++ b/docs/examples/finetuning/gradient/gradient_structured.ipynb @@ -31,7 +31,7 @@ "source": [ "%pip install llama-index-llms-gradient\n", "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-finetuning" ] }, diff --git a/docs/examples/finetuning/openai_fine_tuning_functions.ipynb b/docs/examples/finetuning/openai_fine_tuning_functions.ipynb index a57978db21e0eb32a4fa431e518e7d7a6002ab68..e8ee6bae8e12c353d8cb76c6fddb39e337ce9b0a 100644 --- a/docs/examples/finetuning/openai_fine_tuning_functions.ipynb +++ b/docs/examples/finetuning/openai_fine_tuning_functions.ipynb @@ -30,7 +30,7 @@ "%pip install llama-index-finetuning\n", "%pip install llama-index-llms-openai\n", "%pip install llama-index-finetuning-callbacks\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-program-openai" ] }, @@ -407,8 +407,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 229KB/s in 45s \n", "\n", - "2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/evaluation.ipynb b/docs/examples/low_level/evaluation.ipynb index fd17ad92e4ce5db28f38da409d0ebf3dd00aa10c..123c6730d58fe007b19fbc7fec25e59cfc7db924 100644 --- a/docs/examples/low_level/evaluation.ipynb +++ b/docs/examples/low_level/evaluation.ipynb @@ -38,7 +38,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, @@ -62,8 +62,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.56MB/s in 9.3s \n", "\n", - "2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/fusion_retriever.ipynb b/docs/examples/low_level/fusion_retriever.ipynb index f26448b4591c6326895e3f4a415528c5beadf1b2..0f5d1bf0b25b981ab99232f8260d2ea4e6cd3658 100644 --- a/docs/examples/low_level/fusion_retriever.ipynb +++ b/docs/examples/low_level/fusion_retriever.ipynb @@ -40,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, diff --git a/docs/examples/low_level/oss_ingestion_retrieval.ipynb b/docs/examples/low_level/oss_ingestion_retrieval.ipynb index 1aeaf0f270fade81b0788e104dc2259a40e39e72..5c09fc9a48752630981abcc5d27a203c947a24cd 100644 --- a/docs/examples/low_level/oss_ingestion_retrieval.ipynb +++ b/docs/examples/low_level/oss_ingestion_retrieval.ipynb @@ -53,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-postgres\n", "%pip install llama-index-embeddings-huggingface\n", "%pip install llama-index-llms-llama-cpp" diff --git a/docs/examples/low_level/response_synthesis.ipynb b/docs/examples/low_level/response_synthesis.ipynb index b0088704c84aedf338230bc7d4c89178b10f9e33..c472cc7b71db3f9ab46a35fd8f3dd6fa9d62c0fb 100644 --- a/docs/examples/low_level/response_synthesis.ipynb +++ b/docs/examples/low_level/response_synthesis.ipynb @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-pinecone\n", "%pip install llama-index-llms-openai" ] @@ -386,7 +386,6 @@ "*****Response******:\n", "\n", "RLHF used both model-based and human-based evaluation to select the best-performing models among several ablations. Model-based evaluation was used to measure the robustness of the reward model by collecting a test set of prompts for both helpfulness and safety, and asking three annotators to judge the quality of the answers based on a 7-point Likert scale. Human evaluation was used to validate major model versions. Additionally, a more general reward was trained to ensure the measure wouldn't diverge from the human preferences. Results showed that the reward models were well calibrated with the human preference annotations.\n", - "\n", "\n" ] } @@ -485,9 +484,7 @@ "---------------------\n", "Given the context information and not prior knowledge, answer the query.\n", "Query: Can you tell me about results from RLHF using both model-based and human-based evaluation?\n", - "Answer: \n", - "\n", - "\n" + "Answer: \n" ] } ], diff --git a/docs/examples/low_level/retrieval.ipynb b/docs/examples/low_level/retrieval.ipynb index 4f25f8968ad6978559ec4065d7f83228e0dd150f..51d5f72842d2df124630dedacadb284227906c23 100644 --- a/docs/examples/low_level/retrieval.ipynb +++ b/docs/examples/low_level/retrieval.ipynb @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-pinecone\n", "%pip install llama-index-embeddings-openai" ] diff --git a/docs/examples/low_level/router.ipynb b/docs/examples/low_level/router.ipynb index 761bf1fd993195b291702f156091b01da5423304..25428541c05fe898ea31b8c148d20082285921d1 100644 --- a/docs/examples/low_level/router.ipynb +++ b/docs/examples/low_level/router.ipynb @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-program-openai\n", "%pip install llama-index-llms-openai" ] @@ -692,8 +692,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 9.5s \n", "\n", - "2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/vector_store.ipynb b/docs/examples/low_level/vector_store.ipynb index dd9093bf834bb4c8d5111ea546f08f7e8ef03a32..fd2a913c8b853ac27c6bbac1a085f2c1e423a883 100644 --- a/docs/examples/low_level/vector_store.ipynb +++ b/docs/examples/low_level/vector_store.ipynb @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-embeddings-openai" ] }, @@ -690,9 +690,7 @@ "In this section, we describe our approach to safety fine-tuning, including safety categories, annotation\n", "guidelines, and the techniques we use to mitigate safety risks. We employ a process similar to the general\n", "fine-tuning methods as described in Section 3, with some notable differences related to safety concerns.\n", - "----------------\n", - "\n", - "\n" + "----------------\n" ] } ], @@ -774,9 +772,7 @@ "Better Long-Tail Safety Robustness without Hurting Helpfulness\n", "Safety is inherently a long-tail problem,\n", "where the challenge comes from a small number of very specific cases.\n", - "----------------\n", - "\n", - "\n" + "----------------\n" ] } ], diff --git a/docs/examples/param_optimizer/param_optimizer.ipynb b/docs/examples/param_optimizer/param_optimizer.ipynb index c7e28398c0951b617a3c518515a1ef02b31e1b85..34eb9d74f029d15fdd80be12eb173596e80bbf25 100644 --- a/docs/examples/param_optimizer/param_optimizer.ipynb +++ b/docs/examples/param_optimizer/param_optimizer.ipynb @@ -33,7 +33,7 @@ "source": [ "%pip install llama-index-llms-openai\n", "%pip install llama-index-embeddings-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-experimental-param-tuner" ] }, @@ -66,8 +66,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 533KB/s in 36s \n", "\n", - "2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/prompts/emotion_prompt.ipynb b/docs/examples/prompts/emotion_prompt.ipynb index 7fb2420c53ad514c994fdd7bb4d7984d75ae941b..da2bfac9a69476bcd2cb1af841605daca1bdf604 100644 --- a/docs/examples/prompts/emotion_prompt.ipynb +++ b/docs/examples/prompts/emotion_prompt.ipynb @@ -23,7 +23,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -192,8 +192,7 @@ "\n", "data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.04s \n", "\n", - "2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n", - "\n" + "2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n" ] } ], diff --git a/docs/examples/prompts/prompt_optimization.ipynb b/docs/examples/prompts/prompt_optimization.ipynb index 3cf58e91162bf141f774b28896866f557485aa96..ce2fb70d9949c95f342e5c3607172bbe14b179ad 100644 --- a/docs/examples/prompts/prompt_optimization.ipynb +++ b/docs/examples/prompts/prompt_optimization.ipynb @@ -21,7 +21,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { diff --git a/docs/examples/prompts/prompts_rag.ipynb b/docs/examples/prompts/prompts_rag.ipynb index cb6eacaad4ad016d5a71cb6a4f08881c16da6099..a9c0dac8c934b54e3963c9d0b80de96a6877510e 100644 --- a/docs/examples/prompts/prompts_rag.ipynb +++ b/docs/examples/prompts/prompts_rag.ipynb @@ -30,7 +30,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -130,8 +130,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 10s \n", "\n", - "2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb b/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb index 0930193d1246151274da31bf4ac05c0d40c5d69c..05c490d2f7f5ae659a13bf3c54a66d251dc24a7a 100644 --- a/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb +++ b/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb @@ -31,7 +31,7 @@ "outputs": [], "source": [ "%pip install llama-index-embeddings-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, diff --git a/docs/examples/retrievers/auto_merging_retriever.ipynb b/docs/examples/retrievers/auto_merging_retriever.ipynb index 42e23142301d0fcc6ce7b1591e5532eab9a01e2a..6082fbc14842bc066e024779103671fe1d2f8373 100644 --- a/docs/examples/retrievers/auto_merging_retriever.ipynb +++ b/docs/examples/retrievers/auto_merging_retriever.ipynb @@ -28,7 +28,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -353,8 +353,7 @@ "text": [ "> Merging 4 nodes into parent node.\n", "> Parent node id: caf5f81c-842f-46a4-b679-6be584bd6aff.\n", - "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n", - "\n" + "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n" ] } ], @@ -684,8 +683,7 @@ "text": [ "> Merging 4 nodes into parent node.\n", "> Parent node id: 3671b20d-ea5e-4afc-983e-02be6ee8302d.\n", - "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n", - "\n" + "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n" ] } ], diff --git a/docs/examples/retrievers/composable_retrievers.ipynb b/docs/examples/retrievers/composable_retrievers.ipynb index 78d851500130378e97c7f60389727ca9df733979..234dd0015e68ed6e14b6c4365ba9332e91a5eef9 100644 --- a/docs/examples/retrievers/composable_retrievers.ipynb +++ b/docs/examples/retrievers/composable_retrievers.ipynb @@ -38,7 +38,7 @@ "%pip install llama-index-retrievers-bm25\n", "%pip install llama-index-storage-docstore-redis\n", "%pip install llama-index-storage-docstore-dynamodb\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { diff --git a/docs/examples/retrievers/ensemble_retrieval.ipynb b/docs/examples/retrievers/ensemble_retrieval.ipynb index 9bb2ff4fef72d3686dded5d81abd0dbd135cf8a9..70e9d7230f0028bb8232b9cff1c506c1f1709c61 100644 --- a/docs/examples/retrievers/ensemble_retrieval.ipynb +++ b/docs/examples/retrievers/ensemble_retrieval.ipynb @@ -38,7 +38,7 @@ "source": [ "%pip install llama-index-llms-openai\n", "%pip install llama-index-postprocessor-cohere-rerank\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -159,8 +159,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 521KB/s in 42s \n", "\n", - "2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md index cfde1701d6e3a616d10585dc6b8508a8eef6baad..82a20b32057c42c3af6dea9d3504975f1f806ad1 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md +++ b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md @@ -1,7 +1,7 @@ # PyMuPDF Loader ```bash -pip install llama-index-readers-file +pip install llama-index-readers-file pymupdf ``` This loader extracts text from a local PDF file using the `PyMuPDF` Python library. If `metadata` is passed as True while calling `load` function; extracted documents will include basic metadata such as page numbers, file path and total number of pages in pdf. diff --git a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml index 7ce4da7890a749e800a33abc4a3e40c6fb5cde10..71561c017a295f1efe4d7123a115534a6c676db4 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml @@ -50,12 +50,13 @@ license = "MIT" maintainers = ["FarisHijazi", "Haowjy", "ephe-meral", "hursh-desai", "iamarunbrahma", "jon-chuang", "mmaatouk", "ravi03071991", "sangwongenip", "thejessezhang"] name = "llama-index-readers-file" readme = "README.md" -version = "0.1.10" +version = "0.1.11" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.1" -pymupdf = "^1.23.21" +# pymupdf is AGPLv3-licensed, so it's optional +pymupdf = {optional = true, version = "^1.23.21"} bs4 = "^0.0.2" beautifulsoup4 = "^4.12.3" pypdf = "^4.0.1"