From 46f1e13b63d6735a6fb86fabbf8d6104552b2f49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com> Date: Wed, 13 Mar 2024 22:45:45 +0100 Subject: [PATCH] fix: pymupdf must be optional because is AGPLv3 licensed (#11896) --- docs/examples/agent/openai_retrieval_benchmark.ipynb | 8 +++----- .../finetuning/gradient/gradient_structured.ipynb | 2 +- .../finetuning/openai_fine_tuning_functions.ipynb | 5 ++--- docs/examples/low_level/evaluation.ipynb | 5 ++--- docs/examples/low_level/fusion_retriever.ipynb | 2 +- docs/examples/low_level/oss_ingestion_retrieval.ipynb | 2 +- docs/examples/low_level/response_synthesis.ipynb | 7 ++----- docs/examples/low_level/retrieval.ipynb | 2 +- docs/examples/low_level/router.ipynb | 5 ++--- docs/examples/low_level/vector_store.ipynb | 10 +++------- docs/examples/param_optimizer/param_optimizer.ipynb | 5 ++--- docs/examples/prompts/emotion_prompt.ipynb | 5 ++--- docs/examples/prompts/prompt_optimization.ipynb | 2 +- docs/examples/prompts/prompts_rag.ipynb | 5 ++--- .../query_engine/pdf_tables/recursive_retriever.ipynb | 2 +- docs/examples/retrievers/auto_merging_retriever.ipynb | 8 +++----- docs/examples/retrievers/composable_retrievers.ipynb | 2 +- docs/examples/retrievers/ensemble_retrieval.ipynb | 5 ++--- .../llama_index/readers/file/pymu_pdf/README.md | 2 +- .../readers/llama-index-readers-file/pyproject.toml | 5 +++-- 20 files changed, 36 insertions(+), 53 deletions(-) diff --git a/docs/examples/agent/openai_retrieval_benchmark.ipynb b/docs/examples/agent/openai_retrieval_benchmark.ipynb index 020f7ee938..3b2e8f8ecf 100644 --- a/docs/examples/agent/openai_retrieval_benchmark.ipynb +++ b/docs/examples/agent/openai_retrieval_benchmark.ipynb @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-agent-openai\n", "%pip install llama-index-llms-openai" ] @@ -74,8 +74,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 141KB/s in 1m 48s \n", "\n", - "2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], @@ -198,8 +197,7 @@ "\n", "data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.02s \n", "\n", - "2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n", - "\n" + "2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n" ] } ], diff --git a/docs/examples/finetuning/gradient/gradient_structured.ipynb b/docs/examples/finetuning/gradient/gradient_structured.ipynb index 9478b6fa29..70b585d36e 100644 --- a/docs/examples/finetuning/gradient/gradient_structured.ipynb +++ b/docs/examples/finetuning/gradient/gradient_structured.ipynb @@ -31,7 +31,7 @@ "source": [ "%pip install llama-index-llms-gradient\n", "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-finetuning" ] }, diff --git a/docs/examples/finetuning/openai_fine_tuning_functions.ipynb b/docs/examples/finetuning/openai_fine_tuning_functions.ipynb index a57978db21..e8ee6bae8e 100644 --- a/docs/examples/finetuning/openai_fine_tuning_functions.ipynb +++ b/docs/examples/finetuning/openai_fine_tuning_functions.ipynb @@ -30,7 +30,7 @@ "%pip install llama-index-finetuning\n", "%pip install llama-index-llms-openai\n", "%pip install llama-index-finetuning-callbacks\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-program-openai" ] }, @@ -407,8 +407,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 229KB/s in 45s \n", "\n", - "2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/evaluation.ipynb b/docs/examples/low_level/evaluation.ipynb index fd17ad92e4..123c6730d5 100644 --- a/docs/examples/low_level/evaluation.ipynb +++ b/docs/examples/low_level/evaluation.ipynb @@ -38,7 +38,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, @@ -62,8 +62,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.56MB/s in 9.3s \n", "\n", - "2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/fusion_retriever.ipynb b/docs/examples/low_level/fusion_retriever.ipynb index f26448b459..0f5d1bf0b2 100644 --- a/docs/examples/low_level/fusion_retriever.ipynb +++ b/docs/examples/low_level/fusion_retriever.ipynb @@ -40,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, diff --git a/docs/examples/low_level/oss_ingestion_retrieval.ipynb b/docs/examples/low_level/oss_ingestion_retrieval.ipynb index 1aeaf0f270..5c09fc9a48 100644 --- a/docs/examples/low_level/oss_ingestion_retrieval.ipynb +++ b/docs/examples/low_level/oss_ingestion_retrieval.ipynb @@ -53,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-postgres\n", "%pip install llama-index-embeddings-huggingface\n", "%pip install llama-index-llms-llama-cpp" diff --git a/docs/examples/low_level/response_synthesis.ipynb b/docs/examples/low_level/response_synthesis.ipynb index b0088704c8..c472cc7b71 100644 --- a/docs/examples/low_level/response_synthesis.ipynb +++ b/docs/examples/low_level/response_synthesis.ipynb @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-pinecone\n", "%pip install llama-index-llms-openai" ] @@ -386,7 +386,6 @@ "*****Response******:\n", "\n", "RLHF used both model-based and human-based evaluation to select the best-performing models among several ablations. Model-based evaluation was used to measure the robustness of the reward model by collecting a test set of prompts for both helpfulness and safety, and asking three annotators to judge the quality of the answers based on a 7-point Likert scale. Human evaluation was used to validate major model versions. Additionally, a more general reward was trained to ensure the measure wouldn't diverge from the human preferences. Results showed that the reward models were well calibrated with the human preference annotations.\n", - "\n", "\n" ] } @@ -485,9 +484,7 @@ "---------------------\n", "Given the context information and not prior knowledge, answer the query.\n", "Query: Can you tell me about results from RLHF using both model-based and human-based evaluation?\n", - "Answer: \n", - "\n", - "\n" + "Answer: \n" ] } ], diff --git a/docs/examples/low_level/retrieval.ipynb b/docs/examples/low_level/retrieval.ipynb index 4f25f8968a..51d5f72842 100644 --- a/docs/examples/low_level/retrieval.ipynb +++ b/docs/examples/low_level/retrieval.ipynb @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-vector-stores-pinecone\n", "%pip install llama-index-embeddings-openai" ] diff --git a/docs/examples/low_level/router.ipynb b/docs/examples/low_level/router.ipynb index 761bf1fd99..25428541c0 100644 --- a/docs/examples/low_level/router.ipynb +++ b/docs/examples/low_level/router.ipynb @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-program-openai\n", "%pip install llama-index-llms-openai" ] @@ -692,8 +692,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 9.5s \n", "\n", - "2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/low_level/vector_store.ipynb b/docs/examples/low_level/vector_store.ipynb index dd9093bf83..fd2a913c8b 100644 --- a/docs/examples/low_level/vector_store.ipynb +++ b/docs/examples/low_level/vector_store.ipynb @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-embeddings-openai" ] }, @@ -690,9 +690,7 @@ "In this section, we describe our approach to safety fine-tuning, including safety categories, annotation\n", "guidelines, and the techniques we use to mitigate safety risks. We employ a process similar to the general\n", "fine-tuning methods as described in Section 3, with some notable differences related to safety concerns.\n", - "----------------\n", - "\n", - "\n" + "----------------\n" ] } ], @@ -774,9 +772,7 @@ "Better Long-Tail Safety Robustness without Hurting Helpfulness\n", "Safety is inherently a long-tail problem,\n", "where the challenge comes from a small number of very specific cases.\n", - "----------------\n", - "\n", - "\n" + "----------------\n" ] } ], diff --git a/docs/examples/param_optimizer/param_optimizer.ipynb b/docs/examples/param_optimizer/param_optimizer.ipynb index c7e28398c0..34eb9d74f0 100644 --- a/docs/examples/param_optimizer/param_optimizer.ipynb +++ b/docs/examples/param_optimizer/param_optimizer.ipynb @@ -33,7 +33,7 @@ "source": [ "%pip install llama-index-llms-openai\n", "%pip install llama-index-embeddings-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-experimental-param-tuner" ] }, @@ -66,8 +66,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 533KB/s in 36s \n", "\n", - "2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/prompts/emotion_prompt.ipynb b/docs/examples/prompts/emotion_prompt.ipynb index 7fb2420c53..da2bfac9a6 100644 --- a/docs/examples/prompts/emotion_prompt.ipynb +++ b/docs/examples/prompts/emotion_prompt.ipynb @@ -23,7 +23,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -192,8 +192,7 @@ "\n", "data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.04s \n", "\n", - "2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n", - "\n" + "2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n" ] } ], diff --git a/docs/examples/prompts/prompt_optimization.ipynb b/docs/examples/prompts/prompt_optimization.ipynb index 3cf58e9116..ce2fb70d99 100644 --- a/docs/examples/prompts/prompt_optimization.ipynb +++ b/docs/examples/prompts/prompt_optimization.ipynb @@ -21,7 +21,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { diff --git a/docs/examples/prompts/prompts_rag.ipynb b/docs/examples/prompts/prompts_rag.ipynb index cb6eacaad4..a9c0dac8c9 100644 --- a/docs/examples/prompts/prompts_rag.ipynb +++ b/docs/examples/prompts/prompts_rag.ipynb @@ -30,7 +30,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -130,8 +130,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 10s \n", "\n", - "2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb b/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb index 0930193d12..05c490d2f7 100644 --- a/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb +++ b/docs/examples/query_engine/pdf_tables/recursive_retriever.ipynb @@ -31,7 +31,7 @@ "outputs": [], "source": [ "%pip install llama-index-embeddings-openai\n", - "%pip install llama-index-readers-file\n", + "%pip install llama-index-readers-file pymupdf\n", "%pip install llama-index-llms-openai" ] }, diff --git a/docs/examples/retrievers/auto_merging_retriever.ipynb b/docs/examples/retrievers/auto_merging_retriever.ipynb index 42e2314230..6082fbc148 100644 --- a/docs/examples/retrievers/auto_merging_retriever.ipynb +++ b/docs/examples/retrievers/auto_merging_retriever.ipynb @@ -28,7 +28,7 @@ "outputs": [], "source": [ "%pip install llama-index-llms-openai\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -353,8 +353,7 @@ "text": [ "> Merging 4 nodes into parent node.\n", "> Parent node id: caf5f81c-842f-46a4-b679-6be584bd6aff.\n", - "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n", - "\n" + "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n" ] } ], @@ -684,8 +683,7 @@ "text": [ "> Merging 4 nodes into parent node.\n", "> Parent node id: 3671b20d-ea5e-4afc-983e-02be6ee8302d.\n", - "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n", - "\n" + "> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n" ] } ], diff --git a/docs/examples/retrievers/composable_retrievers.ipynb b/docs/examples/retrievers/composable_retrievers.ipynb index 78d8515001..234dd0015e 100644 --- a/docs/examples/retrievers/composable_retrievers.ipynb +++ b/docs/examples/retrievers/composable_retrievers.ipynb @@ -38,7 +38,7 @@ "%pip install llama-index-retrievers-bm25\n", "%pip install llama-index-storage-docstore-redis\n", "%pip install llama-index-storage-docstore-dynamodb\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { diff --git a/docs/examples/retrievers/ensemble_retrieval.ipynb b/docs/examples/retrievers/ensemble_retrieval.ipynb index 9bb2ff4fef..70e9d7230f 100644 --- a/docs/examples/retrievers/ensemble_retrieval.ipynb +++ b/docs/examples/retrievers/ensemble_retrieval.ipynb @@ -38,7 +38,7 @@ "source": [ "%pip install llama-index-llms-openai\n", "%pip install llama-index-postprocessor-cohere-rerank\n", - "%pip install llama-index-readers-file" + "%pip install llama-index-readers-file pymupdf" ] }, { @@ -159,8 +159,7 @@ "\n", "data/llama2.pdf 100%[===================>] 13.03M 521KB/s in 42s \n", "\n", - "2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n", - "\n" + "2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n" ] } ], diff --git a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md index cfde1701d6..82a20b3205 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md +++ b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/pymu_pdf/README.md @@ -1,7 +1,7 @@ # PyMuPDF Loader ```bash -pip install llama-index-readers-file +pip install llama-index-readers-file pymupdf ``` This loader extracts text from a local PDF file using the `PyMuPDF` Python library. If `metadata` is passed as True while calling `load` function; extracted documents will include basic metadata such as page numbers, file path and total number of pages in pdf. diff --git a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml index 7ce4da7890..71561c017a 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml @@ -50,12 +50,13 @@ license = "MIT" maintainers = ["FarisHijazi", "Haowjy", "ephe-meral", "hursh-desai", "iamarunbrahma", "jon-chuang", "mmaatouk", "ravi03071991", "sangwongenip", "thejessezhang"] name = "llama-index-readers-file" readme = "README.md" -version = "0.1.10" +version = "0.1.11" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.1" -pymupdf = "^1.23.21" +# pymupdf is AGPLv3-licensed, so it's optional +pymupdf = {optional = true, version = "^1.23.21"} bs4 = "^0.0.2" beautifulsoup4 = "^4.12.3" pypdf = "^4.0.1" -- GitLab