diff --git a/docs/multi_gpu.md b/docs/multi_gpu.md index 81e3766bb28d032d6da1551bc183dafba85f7e51..baca383de5daa71f520a8d70523ffdf56fb29288 100644 --- a/docs/multi_gpu.md +++ b/docs/multi_gpu.md @@ -86,7 +86,7 @@ Currently 4 datasets are supported that can be found in [Datasets config file](. * `alpaca_dataset` : to get this open source data please download the `aplaca.json` to `dataset` folder. ```bash -wget -P datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` diff --git a/docs/single_gpu.md b/docs/single_gpu.md index 89b4749e8f09cb555fedd37e69a72038a1fc2d74..bcd2a3178a3e209ba391a14971ab8c464f9ada62 100644 --- a/docs/single_gpu.md +++ b/docs/single_gpu.md @@ -41,7 +41,7 @@ Currently 4 datasets are supported that can be found in [Datasets config file](. * `alpaca_dataset` : to get this open source data please download the `aplaca.json` to `ft_dataset` folder. ```bash -wget -P datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` diff --git a/src/llama_recipes/configs/datasets.py b/src/llama_recipes/configs/datasets.py index 6cb3cf591c1a17f0af748672f558ba1f9508f769..70823b5e120f4006576a0c29b24a2fa1ece2cf64 100644 --- a/src/llama_recipes/configs/datasets.py +++ b/src/llama_recipes/configs/datasets.py @@ -15,8 +15,8 @@ class samsum_dataset: @dataclass class grammar_dataset: dataset: str = "grammar_dataset" - train_split: str = "ft_datasets/grammar_dataset/gtrain_10k.csv" - test_split: str = "ft_datasets/grammar_dataset/grammar_validation.csv" + train_split: str = "src/llama_recipes/datasets/grammar_dataset/gtrain_10k.csv" + test_split: str = "src/llama_recipes/datasets/grammar_dataset/grammar_validation.csv" input_length: int = 2048 @@ -25,4 +25,4 @@ class alpaca_dataset: dataset: str = "alpaca_dataset" train_split: str = "train" test_split: str = "val" - data_path: str = "ft_datasets/alpaca_data.json" \ No newline at end of file + data_path: str = "src/llama_recipes/datasets/alpaca_data.json" \ No newline at end of file