diff --git a/recipes/benchmarks/inference_throughput/on-prem/README.md b/recipes/benchmarks/inference_throughput/on-prem/README.md
index 0d2053f5ac76a3d1470fee76e8084170bc9d95cb..ffd332e9dd90918c464cd1a61981e30519fb4cfe 100644
--- a/recipes/benchmarks/inference_throughput/on-prem/README.md
+++ b/recipes/benchmarks/inference_throughput/on-prem/README.md
@@ -37,3 +37,5 @@ To run pretrained model benchmark, follow the command below.
 ```
 python pretrained_vllm_benchmark.py
 ```
+
+For more vLLM benchmark details, refer to their official GitHub repo [here](https://github.com/vllm-project/vllm/tree/main/benchmarks).
diff --git a/recipes/benchmarks/inference_throughput/on-prem/vllm/chat_vllm_benchmark.py b/recipes/benchmarks/inference_throughput/on-prem/vllm/chat_vllm_benchmark.py
index 7c7057cc12278c0b3a02c97b802f9417dbfbb008..30b2765f81ace4508cd1e4f9c253d4d13faeb229 100644
--- a/recipes/benchmarks/inference_throughput/on-prem/vllm/chat_vllm_benchmark.py
+++ b/recipes/benchmarks/inference_throughput/on-prem/vllm/chat_vllm_benchmark.py
@@ -4,7 +4,6 @@
 import csv
 import json
 import time
-import random
 import threading
 import numpy as np
 import requests
@@ -18,7 +17,7 @@
 from azure.core.exceptions import HttpResponseError
 from azure.ai.contentsafety.models import AnalyzeTextOptions
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, Tuple, List
+from typing import Tuple, List
 
 
 
diff --git a/recipes/benchmarks/inference_throughput/on-prem/vllm/parameters.json b/recipes/benchmarks/inference_throughput/on-prem/vllm/parameters.json
index d5f055039e4cca1ce890913b9f4f7c24d925d6cb..deaee9bb358f6fadf2663e6b7f9771b091b2f23a 100644
--- a/recipes/benchmarks/inference_throughput/on-prem/vllm/parameters.json
+++ b/recipes/benchmarks/inference_throughput/on-prem/vllm/parameters.json
@@ -1,7 +1,7 @@
 {
     "MAX_NEW_TOKENS" : 256,
     "CONCURRENT_LEVELS" : [1, 2, 4, 8, 16, 32, 64, 128, 256],
-    "MODEL_PATH" : "meta-llama/Meta-Llama-3-70B-Instruct",
+    "MODEL_PATH" : "meta-llama/your-model-path",
     "MODEL_HEADERS" : {"Content-Type": "application/json"},
     "SAFE_CHECK" : true,
     "THRESHOLD_TPS" : 7,
diff --git a/recipes/benchmarks/inference_throughput/on-prem/vllm/pretrained_vllm_benchmark.py b/recipes/benchmarks/inference_throughput/on-prem/vllm/pretrained_vllm_benchmark.py
index 84fdf2e64f0e8ef50efa7810e1052dd5b8dc8048..3d74cd4e3d40e077111a9ceb1b4debfcc9c67286 100644
--- a/recipes/benchmarks/inference_throughput/on-prem/vllm/pretrained_vllm_benchmark.py
+++ b/recipes/benchmarks/inference_throughput/on-prem/vllm/pretrained_vllm_benchmark.py
@@ -18,7 +18,7 @@
 from azure.core.exceptions import HttpResponseError
 from azure.ai.contentsafety.models import AnalyzeTextOptions
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, Tuple, List
+from typing import Tuple, List
 
 
 # Predefined inputs