Skip to content
Snippets Groups Projects
Commit fcc817e9 authored by hongbo.mo's avatar hongbo.mo
Browse files

bugfix: remove duplicate load_peft_model

parent 03faba66
No related branches found
No related tags found
No related merge requests found
@@ -99,11 +99,7 @@ def main(
             print("Skipping the inference as the prompt is not safe.")
             sys.exit(1)  # Exit the program with an error status
-    if peft_model:
-        model = load_peft_model(model, peft_model)
-    model.eval()
-    batch = tokenizer(user_prompt, padding='max_length', truncation=True,max_length=max_padding_length,return_tensors="pt")
+    batch = tokenizer(user_prompt, padding='max_length', truncation=True, max_length=max_padding_length, return_tensors="pt")
     batch = {k: v.to("cuda") for k, v in batch.items()}
     start = time.perf_counter()
...
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment