Skip to content
Snippets Groups Projects
Commit 30bc536d authored by Kai Wu
Browse files

remove double BOS manually during fine-tuning

parent ff3df5b0
No related branches found
No related tags found
No related merge requests found
...@@ -26,11 +26,11 @@ def replace_target(target, seq): ...@@ -26,11 +26,11 @@ def replace_target(target, seq):
def tokenize_dialogs(dialogs, images, processor): def tokenize_dialogs(dialogs, images, processor):
text_prompt = processor.apply_chat_template(dialogs) text_prompt = processor.apply_chat_template(dialogs)
text_prompt = [prompt.replace('<|begin_of_text|>','') for prompt in text_prompt]
batch = processor( batch = processor(
images=images, images=images,
text=text_prompt, text=text_prompt,
padding=True, padding=True,
text_kwargs={"add_special_tokens": False},
return_tensors="pt", return_tensors="pt",
) )
label_list = [] label_list = []
...@@ -137,3 +137,4 @@ class OCRVQADataCollator: ...@@ -137,3 +137,4 @@ class OCRVQADataCollator:
def get_data_collator(processor): def get_data_collator(processor):
return OCRVQADataCollator(processor) return OCRVQADataCollator(processor)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment