diff --git a/src/generate.py b/src/generate.py
index e43fd112df6761a79a584c6b86e6abd0945f35bc..a5927e37a038246c86ee60850c322b611e1ed677 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -51,7 +51,7 @@ if __name__ == '__main__':
         client, model = get_client_fn(args.model_path)(args.model_path, args.model_api)
         generate_response_remote_wrapper(generate_fn, queries, model, args.model_api, client)
     else:
-        generate_fn(args.model_path, queries)
+        generate_fn(queries, args.model_path)

     for k in queries:
         queries[k].pop("figure_path", None)
diff --git a/src/generate_lib/cambrian.py b/src/generate_lib/cambrian.py
index 416c081e4f28597b895f40fdbc7990178c3fbd65..0a5c0c198b58517bb31c90defb8294796f33dbdc 100644
--- a/src/generate_lib/cambrian.py
+++ b/src/generate_lib/cambrian.py
@@ -19,7 +19,7 @@ from cambrian.mm_utils import tokenizer_image_token, process_images, get_model_n
 from PIL import Image


-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     conv_mode = "chatml_direct"
     def process(image, question, tokenizer, image_processor, model_config):
         qs = question
diff --git a/src/generate_lib/chartgemma.py b/src/generate_lib/chartgemma.py
index 4337db501e9ff13cdcdb2ec868cc0a5be2ebbc44..c5a204d336e08afbb137885a2c61a46b2662e8ff 100644
--- a/src/generate_lib/chartgemma.py
+++ b/src/generate_lib/chartgemma.py
@@ -6,7 +6,7 @@ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 import torch
 from tqdm import tqdm

-def generate_response(queries, model_path=None):
+def generate_response(queries, model_path):
     # Load Model
     model = PaliGemmaForConditionalGeneration.from_pretrained(model_path, torch_dtype=torch.float16)
     processor = AutoProcessor.from_pretrained(model_path)
diff --git a/src/generate_lib/deepseekvl.py b/src/generate_lib/deepseekvl.py
index 11b9e85191ebb61b36586dad805f2407f7e15a67..d709c72ae6316a691ee959544b674abbf8125e0b 100644
--- a/src/generate_lib/deepseekvl.py
+++ b/src/generate_lib/deepseekvl.py
@@ -8,7 +8,7 @@ from deepseek_vl.utils.io import load_pil_images
 import torch
 from tqdm import tqdm

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     # specify the path to the model
     vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
     tokenizer = vl_chat_processor.tokenizer
diff --git a/src/generate_lib/idefics2.py b/src/generate_lib/idefics2.py
index 7b19fcd001c3e819d782906d7c23e67a73e08b7a..d02ca64b657bc25cae0f087f333f994f2c36c3ad 100644
--- a/src/generate_lib/idefics2.py
+++ b/src/generate_lib/idefics2.py
@@ -4,7 +4,7 @@ from transformers.image_utils import load_image
 from transformers import AutoProcessor, AutoModelForVision2Seq
 from tqdm import tqdm

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     model = AutoModelForVision2Seq.from_pretrained(model_path).to('cuda')
     processor = AutoProcessor.from_pretrained(model_path)
     for k in tqdm(queries):
diff --git a/src/generate_lib/internvl15.py b/src/generate_lib/internvl15.py
index 2be19bc32bf5c7306e15af1a6e4f30207e7600d0..d4469cc266b5288908cda0baeb1b692ac0677b73 100644
--- a/src/generate_lib/internvl15.py
+++ b/src/generate_lib/internvl15.py
@@ -87,7 +87,7 @@ def load_image(image_file, input_size=448, max_num=6):
     pixel_values = torch.stack(pixel_values)
     return pixel_values

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     model = AutoModel.from_pretrained(
         model_path,
         torch_dtype=torch.bfloat16,
diff --git a/src/generate_lib/internvl2.py b/src/generate_lib/internvl2.py
index cc1e7f4f21588e084b6bd864cacb95ef5df36257..d415da82485c7dcf13ad893c35c50a76e8f332c2 100644
--- a/src/generate_lib/internvl2.py
+++ b/src/generate_lib/internvl2.py
@@ -113,7 +113,7 @@ def split_model(model_name):

     return device_map

-def generate_response(queries, model_path=None):
+def generate_response(queries, model_path):
     device_map = split_model(model_path.split('/')[-1])
     print(device_map)
     model = AutoModel.from_pretrained(
diff --git a/src/generate_lib/internvl2pro.py b/src/generate_lib/internvl2pro.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c8aa6581dc4ea6e94a70b7d004d49b36d59fa63
--- /dev/null
+++ b/src/generate_lib/internvl2pro.py
@@ -0,0 +1,38 @@
+
+
+import requests
+
+def get_client_model(model_path, api_key):
+    assert api_key is not None, "API key is required for using GPT"
+    assert model_path is not None, "Model name is required for using GPT"
+    model = model_path
+    client = None
+    return client, model
+
+def generate_response(image_path, query, model, media_type="image/jpeg", api_key=None, client=None, random_baseline=False):
+
+    url = "http://101.132.98.120:11005/chat/"
+
+
+    file_paths = [
+        image_path
+    ]
+    question = query
+
+    files = [('files', open(file_path, 'rb')) for file_path in file_paths]
+    data = {
+        'question': question,
+        'api_key': api_key
+    }
+
+    try:
+        response = requests.post(url, files=files, data=data)
+        if response.status_code == 200:
+            print("Response:", response.json().get("response", "No response key found in the JSON."))
+            return response.json().get("response", "No response key found in the JSON.")
+        else:
+            print("Error:", response.status_code, response.text)
+            return "Error in generating response."
+    except requests.exceptions.RequestException as e:
+        print(f"Error: {e}")
+        return "Error in generating response."
diff --git a/src/generate_lib/ixc2.py b/src/generate_lib/ixc2.py
index a96476bb0b404ef7abbd0cce6c8ebb37b0acdc6a..b44e5b5e7c89b81cfb63b879c50492b515ed0e0f 100644
--- a/src/generate_lib/ixc2.py
+++ b/src/generate_lib/ixc2.py
@@ -4,7 +4,7 @@ import torch
 from transformers import AutoModel, AutoTokenizer
 from tqdm import tqdm

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     # taken from:
     torch.set_grad_enabled(False)
     if '4khd' in model_path:
diff --git a/src/generate_lib/llava16.py b/src/generate_lib/llava16.py
index 28658320ec0cda11eabad54cc20aab10002f5ec0..cc4a30e513d5751736f2aa298f08a3956a07626a 100644
--- a/src/generate_lib/llava16.py
+++ b/src/generate_lib/llava16.py
@@ -6,7 +6,7 @@ from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
 from tqdm import tqdm
 from PIL import Image

-def generate_responses(model_path, queries):
+def generate_responses(queries, model_path):
     # taken from: https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf
     processor = LlavaNextProcessor.from_pretrained(model_path)
     model = LlavaNextForConditionalGeneration.from_pretrained(model_path,
diff --git a/src/generate_lib/mgm.py b/src/generate_lib/mgm.py
index 2cfeb5e8b3007119a0fc2eba99a5ff53a62c308c..c5b81f2d52df0f32287fde860719f1e378a53c6f 100644
--- a/src/generate_lib/mgm.py
+++ b/src/generate_lib/mgm.py
@@ -64,7 +64,7 @@ def get_image_input_from_path(image, model, image_processor):

     return images, images_aux,

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     disable_torch_init()
     model_path = os.path.expanduser(model_path)
     model_name = get_model_name_from_path(model_path)
diff --git a/src/generate_lib/minicpm.py b/src/generate_lib/minicpm.py
index 6b859e265b244853f62e41bf36a8d8e55c27c615..80c1a043e81b7b737cd055f63fb06baf65fc1aab 100644
--- a/src/generate_lib/minicpm.py
+++ b/src/generate_lib/minicpm.py
@@ -6,7 +6,7 @@ from tqdm import tqdm
 from PIL import Image
 import torch

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
     model = model.to(device='cuda', dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
diff --git a/src/generate_lib/moai.py b/src/generate_lib/moai.py
index 19f2ea350ed0ccc8832492f0dd300737e08d09de..cc8a3fde73bbfc722f8dff4025c955a5bb82a161 100644
--- a/src/generate_lib/moai.py
+++ b/src/generate_lib/moai.py
@@ -16,7 +16,7 @@ import tqdm
 from PIL import Image
 import torch

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     moai_model, moai_processor, seg_model, seg_processor, od_model, od_processor, sgg_model, ocr_model \
         = prepare_moai(moai_path=model_path, bits=4, grad_ckpt=False, lora=False, dtype='fp16')
     for k in tqdm(queries):
diff --git a/src/generate_lib/ovis.py b/src/generate_lib/ovis.py
index 3b5ef825fc00c2d283c8ba66b95d9e8a82b15aac..5bdb500feb244ab02496a7e6140c5c2b8beefd79 100644
--- a/src/generate_lib/ovis.py
+++ b/src/generate_lib/ovis.py
@@ -6,7 +6,7 @@ from PIL import Image
 from transformers import AutoModelForCausalLM
 from tqdm import tqdm

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     model = AutoModelForCausalLM.from_pretrained(model_path,
                                                  torch_dtype=torch.bfloat16,
                                                  multimodal_max_length=8192,
diff --git a/src/generate_lib/paligemma.py b/src/generate_lib/paligemma.py
index 50ff0e824fba55086507e5998d51515b2b3c822f..b3ebee0ff70a153fbc4b247175b7c57087506d59 100644
--- a/src/generate_lib/paligemma.py
+++ b/src/generate_lib/paligemma.py
@@ -6,7 +6,7 @@ from PIL import Image
 import torch
 from tqdm import tqdm

-def generate_response(queries, model_path=None):
+def generate_response(queries, model_path):
     model_id = model_path
     device = "cuda:0"
     dtype = torch.bfloat16
diff --git a/src/generate_lib/phi3.py b/src/generate_lib/phi3.py
index 694e34efc19d9d99895993a2e0fd99ce731c6bb2..0e959a25ff25e354c38373bef124293c3168369b 100644
--- a/src/generate_lib/phi3.py
+++ b/src/generate_lib/phi3.py
@@ -5,7 +5,7 @@ from PIL import Image
 from transformers import AutoModelForCausalLM, AutoProcessor
 from tqdm import tqdm

-def generate_response(queries, model_path=None):
+def generate_response(queries, model_path):
     model = AutoModelForCausalLM.from_pretrained(model_path,
                                                  device_map="cuda",
                                                  trust_remote_code=True,
diff --git a/src/generate_lib/sphinx2.py b/src/generate_lib/sphinx2.py
index f69c25c6dc4d840e6ad5390c221bde7360692024..eeef79399fbdfe7d5c1920f89cf20b7659d7280f 100644
--- a/src/generate_lib/sphinx2.py
+++ b/src/generate_lib/sphinx2.py
@@ -5,7 +5,7 @@ from SPHINX import SPHINXModel
 from PIL import Image
 from tqdm import tqdm

-def generate_response(model_path, queries):
+def generate_response(queries, model_path):
     model = SPHINXModel.from_pretrained(pretrained_path=model_path, with_visual=True)
     for k in tqdm(queries):
         qas = [[queries[k]['question'], None]]
diff --git a/src/generate_lib/utils.py b/src/generate_lib/utils.py
index db4107add7622fd394b6bdeae1587e92135981f7..94a0d3307410c89a861a17059d0f3bd39e9b5ba9 100644
--- a/src/generate_lib/utils.py
+++ b/src/generate_lib/utils.py
@@ -33,7 +33,8 @@ def get_client_fn(model_path):
     # gemini
     elif model_path in ['gemini-1.5-pro-001',
                         'gemini-1.0-pro-vision-001',
-                        'gemini-1.5-flash-001']:
+                        'gemini-1.5-flash-001',
+                        'gemini-1.5-pro-exp-0801']:
         from .gemini import get_client_model
     # gpt
     elif model_path in ['gpt-4o-2024-05-13',
@@ -49,6 +50,9 @@ def get_client_fn(model_path):
     elif model_path in ['qwen-vl-max',
                         'qwen-vl-plus']:
         from .qwen import get_client_model
+    # internvl2pro
+    elif model_path in ['InternVL2-Pro']:
+        from .internvl2pro import get_client_model
     else:
         raise ValueError(f"Model {model_path} not supported")
     return get_client_model
@@ -73,7 +77,8 @@ def get_generate_fn(model_path):
     # gemini
     elif model_name in ['gemini-1.5-pro-001',
                         'gemini-1.0-pro-vision-001',
-                        'gemini-1.5-flash-001']:
+                        'gemini-1.5-flash-001',
+                        'gemini-1.5-pro-exp-0801']:
         from .gemini import generate_response
     # gpt
     elif model_name in ['gpt-4o-2024-05-13',
@@ -135,6 +140,9 @@ def get_generate_fn(model_path):
     elif model_name in ['Ovis1.5-Llama3-8B',
                         'Ovis1.5-Gemma2-9B']:
         from .ovis import generate_response
+    # internvl2pro
+    elif model_name in ['InternVL2-Pro']:
+        from .internvl2pro import generate_response
     else:
         raise ValueError(f"Model {model_name} not supported")
     return generate_response
diff --git a/src/generate_lib/vila15.py b/src/generate_lib/vila15.py
index 0badfa0eda4d6de04dcded5e5eb0090c164e4363..223212f7a890563e08ca6020327c9dad9b8979b5 100644
--- a/src/generate_lib/vila15.py
+++ b/src/generate_lib/vila15.py
@@ -34,7 +34,7 @@ def load_images(image_files):
         out.append(image)
     return out

-def generate_response(queries, model_path=None):
+def generate_response(queries, model_path):
     disable_torch_init()
     model_name = get_model_name_from_path(model_path)
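
Reviewer note: after this change every local backend in src/generate_lib/ exposes generate_response(queries, model_path) with model_path required (no default of None), and src/generate.py calls it in that order; remote models such as InternVL2-Pro and the gemini/gpt/qwen endpoints instead go through get_client_model plus the per-image generate_response used by generate_response_remote_wrapper. The snippet below is only a minimal sketch of the new local calling convention; the dummy backend and the exact shape of the queries dict ({id: {"question": ..., "figure_path": ...}}) are assumptions for illustration, not code from this repository.

    # Illustrative sketch only: a stand-in backend following the unified signature.
    # The queries layout and this dummy backend are assumptions, not repository code.
    def generate_response(queries, model_path):
        # Same shape as the refactored backends: queries first, model_path required.
        for k in queries:
            queries[k]["response"] = f"[{model_path}] answer to: {queries[k]['question']}"

    if __name__ == "__main__":
        queries = {"0": {"question": "Which month has the peak value?",
                         "figure_path": "figures/0.jpg"}}
        generate_response(queries, "local/some-vlm")   # new argument order
        print(queries["0"]["response"])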