diff --git a/tests/models/test_internvl.py b/tests/models/test_internvl.py index a1f2a0283..c768521e6 100644 --- a/tests/models/test_internvl.py +++ b/tests/models/test_internvl.py @@ -8,9 +8,6 @@ from transformers import AutoConfig from aphrodite.common.utils import is_cpu -from aphrodite.modeling.models.internvl import (IMG_CONTEXT, IMG_END, - IMG_START, - image_to_pixel_values) from aphrodite.multimodal.utils import rescale_image_size from ..conftest import IMAGE_ASSETS, AphroditeRunner, HfRunner, _ImageAssets @@ -39,35 +36,6 @@ ] -class InternVLProcessor: - """A simple processor for InternVL2 HF model which misses a processor.""" - - def __init__(self, hf_runner: HfRunner): - self.num_image_token = hf_runner.model.num_image_token - self.tokenizer = hf_runner.tokenizer - self.dtype = hf_runner.model.dtype - - self.config = AutoConfig.from_pretrained(hf_runner.model_name) - self.vision_config = self.config.vision_config - self.use_thumbnail = self.config.use_thumbnail - self.min_num = self.config.min_dynamic_patch - self.max_num = self.config.max_dynamic_patch - self.image_size = self.vision_config.image_size - - def __call__(self, text: str, images: Image, **kwargs): - pixel_values = image_to_pixel_values(images, self.image_size, - self.min_num, self.max_num, - self.use_thumbnail).to(self.dtype) - num_patches_list = [pixel_values.shape[0]] - for num_patches in num_patches_list: - context_tokens = IMG_CONTEXT * self.num_image_token * num_patches - image_tokens = IMG_START + context_tokens + IMG_END - text = text.replace('', image_tokens, 1) - prompt = self.tokenizer(text, return_tensors="pt") - prompt.update({"pixel_values": pixel_values}) - return prompt - - # adapted from https://huggingface.co/OpenGVLab/InternVL2-1B/blob/main/modeling_internvl_chat.py def generate( self, @@ -133,6 +101,35 @@ def run_test( # if we run HF first, the cuda initialization will be done and it # will hurt multiprocessing backend with fork method (the default method). + + class InternVLProcessor: + """A simple processor for InternVL2 which misses a processor.""" + def __init__(self, hf_runner: HfRunner): + self.num_image_token = hf_runner.model.num_image_token + self.tokenizer = hf_runner.tokenizer + self.dtype = hf_runner.model.dtype + self.config = AutoConfig.from_pretrained(hf_runner.model_name) + self.vision_config = self.config.vision_config + self.use_thumbnail = self.config.use_thumbnail + self.min_num = self.config.min_dynamic_patch + self.max_num = self.config.max_dynamic_patch + self.image_size = self.vision_config.image_size + def __call__(self, text: str, images: Image, **kwargs): + from aphrodite.modeling.models.internvl import ( + IMG_CONTEXT, IMG_END, IMG_START, image_to_pixel_values) + pixel_values = image_to_pixel_values( + images, self.image_size, self.min_num, self.max_num, + self.use_thumbnail).to(self.dtype) + num_patches_list = [pixel_values.shape[0]] + for num_patches in num_patches_list: + context_tokens = IMG_CONTEXT * self.num_image_token \ + * num_patches + image_tokens = IMG_START + context_tokens + IMG_END + text = text.replace('', image_tokens, 1) + prompt = self.tokenizer(text, return_tensors="pt") + prompt.update({"pixel_values": pixel_values}) + return prompt + # max_model_len should be greater than image_feature_size with aphrodite_runner(model, max_model_len=4096,