Skip to content

Instantly share code, notes, and snippets.

@nph4rd
Last active March 20, 2025 17:09
Show Gist options
  • Select an option

  • Save nph4rd/f003323ac4c8940f779f44a24b815ff7 to your computer and use it in GitHub Desktop.

Select an option

Save nph4rd/f003323ac4c8940f779f44a24b815ff7 to your computer and use it in GitHub Desktop.
# using `transformers==4.49.0` there's a input side-effect when calling the processor
# this example shows the case for Qwen2.5-VL
from transformers import AutoProcessor
from qwen_vl_utils import process_vision_info
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
messages1 = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "Describe this image."},
],
}
]
messages2 = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
},
{"type": "text", "text": "is there a person here?"},
],
}
]
messages = [messages1, messages2]
print("="*40)
print("MULTIPLE MESSAGES")
texts = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in messages]
image_inputs, video_inputs = process_vision_info(messages)
texts_copy = texts.copy()
inputs = processor(
text=texts,
images=image_inputs,
videos=video_inputs,
padding=True,
return_tensors="pt",
)
print(f"modified `texts`? {not texts == texts_copy}")
print("="*40)
print("SINGLE MESSAGE")
text = processor.apply_chat_template(messages1, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages1)
text_copy = text
inputs = processor(
text=text,
images=image_inputs,
videos=video_inputs,
padding=True,
return_tensors="pt",
)
print(f"modified `text`? {not text == text_copy}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment