Skip to content

Instantly share code, notes, and snippets.

@pruksmhc
Created August 6, 2025 18:56
Show Gist options
  • Select an option

  • Save pruksmhc/a316805d864324c4fa94cdb492d0b562 to your computer and use it in GitHub Desktop.

Select an option

Save pruksmhc/a316805d864324c4fa94cdb492d0b562 to your computer and use it in GitHub Desktop.
Qwen_2.5_7B_MMStar_baseline
{
"results": {
"mmstar": {
"alias": "mmstar",
"coarse perception,none": 0.7502214478878857,
"coarse perception_stderr,none": "N/A",
"average,none": 0.6282898425011137,
"average_stderr,none": "N/A",
"fine-grained perception,none": 0.5771677720461803,
"fine-grained perception_stderr,none": "N/A",
"instance reasoning,none": 0.7059851871377787,
"instance reasoning_stderr,none": "N/A",
"logical reasoning,none": 0.6201974043558202,
"logical reasoning_stderr,none": "N/A",
"math,none": 0.6405762719415486,
"math_stderr,none": "N/A",
"science & technology,none": 0.47559097163746894,
"science & technology_stderr,none": "N/A"
}
},
"group_subtasks": {
"mmstar": []
},
"configs": {
"mmstar": {
"task": "mmstar",
"dataset_path": "Lin-Chen/MMStar",
"dataset_kwargs": {
"token": true
},
"test_split": "val",
"full_docs": false,
"process_results_use_image": false,
"doc_to_visual": "<function mmstar_doc_to_visual at 0x742714d83240>",
"doc_to_text": "<function mmstar_doc_to_text at 0x742714d837e0>",
"doc_to_target": "answer",
"process_results": "<function mmstar_process_results at 0x742714d83d80>",
"description": "",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"num_fewshot": 0,
"metric_list": [
{
"metric": "coarse perception",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a82c0>",
"higher_is_better": true
},
{
"metric": "fine-grained perception",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a8720>",
"higher_is_better": true
},
{
"metric": "instance reasoning",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a8b80>",
"higher_is_better": true
},
{
"metric": "logical reasoning",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a8fe0>",
"higher_is_better": true
},
{
"metric": "science & technology",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a9440>",
"higher_is_better": true
},
{
"metric": "math",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a98a0>",
"higher_is_better": true
},
{
"metric": "average",
"aggregation": "<function mmstar_aggregate_results at 0x7427144a9d00>",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"until": [
"\n\n"
],
"do_sample": false
},
"repeats": 1,
"should_decontaminate": false,
"metadata": [
{
"version": 0.0
}
],
"lmms_eval_specific_kwargs": {
"default": {
"pre_prompt": "",
"post_prompt": "\nAnswer with the option's letter from the given choices directly"
},
"pre_prompt": "",
"post_prompt": "\nAnswer with the option's letter from the given choices directly"
}
}
},
"versions": {
"mmstar": "Yaml"
},
"n-shot": {
"mmstar": 0
},
"higher_is_better": {
"mmstar": {
"coarse perception": true,
"fine-grained perception": true,
"instance reasoning": true,
"logical reasoning": true,
"science & technology": true,
"math": true,
"average": true
}
},
"n-samples": {
"mmstar": {
"original": 1500,
"effective": 1500
}
},
"config": {
"model": "qwen2_5_vl",
"model_args": "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,attn_implementation=flash_attention_2,interleave_visuals=False",
"batch_size": "32",
"batch_sizes": [],
"device": "cuda:0",
"use_cache": null,
"limit": null,
"bootstrap_iters": 100000,
"gen_kwargs": "",
"random_seed": 0,
"numpy_seed": 1234,
"torch_seed": 1234,
"fewshot_seed": 1234
},
"git_hash": "8895505",
"date": "20250730_091239",
"task_hashes": {
"mmstar": "596d11d8efa9bb058c2d9055a457420e06ff5bddea7858f7ff029c864296a3b8"
},
"model_source": "qwen2_5_vl",
"model_name": "Qwen/Qwen2.5-VL-7B-Instruct",
"model_name_sanitized": "Qwen__Qwen2.5-VL-7B-Instruct",
"system_instruction": null,
"system_instruction_sha": null,
"fewshot_as_multiturn": false,
"chat_template": null,
"chat_template_sha": null,
"start_time": 22916651.957306925,
"end_time": 22917384.860846087,
"total_evaluation_time_seconds": "732.9035391621292"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment