Created
August 6, 2025 18:56
-
-
Save pruksmhc/a316805d864324c4fa94cdb492d0b562 to your computer and use it in GitHub Desktop.
Qwen_2.5_7B_MMStar_baseline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "results": { | |
| "mmstar": { | |
| "alias": "mmstar", | |
| "coarse perception,none": 0.7502214478878857, | |
| "coarse perception_stderr,none": "N/A", | |
| "average,none": 0.6282898425011137, | |
| "average_stderr,none": "N/A", | |
| "fine-grained perception,none": 0.5771677720461803, | |
| "fine-grained perception_stderr,none": "N/A", | |
| "instance reasoning,none": 0.7059851871377787, | |
| "instance reasoning_stderr,none": "N/A", | |
| "logical reasoning,none": 0.6201974043558202, | |
| "logical reasoning_stderr,none": "N/A", | |
| "math,none": 0.6405762719415486, | |
| "math_stderr,none": "N/A", | |
| "science & technology,none": 0.47559097163746894, | |
| "science & technology_stderr,none": "N/A" | |
| } | |
| }, | |
| "group_subtasks": { | |
| "mmstar": [] | |
| }, | |
| "configs": { | |
| "mmstar": { | |
| "task": "mmstar", | |
| "dataset_path": "Lin-Chen/MMStar", | |
| "dataset_kwargs": { | |
| "token": true | |
| }, | |
| "test_split": "val", | |
| "full_docs": false, | |
| "process_results_use_image": false, | |
| "doc_to_visual": "<function mmstar_doc_to_visual at 0x742714d83240>", | |
| "doc_to_text": "<function mmstar_doc_to_text at 0x742714d837e0>", | |
| "doc_to_target": "answer", | |
| "process_results": "<function mmstar_process_results at 0x742714d83d80>", | |
| "description": "", | |
| "target_delimiter": " ", | |
| "fewshot_delimiter": "\n\n", | |
| "num_fewshot": 0, | |
| "metric_list": [ | |
| { | |
| "metric": "coarse perception", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a82c0>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "fine-grained perception", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a8720>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "instance reasoning", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a8b80>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "logical reasoning", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a8fe0>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "science & technology", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a9440>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "math", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a98a0>", | |
| "higher_is_better": true | |
| }, | |
| { | |
| "metric": "average", | |
| "aggregation": "<function mmstar_aggregate_results at 0x7427144a9d00>", | |
| "higher_is_better": true | |
| } | |
| ], | |
| "output_type": "generate_until", | |
| "generation_kwargs": { | |
| "until": [ | |
| "\n\n" | |
| ], | |
| "do_sample": false | |
| }, | |
| "repeats": 1, | |
| "should_decontaminate": false, | |
| "metadata": [ | |
| { | |
| "version": 0.0 | |
| } | |
| ], | |
| "lmms_eval_specific_kwargs": { | |
| "default": { | |
| "pre_prompt": "", | |
| "post_prompt": "\nAnswer with the option's letter from the given choices directly" | |
| }, | |
| "pre_prompt": "", | |
| "post_prompt": "\nAnswer with the option's letter from the given choices directly" | |
| } | |
| } | |
| }, | |
| "versions": { | |
| "mmstar": "Yaml" | |
| }, | |
| "n-shot": { | |
| "mmstar": 0 | |
| }, | |
| "higher_is_better": { | |
| "mmstar": { | |
| "coarse perception": true, | |
| "fine-grained perception": true, | |
| "instance reasoning": true, | |
| "logical reasoning": true, | |
| "science & technology": true, | |
| "math": true, | |
| "average": true | |
| } | |
| }, | |
| "n-samples": { | |
| "mmstar": { | |
| "original": 1500, | |
| "effective": 1500 | |
| } | |
| }, | |
| "config": { | |
| "model": "qwen2_5_vl", | |
| "model_args": "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,attn_implementation=flash_attention_2,interleave_visuals=False", | |
| "batch_size": "32", | |
| "batch_sizes": [], | |
| "device": "cuda:0", | |
| "use_cache": null, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "gen_kwargs": "", | |
| "random_seed": 0, | |
| "numpy_seed": 1234, | |
| "torch_seed": 1234, | |
| "fewshot_seed": 1234 | |
| }, | |
| "git_hash": "8895505", | |
| "date": "20250730_091239", | |
| "task_hashes": { | |
| "mmstar": "596d11d8efa9bb058c2d9055a457420e06ff5bddea7858f7ff029c864296a3b8" | |
| }, | |
| "model_source": "qwen2_5_vl", | |
| "model_name": "Qwen/Qwen2.5-VL-7B-Instruct", | |
| "model_name_sanitized": "Qwen__Qwen2.5-VL-7B-Instruct", | |
| "system_instruction": null, | |
| "system_instruction_sha": null, | |
| "fewshot_as_multiturn": false, | |
| "chat_template": null, | |
| "chat_template_sha": null, | |
| "start_time": 22916651.957306925, | |
| "end_time": 22917384.860846087, | |
| "total_evaluation_time_seconds": "732.9035391621292" | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment