hf (pretrained=mistralai/Mistral-7B-v0.1), gen_kwargs: (None), limit: None, num_fewshot: None, batch_size: 8
| Tasks | Version | Filter | n-shot | Metric | Value | | Stderr |
|---|---|---|---|---|---|---|---|
| mmlu | N/A | none | 0 | acc | 0.6273 | ± | 0.0294 |
| - humanities | N/A | none | None | acc | 0.6520 | ± | 0.0233 |
| - formal_logic | 0 | none | None | acc | 0.3571 | ± | 0.0429 |
| - high_school_european_history | 0 | none | None | acc | 0.7455 | ± | 0.0340 |
| - high_school_us_history | 0 | none | None | acc | 0.7549 | ± | 0.0302 |
| - high_school_world_history | 0 | none | None | acc | 0.7764 | ± | 0.0271 |
| - international_law | 0 | none | None | acc | 0.7521 | ± | 0.0394 |
| - jurisprudence | 0 | none | None | acc | 0.7500 | ± | 0.0419 |
| - logical_fallacies | 0 | none | None | acc | 0.7607 | ± | 0.0335 |
| - moral_disputes | 0 | none | None | acc | 0.6792 | ± | 0.0251 |
| - moral_scenarios | 0 | none | None | acc | 0.2413 | ± | 0.0143 |
| - philosophy | 0 | none | None | acc | 0.6849 | ± | 0.0264 |
| - prehistory | 0 | none | None | acc | 0.7160 | ± | 0.0251 |
| - professional_law | 0 | none | None | acc | 0.4446 | ± | 0.0127 |
| - world_religions | 0 | none | None | acc | 0.8129 | ± | 0.0299 |
| - other | N/A | none | None | acc | 0.6461 | ± | 0.0299 |
| - business_ethics | 0 | none | None | acc | 0.5900 | ± | 0.0494 |
| - clinical_knowledge | 0 | none | None | acc | 0.6868 | ± | 0.0285 |
| - college_medicine | 0 | none | None | acc | 0.5954 | ± | 0.0374 |
| - global_facts | 0 | none | None | acc | 0.4000 | ± | 0.0492 |
| - human_aging | 0 | none | None | acc | 0.6502 | ± | 0.0320 |
| - management | 0 | none | None | acc | 0.7767 | ± | 0.0412 |
| - marketing | 0 | none | None | acc | 0.8547 | ± | 0.0231 |
| - medical_genetics | 0 | none | None | acc | 0.7100 | ± | 0.0456 |
| - miscellaneous | 0 | none | None | acc | 0.7944 | ± | 0.0145 |
| - nutrition | 0 | none | None | acc | 0.6993 | ± | 0.0263 |
| - professional_accounting | 0 | none | None | acc | 0.4574 | ± | 0.0297 |
| - professional_medicine | 0 | none | None | acc | 0.6838 | ± | 0.0282 |
| - virology | 0 | none | None | acc | 0.5000 | ± | 0.0389 |
| - social_sciences | N/A | none | None | acc | 0.7024 | ± | 0.0276 |
| - econometrics | 0 | none | None | acc | 0.4211 | ± | 0.0464 |
| - high_school_geography | 0 | none | None | acc | 0.7475 | ± | 0.0310 |
| - high_school_government_and_politics | 0 | none | None | acc | 0.8394 | ± | 0.0265 |
| - high_school_macroeconomics | 0 | none | None | acc | 0.5872 | ± | 0.0250 |
| - high_school_microeconomics | 0 | none | None | acc | 0.6218 | ± | 0.0315 |
| - high_school_psychology | 0 | none | None | acc | 0.7798 | ± | 0.0178 |
| - human_sexuality | 0 | none | None | acc | 0.7557 | ± | 0.0377 |
| - professional_psychology | 0 | none | None | acc | 0.6144 | ± | 0.0197 |
| - public_relations | 0 | none | None | acc | 0.6636 | ± | 0.0453 |
| - security_studies | 0 | none | None | acc | 0.6980 | ± | 0.0294 |
| - sociology | 0 | none | None | acc | 0.8607 | ± | 0.0245 |
| - us_foreign_policy | 0 | none | None | acc | 0.8400 | ± | 0.0368 |
| - stem | N/A | none | None | acc | 0.5087 | ± | 0.0375 |
| - abstract_algebra | 0 | none | None | acc | 0.3000 | ± | 0.0461 |
| - anatomy | 0 | none | None | acc | 0.5556 | ± | 0.0429 |
| - astronomy | 0 | none | None | acc | 0.6184 | ± | 0.0395 |
| - college_biology | 0 | none | None | acc | 0.6667 | ± | 0.0394 |
| - college_chemistry | 0 | none | None | acc | 0.4500 | ± | 0.0500 |
| - college_computer_science | 0 | none | None | acc | 0.5500 | ± | 0.0500 |
| - college_mathematics | 0 | none | None | acc | 0.3400 | ± | 0.0476 |
| - college_physics | 0 | none | None | acc | 0.4510 | ± | 0.0495 |
| - computer_security | 0 | none | None | acc | 0.7400 | ± | 0.0441 |
| - conceptual_physics | 0 | none | None | acc | 0.5277 | ± | 0.0326 |
| - electrical_engineering | 0 | none | None | acc | 0.5862 | ± | 0.0410 |
| - elementary_mathematics | 0 | none | None | acc | 0.3968 | ± | 0.0252 |
| - high_school_biology | 0 | none | None | acc | 0.7419 | ± | 0.0249 |
| - high_school_chemistry | 0 | none | None | acc | 0.5025 | ± | 0.0352 |
| - high_school_computer_science | 0 | none | None | acc | 0.6400 | ± | 0.0482 |
| - high_school_mathematics | 0 | none | None | acc | 0.3444 | ± | 0.0290 |
| - high_school_physics | 0 | none | None | acc | 0.3046 | ± | 0.0376 |
| - high_school_statistics | 0 | none | None | acc | 0.4676 | ± | 0.0340 |
| - machine_learning | 0 | none | None | acc | 0.4821 | ± | 0.0474 |

| Groups | Version | Filter | n-shot | Metric | Value | | Stderr |
|---|---|---|---|---|---|---|---|
| mmlu | N/A | none | 0 | acc | 0.6273 | ± | 0.0294 |
| - humanities | N/A | none | None | acc | 0.6520 | ± | 0.0233 |
| - other | N/A | none | None | acc | 0.6461 | ± | 0.0299 |
| - social_sciences | N/A | none | None | acc | 0.7024 | ± | 0.0276 |
| - stem | N/A | none | None | acc | 0.5087 | ± | 0.0375 |

real 8m7.630s
user 4m50.151s
sys 0m15.217s