Created
September 9, 2025 04:01
-
-
Save davidberard98/df769c0789b813853e7b448a2bb6517a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| metric | side_a_speedup | side_b_speedup | ratio_b_over_a | improvement_percent | |
|---|---|---|---|---|---|
| tritonbench_gemm_fwd[x_(2816, 2816, 2816)-triton_tutorial_matmul]_speedup | 0.9140271292993862 | 0.6970010513896804 | 0.7625605729273494 | -23.743942707265063 | |
| tritonbench_layer_norm_bwd[x_(4096, 7680)-liger_layer_norm]_speedup | 0.9977561823241726 | 0.7614043701113448 | 0.7631166647724796 | -23.688333522752036 | |
| tritonbench_gemm_fwd[x_(2688, 2688, 2688)-triton_tutorial_matmul]_speedup | 0.9499290949872015 | 0.7257081424105393 | 0.7639603273971904 | -23.603967260280957 | |
| tritonbench_layer_norm_bwd[x_(4096, 6656)-liger_layer_norm]_speedup | 0.9266213852456668 | 0.7133837487030577 | 0.7698761976164888 | -23.012380238351117 | |
| tritonbench_layer_norm_bwd[x_(4096, 7168)-liger_layer_norm]_speedup | 0.9561411985507111 | 0.7366730997908861 | 0.770464760756583 | -22.953523924341702 | |
| tritonbench_gemm_fwd[x_(4096, 4096, 4096)-triton_tutorial_matmul]_speedup | 0.932466383549378 | 0.7262403068410183 | 0.7788380574928906 | -22.11619425071094 | |
| tritonbench_gemm_fwd[x_(3328, 3328, 3328)-triton_tutorial_matmul]_speedup | 0.8662131367630359 | 0.681059896289598 | 0.7862497893239766 | -21.37502106760234 | |
| tritonbench_gemm_fwd[x_(3584, 3584, 3584)-triton_tutorial_matmul]_speedup | 0.9239252216948058 | 0.733860809232369 | 0.7942859357018185 | -20.57140642981815 | |
| tritonbench_gemm_fwd[x_(3456, 3456, 3456)-triton_tutorial_matmul]_speedup | 0.9320134486363466 | 0.7484312598877756 | 0.8030262449354406 | -19.697375506455938 | |
| tritonbench_layer_norm_bwd[x_(4096, 5632)-liger_layer_norm]_speedup | 0.930560755740277 | 0.751740837248541 | 0.8078363853314643 | -19.21636146685357 | |
| tritonbench_int4_gemm_fwd[x_(16, 1, 1280, 8192)-triton]_speedup | 0.28203342708712165 | 0.22792606435496146 | 0.8081526601616406 | -19.184733983835944 | |
| tritonbench_layer_norm_bwd[x_(4096, 4608)-liger_layer_norm]_speedup | 0.8456937502177758 | 0.684227485420111 | 0.8090724156870198 | -19.09275843129802 | |
| tritonbench_gemm_fwd[x_(2048, 2048, 2048)-triton_tutorial_matmul]_speedup | 0.9319148704288177 | 0.7546689412125357 | 0.8098045917705736 | -19.019540822942638 | |
| tritonbench_gemm_fwd[x_(3840, 3840, 3840)-triton_tutorial_matmul]_speedup | 0.8105280714900104 | 0.6570415943890352 | 0.8106339774033763 | -18.936602259662372 | |
| tritonbench_gemm_fwd[x_(3968, 3968, 3968)-triton_tutorial_matmul]_speedup | 0.9490344862049137 | 0.7725833045822535 | 0.8140729507857303 | -18.59270492142697 | |
| tritonbench_layer_norm_bwd[x_(4096, 5120)-liger_layer_norm]_speedup | 0.8879154716173385 | 0.7232902955772791 | 0.8145936394821518 | -18.54063605178482 | |
| tritonbench_gemm_fwd[x_(3712, 3712, 3712)-triton_tutorial_matmul]_speedup | 0.9085162379243885 | 0.7425105464487084 | 0.8172782339532648 | -18.27217660467352 | |
| tritonbench_gemm_fwd[x_(2560, 2560, 2560)-triton_tutorial_matmul]_speedup | 0.7961751678560129 | 0.6511723660373726 | 0.8178757543906923 | -18.212424560930774 | |
| tritonbench_gemm_fwd[x_(1920, 1920, 1920)-triton_tutorial_matmul]_speedup | 0.9405162764891412 | 0.7780898839661873 | 0.8273008170264992 | -17.269918297350085 | |
| tritonbench_int4_gemm_fwd[x_(4, 1, 1280, 8192)-triton]_speedup | 0.1679442538532255 | 0.1389541663585856 | 0.8273826771115627 | -17.261732288843735 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_speedup | 68.5115324058302 | 56.9197164006804 | 0.830804893744234 | -16.919510625576596 | |
| tritonbench_gemm_fwd[x_(3200, 3200, 3200)-triton_tutorial_matmul]_speedup | 0.7812828527457611 | 0.65284305712294 | 0.8356039747046425 | -16.439602529535748 | |
| tritonbench_layer_norm_bwd[x_(4096, 6144)-liger_layer_norm]_speedup | 0.9004269870019902 | 0.7682271948151851 | 0.8531809973544108 | -14.681900264558923 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_speedup | 69.96805342165906 | 60.322940955645684 | 0.8621497670102728 | -13.785023298972721 | |
| tritonbench_layer_norm_bwd[x_(4096, 8192)-liger_layer_norm]_speedup | 0.8485369968865842 | 0.7324222510850372 | 0.8631588885015147 | -13.684111149848533 | |
| tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 1024)-triton]_speedup | 0.5294117507190358 | 0.4653614445949409 | 0.8790160852359187 | -12.098391476408132 | |
| tritonbench_gemm_fwd[x_(3072, 3072, 3072)-triton_tutorial_matmul]_speedup | 0.7059819343391301 | 0.6206473856851741 | 0.879126441480634 | -12.087355851936598 | |
| tritonbench_gemm_fwd[x_(2944, 2944, 2944)-triton_tutorial_matmul]_speedup | 0.739337929071021 | 0.651223991104509 | 0.880820482080194 | -11.917951791980597 | |
| tritonbench_low_mem_dropout_fwd[x_131072-triton_dropout]_speedup | 1.1411764333272758 | 1.0159574130472777 | 0.8902719889553776 | -10.972801104462238 | |
| tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 3584)-triton]_speedup | 0.40917662245713743 | 0.3694379307298832 | 0.9028813242344582 | -9.711867576554178 | |
| tritonbench_gemm_fwd[x_average-triton_tutorial_matmul]_speedup | 0.8152680793731638 | 0.737620404647682 | 0.9047581075599294 | -9.524189244007065 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 256, 256, 64)-triton_tutorial_flash_v2]_speedup | 1.1715653142489653 | 1.0602748521449032 | 0.9050070356722664 | -9.499296432773363 | |
| tritonbench_int4_gemm_fwd[x_(64, 1, 1280, 8192)-triton]_speedup | 0.6068667108994487 | 0.5493393127889478 | 0.9052058762207628 | -9.479412377923724 | |
| tritonbench_low_mem_dropout_fwd[x_128-triton_dropout]_speedup | 1.3142857083461572 | 1.1920529967260118 | 0.9069968494339348 | -9.300315056606523 | |
| tritonbench_gemm_fwd[x_(768, 768, 768)-triton_tutorial_matmul]_speedup | 0.7506631543411684 | 0.6814621521028467 | 0.9078135088446467 | -9.218649115535326 | |
| tritonbench_gemm_fwd[x_(1664, 1664, 1664)-triton_tutorial_matmul]_speedup | 0.7782101619705566 | 0.7095238128235636 | 0.9117380464769724 | -8.826195352302758 | |
| tritonbench_int4_gemm_fwd[x_(16, 1, 7168, 8192)-triton]_speedup | 0.7101668982371787 | 0.6478675515015323 | 0.912275062537708 | -8.772493746229204 | |
| tritonbench_int4_gemm_fwd[x_(4, 1, 7168, 8192)-triton]_speedup | 0.32786386474682905 | 0.3005894020035852 | 0.9168116231280786 | -8.318837687192138 | |
| tritonbench_flex_attention_fwd[x_average-compiled]_speedup | 36.83800164133012 | 33.99696962589044 | 0.9228776836729329 | -7.712231632706712 | |
| tritonbench_gemm_fwd[x_(896, 896, 896)-triton_tutorial_matmul]_speedup | 0.7410071781770686 | 0.6858513020404576 | 0.9255663402987561 | -7.443365970124393 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_speedup | 64.86869697263923 | 60.380211059209174 | 0.9308065966652107 | -6.91934033347893 | |
| tritonbench_gemm_fwd[x_(1536, 1536, 1536)-triton_tutorial_matmul]_speedup | 0.730069981078846 | 0.6806282770379919 | 0.9322781304227951 | -6.7721869577204945 | |
| tritonbench_rms_norm_bwd[x_(2048, 32768)-liger_rms]_speedup | 0.41037518147105817 | 0.3833887990796067 | 0.934239730836757 | -6.576026916324295 | |
| tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 3584)-triton]_speedup | 0.8228571246952967 | 0.7712766378318567 | 0.9373153791643474 | -6.268462083565263 | |
| tritonbench_rope_fwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 2.9203251678557436 | 2.7407974538612794 | 0.9385247519794917 | -6.14752480205083 | |
| tritonbench_gemm_fwd[x_(1792, 1792, 1792)-triton_tutorial_matmul]_speedup | 0.872135045701087 | 0.8224718713121524 | 0.943055637273455 | -5.694436272654501 | |
| tritonbench_layer_norm_bwd[x_average-liger_layer_norm]_speedup | 0.82141756801026 | 0.7750321206641942 | 0.9435300033107079 | -5.646999668929209 | |
| tritonbench_gemm_fwd[x_(2432, 2432, 2432)-triton_tutorial_matmul]_speedup | 0.7975687041153187 | 0.7538847120719476 | 0.9452285529535336 | -5.4771447046466415 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 13312, 6656)-_triton]_speedup | 1.0857333658872872 | 1.0339598273810182 | 0.952314684126928 | -4.768531587307201 | |
| tritonbench_int4_gemm_fwd[x_(1, 1, 1280, 8192)-triton]_speedup | 0.1614145565778853 | 0.15407071496453845 | 0.9545032259231011 | -4.54967740768989 | |
| tritonbench_int4_gemm_fwd[x_(1, 1, 7168, 8192)-triton]_speedup | 0.2988505865438298 | 0.28612386109569515 | 0.9574144203786994 | -4.258557962130061 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 8192, 2048)-_triton]_speedup | 1.26924851413472 | 1.2155476980927016 | 0.9576908576657837 | -4.230914233421634 | |
| tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 1024)-triton]_speedup | 1.7644882168603548 | 1.6909518005694775 | 0.9583242236540834 | -4.167577634591657 | |
| tritonbench_welford_fwd[x_1536-test_welford]_speedup | 0.5788942570534981 | 0.555729510384026 | 0.9599844939084767 | -4.001550609152327 | |
| tritonbench_jsd_bwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 5.84087672479919 | 5.617193698195862 | 0.9617038610567461 | -3.82961389432539 | |
| tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 3584)-triton]_speedup | 0.36904025620309866 | 0.355106892630371 | 0.9622443260903777 | -3.775567390962231 | |
| tritonbench_swiglu_fwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1.0462246840051916 | 1.0129992106039154 | 0.9682425066916971 | -3.175749330830291 | |
| tritonbench_int4_gemm_fwd[x_(64, 1, 7168, 8192)-triton]_speedup | 1.6631204792164707 | 1.6106007885767748 | 0.9684209945725406 | -3.1579005427459395 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 8192, 13312)-_triton]_speedup | 1.1669015889187855 | 1.130393005476213 | 0.9687132284425114 | -3.12867715574886 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 0.9918367518256614 | 0.9618420457051134 | 0.9697584243925856 | -3.024157560741436 | |
| tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 3584)-triton]_speedup | 1.8534189738874451 | 1.797447509105221 | 0.9698009648272744 | -3.0199035172725597 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16, 13312, 13312)-_triton]_speedup | 0.8056885030248641 | 0.7823240512278228 | 0.9710006389450486 | -2.899936105495138 | |
| tritonbench_softmax_fwd[x_(4096, 2304)-triton_softmax]_speedup | 4.389227513387762 | 4.274605855094556 | 0.9738856876423942 | -2.611431235760575 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 1.5752113151232225 | 1.5345444286760197 | 0.9741832184312226 | -2.5816781568777425 | |
| tritonbench_gemm_fwd[x_(2304, 2304, 2304)-triton_tutorial_matmul]_speedup | 0.6848072399241188 | 0.6679774944336473 | 0.9754241127878023 | -2.4575887212197722 | |
| tritonbench_int4_gemm_fwd[x_average-triton]_speedup | 1.4782812454926078 | 1.4428288824136488 | 0.976017849656785 | -2.3982150343215047 | |
| tritonbench_rope_bwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 3.779593710528573 | 3.68905360892575 | 0.9760450173915225 | -2.3954982608477504 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 256, 256, 64)-triton_tutorial_flash_v2]_speedup | 1.5866013003515287 | 1.5505617599604 | 0.9772850681622763 | -2.271493183772366 | |
| tritonbench_softmax_fwd[x_(4096, 2688)-triton_softmax]_speedup | 4.6219965357295525 | 4.517985801308563 | 0.9774965788881597 | -2.250342111184034 | |
| tritonbench_softmax_fwd[x_(4096, 2816)-triton_softmax]_speedup | 4.658969883582606 | 4.554883537383809 | 0.9776589356017132 | -2.234106439828676 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 4096, 16384)-_triton]_speedup | 1.2012822160879075 | 1.1747973730388783 | 0.977952855129014 | -2.2047144870986024 | |
| tritonbench_layer_norm_fwd[x_(4096, 2048)-liger_layer_norm]_speedup | 1.2887168185753521 | 1.263274276167788 | 0.98025746072307 | -1.9742539276930038 | |
| tritonbench_layer_norm_fwd[x_(4096, 3072)-liger_layer_norm]_speedup | 1.3011648021651314 | 1.275693316849076 | 0.9804240898050186 | -1.9575910194981394 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 0.7846806722903867 | 0.770913718905545 | 0.9824553428279842 | -1.754465717201581 | |
| tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 1024)-triton]_speedup | 2.305555954814678 | 2.265124604925214 | 0.9824635139281562 | -1.753648607184377 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(32, 2304, 16384)-_triton]_speedup | 0.7328071938281417 | 0.7200729762640198 | 0.9826226902910176 | -1.7377309708982436 | |
| tritonbench_addmm_fwd[x_(19735, 512, 1536)-triton_addmm]_speedup | 0.9567631388377106 | 0.9402866337972583 | 0.9827789090406763 | -1.722109095932367 | |
| tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 1024)-triton]_speedup | 2.3054914096591363 | 2.2664821713213605 | 0.9830798596020173 | -1.692014039798273 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1.7441508259835 | 1.7148204380701824 | 0.9831835713538257 | -1.6816428646174275 | |
| tritonbench_int4_gemm_fwd[x_(1, 4096, 7168, 8192)-triton]_speedup | 2.2401300715511874 | 2.202515916613222 | 0.9832089415629696 | -1.6791058437030437 | |
| tritonbench_softmax_fwd[x_(4096, 2560)-triton_softmax]_speedup | 4.579807852034335 | 4.5032803590872685 | 0.9832902393682142 | -1.6709760631785753 | |
| tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 1024)-triton]_speedup | 2.2835847346665705 | 2.247043149957335 | 0.9839981481070065 | -1.6001851892993524 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 512, 512, 64)-triton_tutorial_flash_v2]_speedup | 1.2648766191909557 | 1.2446808106601426 | 0.9840333766753228 | -1.5966623324677243 | |
| tritonbench_welford_fwd[x_2048-test_welford]_speedup | 0.5732197634009926 | 0.5640685186388528 | 0.9840353641893919 | -1.596463581060814 | |
| tritonbench_softmax_fwd[x_(4096, 3328)-triton_softmax]_speedup | 4.7831514881183725 | 4.7068702992869165 | 0.9840521068544571 | -1.5947893145542902 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_speedup | 14.490887938291525 | 14.261208026571001 | 0.9841500456908783 | -1.5849954309121728 | |
| tritonbench_welford_fwd[x_4096-test_welford]_speedup | 0.5831165809963016 | 0.5738943549924704 | 0.9841845930910175 | -1.5815406908982532 | |
| tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 1024)-triton]_speedup | 2.3047366206535775 | 2.2685297896745937 | 0.9842902522333696 | -1.5709747766630366 | |
| tritonbench_int4_gemm_fwd[x_(16, 4096, 7168, 8192)-triton]_speedup | 2.252789647456481 | 2.217507372841485 | 0.9843384069813925 | -1.5661593018607478 | |
| tritonbench_int4_gemm_fwd[x_(4, 4096, 1280, 8192)-triton]_speedup | 2.2308745214777237 | 2.196160367845063 | 0.9844392173121123 | -1.5560782687887742 | |
| tritonbench_int4_gemm_fwd[x_(4, 4096, 7168, 8192)-triton]_speedup | 2.2497090438986955 | 2.21495932247359 | 0.9845536819441837 | -1.544631805581631 | |
| tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 3584)-triton]_speedup | 2.2398816432538218 | 2.2057306708117443 | 0.9847532245532102 | -1.5246775446789762 | |
| tritonbench_int4_gemm_fwd[x_(16, 4096, 1280, 8192)-triton]_speedup | 2.2445335020741055 | 2.210451156546966 | 0.98481539905925 | -1.5184600940750026 | |
| tritonbench_int4_gemm_fwd[x_(64, 4096, 7168, 8192)-triton]_speedup | 2.253285955307447 | 2.2195966746560525 | 0.9850488214458347 | -1.4951178554165279 | |
| tritonbench_jsd_bwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 6.232600341610404 | 6.140791662763295 | 0.9852696027636858 | -1.4730397236314197 | |
| tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 3584)-triton]_speedup | 2.255883735393586 | 2.2231088371781573 | 0.9854713708418531 | -1.45286291581469 | |
| tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 3584)-triton]_speedup | 2.2584417633926916 | 2.226136226144642 | 0.9856956518553219 | -1.430434814467807 | |
| tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 3584)-triton]_speedup | 2.259118018304066 | 2.226830591463728 | 0.9857079503688009 | -1.4292049631199122 | |
| tritonbench_int4_gemm_fwd[x_(64, 4096, 1280, 8192)-triton]_speedup | 2.246857902367221 | 2.215660836050993 | 0.9861152473045316 | -1.3884752695468405 | |
| tritonbench_rms_norm_fwd[x_(2048, 8192)-liger_rms]_speedup | 4.1390307845034275 | 4.084436100164419 | 0.9868097902186639 | -1.3190209781336115 | |
| tritonbench_gemm_fwd[x_(512, 512, 512)-triton_tutorial_matmul]_speedup | 0.8659793628841556 | 0.8549618867547684 | 0.9872774380064978 | -1.272256199350219 | |
| tritonbench_addmm_fwd[x_(20211, 512, 1536)-triton_addmm]_speedup | 0.9767350221083531 | 0.9643139509678258 | 0.987283069758556 | -1.2716930241444047 | |
| tritonbench_int4_gemm_fwd[x_(1, 4096, 1280, 8192)-triton]_speedup | 2.1639886828643653 | 2.13663507443488 | 0.987359634250361 | -1.2640365749639004 | |
| tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 1024)-triton]_speedup | 0.7797148045887564 | 0.7700617692883619 | 0.987619787076525 | -1.238021292347502 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 0.2976182658950201 | 0.29406473800016614 | 0.9880601149120752 | -1.1939885087924762 | |
| tritonbench_softmax_fwd[x_(4096, 4608)-triton_softmax]_speedup | 4.806057230774488 | 4.749138880806348 | 0.9881569554345553 | -1.1843044565444716 | |
| tritonbench_jsd_bwd[x_average-liger_jsd]_speedup | 5.938695265363019 | 5.868801187512297 | 0.988230735081092 | -1.1769264918907951 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_speedup | 13.553821439634765 | 13.395468996890449 | 0.9883167678245152 | -1.1683232175484837 | |
| tritonbench_low_mem_dropout_fwd[x_32-triton_dropout]_speedup | 1.0519480715829528 | 1.0397351127402403 | 0.9883901504526411 | -1.1609849547358886 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_speedup | 14.11732890413987 | 13.95664601650124 | 0.9886180389555486 | -1.1381961044451439 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(32, 8192, 13312)-_triton]_speedup | 0.5186952939068512 | 0.5134849392060832 | 0.989954883412334 | -1.0045116587666048 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(1, 8192, 16384)-_triton]_speedup | 0.6522481508217816 | 0.6458392681672043 | 0.9901741650834845 | -0.9825834916515452 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 1024, 1024, 64)-triton_tutorial_flash_v2]_speedup | 1.0528501360605702 | 1.0425385725815968 | 0.9902060482059146 | -0.9793951794085376 | |
| tritonbench_softmax_fwd[x_(4096, 256)-triton_softmax]_speedup | 3.697761068414634 | 3.66420665910683 | 0.9909257497477548 | -0.9074250252245175 | |
| tritonbench_welford_fwd[x_6144-test_welford]_speedup | 0.6511465258915132 | 0.6455501462419818 | 0.9914053451458269 | -0.85946548541731 | |
| tritonbench_jsd_bwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 6.047223104930588 | 5.996224194120004 | 0.9915665570914687 | -0.8433442908531319 | |
| tritonbench_geglu_fwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 0.9768367639626843 | 0.968691766854009 | 0.9916618646951474 | -0.8338135304852612 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 4096, 4096, 64)-triton_tutorial_flash_v2]_speedup | 0.8395386446203711 | 0.8326111147902348 | 0.9917484086354728 | -0.8251591364527244 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1.1672694439894962 | 1.1577173077224865 | 0.9918166826723722 | -0.8183317327627782 | |
| tritonbench_swiglu_bwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1.0070808227345693 | 0.9990891775665965 | 0.9920645443865441 | -0.7935455613455877 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1.6027353473422985 | 1.5903428800535209 | 0.9922679266359682 | -0.7732073364031811 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1.623049557424206 | 1.6107474270181354 | 0.9924203605799972 | -0.7579639420002837 | |
| tritonbench_gemm_fwd[x_(2176, 2176, 2176)-triton_tutorial_matmul]_speedup | 0.6703096817260172 | 0.6652542265385779 | 0.9924580304816398 | -0.754196951836017 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 1.459957710600525 | 1.4493076339893123 | 0.9927052156826981 | -0.7294784317301906 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_speedup | 12.574965173923344 | 12.4840832431269 | 0.9927727886686394 | -0.7227211331360617 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 0.7019496773187089 | 0.6968945382602288 | 0.9927984309676021 | -0.7201569032397881 | |
| tritonbench_fused_linear_jsd_bwd[x_(2048, 4096)-liger_lm_head_jsd]_speedup | 72.34693312118209 | 71.85800450962729 | 0.9932418889030744 | -0.675811109692559 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 1.3783263689131826 | 1.3693405310390288 | 0.9934806167270533 | -0.6519383272946655 | |
| tritonbench_layer_norm_fwd[x_(4096, 4096)-liger_layer_norm]_speedup | 1.3869731407708001 | 1.3780330985252722 | 0.9935542787508058 | -0.6445721249194181 | |
| tritonbench_fused_linear_jsd_fwd[x_(1024, 4096)-liger_lm_head_jsd]_speedup | 0.18110404205443445 | 0.1799668713781164 | 0.9937208984216032 | -0.627910157839684 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 1.1722008243103719 | 1.1648690255906224 | 0.9937452708036928 | -0.6254729196307207 | |
| tritonbench_softmax_fwd[x_(4096, 10624)-triton_softmax]_speedup | 4.806125051945107 | 4.7768660254969895 | 0.9939121379215309 | -0.608786207846912 | |
| tritonbench_rms_norm_fwd[x_(2048, 16384)-liger_rms]_speedup | 4.014764900047293 | 3.9906541344303212 | 0.9939944763349187 | -0.6005523665081336 | |
| tritonbench_softmax_fwd[x_(4096, 10496)-triton_softmax]_speedup | 4.8002694057591 | 4.771558596002546 | 0.9940189169961776 | -0.5981083003822385 | |
| tritonbench_jsd_bwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 5.86343850762021 | 5.829686144550078 | 0.9942435888043736 | -0.5756411195626421 | |
| tritonbench_softmax_fwd[x_(4096, 8960)-triton_softmax]_speedup | 4.84670627864951 | 4.81916866319178 | 0.9943182825872826 | -0.5681717412717391 | |
| tritonbench_layer_norm_fwd[x_(4096, 3584)-liger_layer_norm]_speedup | 1.3216080624587836 | 1.3142655553607079 | 0.9944442627836158 | -0.5555737216384249 | |
| tritonbench_softmax_fwd[x_(4096, 12160)-triton_softmax]_speedup | 4.736299715750312 | 4.71045922144424 | 0.9945441598173905 | -0.5455840182609473 | |
| tritonbench_softmax_fwd[x_(4096, 11136)-triton_softmax]_speedup | 4.781424641491178 | 4.755690807487796 | 0.9946179567947021 | -0.5382043205297893 | |
| tritonbench_softmax_fwd[x_(4096, 11392)-triton_softmax]_speedup | 4.770553221641905 | 4.745044676254643 | 0.9946529167159186 | -0.53470832840814 | |
| tritonbench_rms_norm_fwd[x_(2048, 4096)-liger_rms]_speedup | 4.190583109611227 | 4.168903787619387 | 0.9948266574305332 | -0.5173342569466777 | |
| tritonbench_softmax_fwd[x_(4096, 12544)-triton_softmax]_speedup | 4.733559912920285 | 4.709968617746254 | 0.9950161621257527 | -0.49838378742472766 | |
| tritonbench_softmax_fwd[x_(4096, 11776)-triton_softmax]_speedup | 4.746322404460557 | 4.724394296838585 | 0.9953799793285505 | -0.46200206714495096 | |
| tritonbench_embedding_bwd[x_average-liger_embedding]_speedup | 1.382536358109996 | 1.376477924137093 | 0.9956178845226282 | -0.4382115477371773 | |
| tritonbench_softmax_fwd[x_(4096, 10880)-triton_softmax]_speedup | 4.785956091349303 | 4.765177147975516 | 0.995658350603896 | -0.43416493961040414 | |
| tritonbench_softmax_fwd[x_(4096, 6144)-triton_softmax]_speedup | 4.7547759504036575 | 4.734423367595345 | 0.995719549560146 | -0.42804504398540333 | |
| tritonbench_softmax_fwd[x_(4096, 7040)-triton_softmax]_speedup | 4.734405371199078 | 4.714174240784348 | 0.9957267853450398 | -0.42732146549602223 | |
| tritonbench_softmax_fwd[x_(4096, 6912)-triton_softmax]_speedup | 4.729772710337059 | 4.7095881305073 | 0.9957324419024101 | -0.4267558097589941 | |
| tritonbench_softmax_fwd[x_(4096, 11264)-triton_softmax]_speedup | 4.762084479070741 | 4.742306744154943 | 0.9958468324107393 | -0.41531675892606534 | |
| tritonbench_swiglu_fwd[x_average-liger_swiglu]_speedup | 1.0854025059401498 | 1.0809491805245712 | 0.9958970746877711 | -0.4102925312228889 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_average-liger_lm_head_ce]_speedup | 0.3033311701457682 | 0.3020976783382171 | 0.9959335144919055 | -0.40664855080945195 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 0.3276516362325097 | 0.32635314713300895 | 0.9960369827099557 | -0.3963017290044335 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 2.067881461172406 | 2.0603816821306244 | 0.9963732065001785 | -0.36267934998215345 | |
| tritonbench_layer_norm_fwd[x_(4096, 6144)-liger_layer_norm]_speedup | 1.4937146830647252 | 1.4884832313371217 | 0.9964976900964313 | -0.3502309903568701 | |
| tritonbench_softmax_fwd[x_(4096, 7296)-triton_softmax]_speedup | 4.743258721709375 | 4.72734150216041 | 0.996644243866329 | -0.3355756133671006 | |
| tritonbench_geglu_fwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1.010679097558023 | 1.0073528672373722 | 0.9967089154918829 | -0.32910845081171036 | |
| tritonbench_softmax_fwd[x_(4096, 5376)-triton_softmax]_speedup | 4.704591109168532 | 4.68945039629067 | 0.9967817154506043 | -0.3218284549395656 | |
| tritonbench_jsd_bwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 5.83901322404002 | 5.820836903427041 | 0.9968870903497624 | -0.3112909650237583 | |
| tritonbench_softmax_fwd[x_(4096, 8064)-triton_softmax]_speedup | 4.718631441876037 | 4.704225424210067 | 0.9969469923973883 | -0.3053007602611735 | |
| tritonbench_fused_linear_jsd_fwd[x_(8192, 4096)-liger_lm_head_jsd]_speedup | 0.4368753069040728 | 0.43560309470064196 | 0.9970879283325799 | -0.2912071667420135 | |
| tritonbench_layer_norm_fwd[x_(4096, 2560)-liger_layer_norm]_speedup | 1.2999036121530863 | 1.2961538616529273 | 0.9971153626583528 | -0.2884637341647234 | |
| tritonbench_layer_norm_fwd[x_(4096, 7168)-liger_layer_norm]_speedup | 1.5472921493625484 | 1.5428789436691595 | 0.9971477877043406 | -0.2852212295659351 | |
| tritonbench_layer_norm_bwd[x_(4096, 3584)-liger_layer_norm]_speedup | 0.8482097369719747 | 0.8458383403262112 | 0.9972042331720582 | -0.27957668279418035 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1.8581105588508908 | 1.8529918422700513 | 0.9972452034371921 | -0.27547965628078597 | |
| tritonbench_softmax_fwd[x_(4096, 4864)-triton_softmax]_speedup | 4.8053096718337125 | 4.79228639118974 | 0.9972898144899364 | -0.2710185510063634 | |
| tritonbench_softmax_fwd[x_(4096, 5248)-triton_softmax]_speedup | 4.720122584500994 | 4.707403211173603 | 0.99730528750055 | -0.26947124994499694 | |
| tritonbench_softmax_fwd[x_(4096, 3712)-triton_softmax]_speedup | 4.708535787733428 | 4.696132768140148 | 0.9973658436183936 | -0.2634156381606356 | |
| tritonbench_rope_bwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 3.683395119523763 | 3.674097639565173 | 0.9974758396379175 | -0.25241603620824726 | |
| tritonbench_layer_norm_bwd[x_(4096, 1024)-liger_layer_norm]_speedup | 0.5290930064141324 | 0.5277857068921588 | 0.9975291687734947 | -0.24708312265052657 | |
| tritonbench_rope_fwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 3.059157084130118 | 3.0518462106450714 | 0.9976101673487208 | -0.23898326512792334 | |
| tritonbench_fused_linear_jsd_fwd[x_average-liger_lm_head_jsd]_speedup | 0.3200001155405728 | 0.319252002746645 | 0.9976621483630904 | -0.23378516369095825 | |
| tritonbench_layer_norm_fwd[x_(4096, 15872)-liger_layer_norm]_speedup | 1.6072518451964959 | 1.603581081261356 | 0.9977161239875938 | -0.22838760124062052 | |
| tritonbench_cross_entropy_bwd[x_average-liger_cross_entropy_loss]_speedup | 1.8523424396283736 | 1.8485287262616856 | 0.9979411402097697 | -0.20588597902303496 | |
| tritonbench_softmax_fwd[x_(4096, 1920)-triton_softmax]_speedup | 4.282816445123302 | 4.274021224045633 | 0.9979463931759943 | -0.20536068240056826 | |
| tritonbench_layer_norm_bwd[x_(4096, 4096)-liger_layer_norm]_speedup | 0.9220282487249086 | 0.9201527684469013 | 0.9979659188527022 | -0.20340811472977505 | |
| tritonbench_layer_norm_fwd[x_(4096, 13824)-liger_layer_norm]_speedup | 1.5635449008164877 | 1.5604263183935931 | 0.9980054410837412 | -0.19945589162587707 | |
| tritonbench_rope_fwd[x_average-liger_rotary_pos_emb]_speedup | 2.8729174114752363 | 2.8672685862944247 | 0.9980337669442746 | -0.19662330557254082 | |
| tritonbench_softmax_fwd[x_(4096, 3456)-triton_softmax]_speedup | 4.769346262810168 | 4.760299638205606 | 0.9981031730333559 | -0.18968269666440607 | |
| tritonbench_kl_div_fwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 3.3988630452880417 | 3.392518838391171 | 0.9981334326178085 | -0.18665673821914863 | |
| tritonbench_fused_linear_jsd_fwd[x_(2048, 4096)-liger_lm_head_jsd]_speedup | 0.2938437998056872 | 0.2933265062482356 | 0.9982395627956292 | -0.17604372043708016 | |
| tritonbench_swiglu_fwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1.0362195709580697 | 1.0344047632594906 | 0.9982486263052326 | -0.17513736947674108 | |
| tritonbench_softmax_fwd[x_(4096, 12416)-triton_softmax]_speedup | 4.733883915413517 | 4.725776992500469 | 0.9982874690089777 | -0.17125309910223097 | |
| tritonbench_softmax_fwd[x_(4096, 5632)-triton_softmax]_speedup | 4.739318097960334 | 4.73145020272092 | 0.9983398676609615 | -0.16601323390385003 | |
| tritonbench_gemm_fwd[x_(1280, 1280, 1280)-triton_tutorial_matmul]_speedup | 0.7659574654732583 | 0.7647058640428881 | 0.9983659648390569 | -0.16340351609430703 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1.318370903513246 | 1.3162619174307915 | 0.9984003089898037 | -0.15996910101963024 | |
| tritonbench_kl_div_bwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 1.049876268629992 | 1.048225737674776 | 0.9984278804993183 | -0.15721195006817013 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 2.102465675464798 | 2.099592500744546 | 0.9986334260988035 | -0.1366573901196544 | |
| tritonbench_layer_norm_bwd[x_(4096, 1536)-liger_layer_norm]_speedup | 0.7695472708107032 | 0.7685243941703278 | 0.9986708072665922 | -0.13291927334078135 | |
| tritonbench_kl_div_bwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 1.040329815222385 | 1.038960602369394 | 0.9986838665652409 | -0.13161334347591147 | |
| tritonbench_swiglu_bwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1.0440015237717635 | 1.0427002368110767 | 0.9987535583703121 | -0.12464416296879 | |
| tritonbench_softmax_fwd[x_(4096, 1152)-triton_softmax]_speedup | 3.702936239789029 | 3.698629847531816 | 0.9988370331060688 | -0.11629668939312499 | |
| tritonbench_rope_fwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 3.1046127795441985 | 3.101151516079867 | 0.9988851223292202 | -0.11148776707797792 | |
| tritonbench_rope_bwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 3.950680118765617 | 3.946288582297316 | 0.9988884100113696 | -0.11115899886303682 | |
| tritonbench_layer_norm_fwd[x_(4096, 6656)-liger_layer_norm]_speedup | 1.5235389994650754 | 1.5218623737094041 | 0.9988995189776829 | -0.11004810223170791 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 545.8503787322865 | 545.2638739867466 | 0.9989255210431438 | -0.10744789568561952 | |
| tritonbench_kl_div_bwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 0.909461730400436 | 0.9085766402205332 | 0.999026797774643 | -0.09732022253570172 | |
| tritonbench_layer_norm_bwd[x_(4096, 15360)-liger_layer_norm]_speedup | 0.8131962749993826 | 0.8124056846552841 | 0.9990277988618441 | -0.09722011381558815 | |
| tritonbench_kl_div_fwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 4.439526200000614 | 4.435228023848629 | 0.9990318389939935 | -0.0968161006006496 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 2.1176590619772035 | 2.1156363561161515 | 0.9990448387573949 | -0.09551612426050848 | |
| tritonbench_rope_bwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 3.643590254083874 | 3.6402345627512167 | 0.9990790151749648 | -0.09209848250352204 | |
| tritonbench_softmax_fwd[x_(4096, 9856)-triton_softmax]_speedup | 4.7720544919120504 | 4.767679589064829 | 0.9990832244571732 | -0.09167755428267643 | |
| tritonbench_rope_fwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 2.76027415756235 | 2.7577556399826313 | 0.9990875842630274 | -0.09124157369726493 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(8, 8192, 6656)-_triton]_speedup | 0.6797052263812687 | 0.6791085997974049 | 0.9991222274587467 | -0.08777725412533188 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_average-liger_lm_head_ce]_speedup | 274.3719450071362 | 274.15314598855383 | 0.9992025459506193 | -0.0797454049380697 | |
| tritonbench_softmax_fwd[x_(4096, 9600)-triton_softmax]_speedup | 4.773057600077714 | 4.769677672575322 | 0.9992918737242272 | -0.07081262757727691 | |
| tritonbench_swiglu_bwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1.025330008712361 | 1.024612403141267 | 0.9993001223362269 | -0.06998776637731075 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16, 4096, 6656)-_triton]_speedup | 0.6867133315738065 | 0.6862334443739752 | 0.9993011826365281 | -0.0698817363471882 | |
| tritonbench_softmax_fwd[x_(4096, 7552)-triton_softmax]_speedup | 4.7310522497496725 | 4.728140970486364 | 0.9993846444491365 | -0.061535555086345006 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 1.8262628228609377 | 1.8252333287889049 | 0.9994362837269939 | -0.05637162730061096 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 162.13003878566073 | 162.03914270537632 | 0.999439363112689 | -0.05606368873110057 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 304.236982358219 | 304.0771152625708 | 0.9994745310237796 | -0.05254689762204112 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1.026657270685912 | 1.026154111739422 | 0.999509905631746 | -0.049009436825397934 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 85.27038015237868 | 85.2324519995216 | 0.9995552013162214 | -0.04447986837785578 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 0.34583686593487384 | 0.3456936632123823 | 0.9995859240682614 | -0.04140759317385889 | |
| tritonbench_softmax_fwd[x_(4096, 4096)-triton_softmax]_speedup | 4.843709408932528 | 4.841731192063543 | 0.9995915905142169 | -0.04084094857831344 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1.146237444033417 | 1.1457769464996226 | 0.9995982529307592 | -0.04017470692407876 | |
| tritonbench_geglu_bwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1.006057404513583 | 1.005675944262156 | 0.9996208364952976 | -0.037916350470235116 | |
| tritonbench_jsd_fwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 0.5966337270213812 | 0.5964091993597125 | 0.9996236758810307 | -0.03763241189692623 | |
| tritonbench_softmax_fwd[x_(4096, 1024)-triton_softmax]_speedup | 3.891304442854918 | 3.889980425528898 | 0.9996597497457566 | -0.0340250254243446 | |
| tritonbench_softmax_fwd[x_(4096, 8320)-triton_softmax]_speedup | 4.813267037991012 | 4.811982893999397 | 0.9997332074074678 | -0.026679259253215015 | |
| tritonbench_softmax_fwd[x_(4096, 1792)-triton_softmax]_speedup | 4.238035319881156 | 4.237139320654531 | 0.9997885814631082 | -0.021141853689177204 | |
| tritonbench_layer_norm_fwd[x_(4096, 14848)-liger_layer_norm]_speedup | 1.5778408380350717 | 1.5775421284151177 | 0.999810684567953 | -0.01893154320470325 | |
| tritonbench_fused_linear_jsd_fwd[x_(4096, 4096)-liger_lm_head_jsd]_speedup | 0.36817731339809684 | 0.3681115386595862 | 0.9998213503762532 | -0.017864962374680715 | |
| tritonbench_softmax_fwd[x_(4096, 9728)-triton_softmax]_speedup | 4.7617134595589095 | 4.7609071199530915 | 0.9998306618798745 | -0.01693381201255395 | |
| tritonbench_jsd_bwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 5.809019689177697 | 5.808074522017501 | 0.9998372931732428 | -0.01627068267572307 | |
| tritonbench_fused_linear_jsd_bwd[x_average-liger_lm_head_jsd]_speedup | 132.45637725539433 | 132.4414294735621 | 0.9998871493986022 | -0.01128506013977626 | |
| tritonbench_geglu_bwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 1.005061170978461 | 1.004953022395565 | 0.9998923960192485 | -0.010760398075149169 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1.0995817766077767 | 1.0994639394464667 | 0.9998928345632705 | -0.010716543672950607 | |
| tritonbench_geglu_fwd[x_average-liger_geglu]_speedup | 0.9886903479750182 | 0.9886316639399085 | 0.9999406446768395 | -0.005935532316048153 | |
| tritonbench_jsd_fwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 4.318643381007405 | 4.318417557455946 | 0.9999477096088896 | -0.005229039111043221 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 0.6214689170662767 | 0.6214719929551952 | 1.0000049493849716 | 0.000494938497164199 | |
| tritonbench_kl_div_fwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 4.609827578188277 | 4.609881546749964 | 1.0000117072842252 | 0.0011707284225170866 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1.0540788557929333 | 1.0541133298645684 | 1.0000327054009723 | 0.0032705400972332654 | |
| tritonbench_geglu_bwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 1.0050813435606616 | 1.005114430073657 | 1.000032919239032 | 0.003291923903203653 | |
| tritonbench_geglu_bwd[x_average-liger_geglu]_speedup | 1.0051995983941768 | 1.0052582013719065 | 1.000058299841965 | 0.00582998419649261 | |
| tritonbench_kl_div_fwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 4.554194224645726 | 4.554687300879865 | 1.0001082686002873 | 0.010826860028734231 | |
| tritonbench_layer_norm_bwd[x_(4096, 13312)-liger_layer_norm]_speedup | 0.8014388352424096 | 0.8015363743315401 | 1.0001217049696638 | 0.012170496966379396 | |
| tritonbench_kl_div_fwd[x_average-liger_kl_div]_speedup | 4.182155010548579 | 4.182722500695426 | 1.0001356932360028 | 0.013569323600282068 | |
| tritonbench_kl_div_bwd[x_average-liger_kl_div]_speedup | 1.0074821474270703 | 1.0076686962509545 | 1.0001851634039973 | 0.01851634039973238 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1.2320577136922453 | 1.232347133126388 | 1.0002349073675092 | 0.023490736750919616 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 0.24221791252066915 | 0.24227916500731086 | 1.0002528817377885 | 0.025288173778847955 | |
| tritonbench_layer_norm_bwd[x_(4096, 3072)-liger_layer_norm]_speedup | 0.6744258090810452 | 0.6746351946532081 | 1.0003104649456522 | 0.031046494565223348 | |
| tritonbench_softmax_fwd[x_(4096, 6528)-triton_softmax]_speedup | 4.7400084845565305 | 4.741488199870676 | 1.0003121756678215 | 0.031217566782149042 | |
| tritonbench_layer_norm_fwd[x_(4096, 15360)-liger_layer_norm]_speedup | 1.594165511469532 | 1.5946794004195854 | 1.000322356083077 | 0.032235608307695784 | |
| tritonbench_softmax_fwd[x_(4096, 3840)-triton_softmax]_speedup | 4.724627898263088 | 4.726230648503993 | 1.0003392331153729 | 0.033923311537287226 | |
| tritonbench_softmax_fwd[x_(4096, 9984)-triton_softmax]_speedup | 4.748948474087353 | 4.751051230053391 | 1.0004427834872314 | 0.04427834872313685 | |
| tritonbench_kl_div_fwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 3.871929801151736 | 3.8741954297891623 | 1.0005851419715184 | 0.0585141971518377 | |
| tritonbench_softmax_fwd[x_(4096, 11648)-triton_softmax]_speedup | 4.752620352111677 | 4.75553960643853 | 1.000614241010342 | 0.06142410103420204 | |
| tritonbench_geglu_bwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1.0045984745240017 | 1.0052894087562478 | 1.000687771532376 | 0.06877715323760913 | |
| tritonbench_geglu_fwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1.003999372956363 | 1.0046981299428182 | 1.000695973528746 | 0.06959735287459523 | |
| tritonbench_softmax_fwd[x_(4096, 10112)-triton_softmax]_speedup | 4.772511678530406 | 4.776044619948735 | 1.0007402687843012 | 0.07402687843012234 | |
| tritonbench_fused_linear_jsd_bwd[x_(8192, 4096)-liger_lm_head_jsd]_speedup | 277.1352570404711 | 277.34149027019527 | 1.0007441609267853 | 0.07441609267853089 | |
| tritonbench_layer_norm_fwd[x_(4096, 11776)-liger_layer_norm]_speedup | 1.5010150953880834 | 1.502144996349043 | 1.0007527578932625 | 0.07527578932624568 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1.5398279056943722 | 1.5410208558005736 | 1.0007747295017773 | 0.07747295017772604 | |
| tritonbench_fused_linear_jsd_bwd[x_(1024, 4096)-liger_lm_head_jsd]_speedup | 36.40959286517057 | 36.441034048842056 | 1.000863541204317 | 0.08635412043169755 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1.3624161280411369 | 1.3635964634521145 | 1.0008663545496006 | 0.08663545496006009 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 1.5662939318660936 | 1.5677367420919375 | 1.000921161856335 | 0.0921161856334951 | |
| tritonbench_softmax_fwd[x_(4096, 3072)-triton_softmax]_speedup | 4.651960799233028 | 4.6563514605003204 | 1.000943830237782 | 0.09438302377819952 | |
| tritonbench_layer_norm_fwd[x_(4096, 13312)-liger_layer_norm]_speedup | 1.548273779966417 | 1.5497825725283565 | 1.0009744998471601 | 0.09744998471601374 | |
| tritonbench_kl_div_bwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 1.0516921828121812 | 1.05275370605673 | 1.001009347850918 | 0.10093478509181075 | |
| tritonbench_softmax_fwd[x_(4096, 8832)-triton_softmax]_speedup | 4.849098346600481 | 4.85402871695745 | 1.0010167602313995 | 0.10167602313995161 | |
| tritonbench_softmax_fwd[x_(4096, 2176)-triton_softmax]_speedup | 4.119675416155235 | 4.123867309979537 | 1.001017530120908 | 0.10175301209081056 | |
| tritonbench_kl_div_bwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 0.977701304160121 | 0.9787044568454528 | 1.0010260318576476 | 0.10260318576476024 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 1.2902193308199654 | 1.291687079102783 | 1.0011375959480353 | 0.11375959480353082 | |
| tritonbench_layer_norm_fwd[x_(4096, 12800)-liger_layer_norm]_speedup | 1.5363461978672297 | 1.5381002868413862 | 1.0011417276760872 | 0.1141727676087223 | |
| tritonbench_softmax_fwd[x_(4096, 6272)-triton_softmax]_speedup | 4.715404462181992 | 4.720990577492836 | 1.00118465242073 | 0.11846524207299591 | |
| tritonbench_softmax_fwd[x_(4096, 7680)-triton_softmax]_speedup | 4.729837224860923 | 4.735453323432211 | 1.0011873767117756 | 0.11873767117756007 | |
| tritonbench_layer_norm_fwd[x_(4096, 12288)-liger_layer_norm]_speedup | 1.5263274361145989 | 1.5282355888011274 | 1.0012501594621046 | 0.12501594621046053 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1.1172489800914096 | 1.1186571082995123 | 1.0012603530933522 | 0.12603530933521512 | |
| tritonbench_softmax_fwd[x_(4096, 3584)-triton_softmax]_speedup | 4.794310848447796 | 4.800582246070135 | 1.0013080915736554 | 0.13080915736554122 | |
| tritonbench_fused_linear_jsd_bwd[x_(4096, 4096)-liger_lm_head_jsd]_speedup | 143.9337259947535 | 144.12518906558375 | 1.0013302168724323 | 0.13302168724322883 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1.013173633421558 | 1.0145547851678913 | 1.0013631935344283 | 0.13631935344282642 | |
| tritonbench_layer_norm_bwd[x_(4096, 11264)-liger_layer_norm]_speedup | 0.7862715305293678 | 0.7873440516206237 | 1.0013640594242728 | 0.13640594242727566 | |
| tritonbench_softmax_fwd[x_average-triton_softmax]_speedup | 4.612606979649023 | 4.618910905640853 | 1.0013666731242534 | 0.13666731242534258 | |
| tritonbench_layer_norm_fwd[x_(4096, 1536)-liger_layer_norm]_speedup | 1.324438216370136 | 1.3263010128542327 | 1.001406480469283 | 0.14064804692830446 | |
| tritonbench_rope_fwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 2.7786435110372643 | 2.7826140508328794 | 1.0014289489745063 | 0.14289489745062944 | |
| tritonbench_softmax_fwd[x_(4096, 9472)-triton_softmax]_speedup | 4.7874040683679455 | 4.794387082431816 | 1.0014586222437354 | 0.14586222437353857 | |
| tritonbench_layer_norm_bwd[x_(4096, 10240)-liger_layer_norm]_speedup | 0.824169486277523 | 0.8254487124226256 | 1.0015521396586526 | 0.15521396586526404 | |
| tritonbench_layer_norm_bwd[x_(4096, 2048)-liger_layer_norm]_speedup | 0.7178101773692543 | 0.7189265104750217 | 1.0015551926414008 | 0.15551926414008044 | |
| tritonbench_layer_norm_bwd[x_(4096, 11776)-liger_layer_norm]_speedup | 0.8152547467515124 | 0.8165229312593294 | 1.001555568382607 | 0.15555683826069533 | |
| tritonbench_softmax_fwd[x_(4096, 7168)-triton_softmax]_speedup | 4.724536999817203 | 4.732280363614023 | 1.0016389677543258 | 0.16389677543258152 | |
| tritonbench_softmax_fwd[x_(4096, 8192)-triton_softmax]_speedup | 4.801966609607765 | 4.809959473111806 | 1.0016644979346687 | 0.16644979346687183 | |
| tritonbench_softmax_fwd[x_(4096, 7808)-triton_softmax]_speedup | 4.717948888034661 | 4.725881244103081 | 1.0016813145408459 | 0.16813145408458574 | |
| tritonbench_layer_norm_bwd[x_(4096, 2560)-liger_layer_norm]_speedup | 0.7975934797014604 | 0.798965722273639 | 1.0017204786737879 | 0.1720478673787884 | |
| tritonbench_softmax_fwd[x_(4096, 9344)-triton_softmax]_speedup | 4.770282368562259 | 4.778503261218973 | 1.0017233555629521 | 0.17233555629521113 | |
| tritonbench_softmax_fwd[x_(4096, 5760)-triton_softmax]_speedup | 4.743856038483515 | 4.7524795858012245 | 1.0018178349527795 | 0.181783495277954 | |
| tritonbench_softmax_fwd[x_(4096, 3200)-triton_softmax]_speedup | 4.759289317370831 | 4.768734728697427 | 1.0019846264214536 | 0.19846264214535836 | |
| tritonbench_layer_norm_fwd[x_(4096, 10240)-liger_layer_norm]_speedup | 1.3990848499476018 | 1.4018872069899893 | 1.0020029929153278 | 0.20029929153277948 | |
| tritonbench_softmax_fwd[x_(4096, 6656)-triton_softmax]_speedup | 4.716166406396282 | 4.72574042445641 | 1.0020300424614244 | 0.20300424614243529 | |
| tritonbench_layer_norm_bwd[x_(4096, 9728)-liger_layer_norm]_speedup | 0.7959315339394065 | 0.797638273637922 | 1.0021443297893577 | 0.21443297893577462 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 2.05536976100857 | 2.0598662882402925 | 1.0021876974727486 | 0.21876974727486065 | |
| tritonbench_swiglu_fwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 1.031227144245513 | 1.0335169618477316 | 1.002220478402839 | 0.222047840283901 | |
| tritonbench_layer_norm_fwd[x_(4096, 5120)-liger_layer_norm]_speedup | 1.3626104360456581 | 1.365781768993298 | 1.0023273951701437 | 0.23273951701436868 | |
| tritonbench_layer_norm_bwd[x_(4096, 15872)-liger_layer_norm]_speedup | 0.8407269947723515 | 0.8428121219531028 | 1.002480147769391 | 0.24801477693909835 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(1, 2304, 2048)-_triton]_speedup | 0.4527272829256398 | 0.4538534801813155 | 1.0024875842436487 | 0.24875842436486995 | |
| tritonbench_layer_norm_fwd[x_average-liger_layer_norm]_speedup | 1.4394843139428997 | 1.4431554005843856 | 1.002550279017234 | 0.2550279017234036 | |
| tritonbench_layer_norm_bwd[x_(4096, 14848)-liger_layer_norm]_speedup | 0.8211980105172973 | 0.8232965928296331 | 1.0025555131472053 | 0.2555513147205257 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 1.3704670101517349 | 1.373975794129196 | 1.0025602834300058 | 0.2560283430005805 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1.139267040597558 | 1.142259461865306 | 1.0026266197134768 | 0.26266197134767744 | |
| tritonbench_kl_div_fwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 4.2185892140170775 | 4.229823864513758 | 1.0026631297637019 | 0.2663129763701866 | |
| tritonbench_kl_div_bwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 1.0158315833373073 | 1.0187910343388409 | 1.0029133284001772 | 0.29133284001772175 | |
| tritonbench_softmax_fwd[x_(4096, 1536)-triton_softmax]_speedup | 4.230993859177122 | 4.243831466435976 | 1.0030341824370672 | 0.3034182437067212 | |
| tritonbench_jsd_fwd[x_average-liger_jsd]_speedup | 1.8483005164956614 | 1.8540384469756717 | 1.0031044359014134 | 0.3104435901413405 | |
| tritonbench_layer_norm_fwd[x_(4096, 14336)-liger_layer_norm]_speedup | 1.5879594696267045 | 1.5930779456378217 | 1.0032233039375498 | 0.3223303937549771 | |
| tritonbench_jsd_fwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 0.5836544078511092 | 0.5855549917429285 | 1.003256351474868 | 0.32563514748680245 | |
| tritonbench_softmax_fwd[x_(4096, 4736)-triton_softmax]_speedup | 4.740719905003004 | 4.756316527824568 | 1.0032899270857796 | 0.3289927085779576 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 8192, 8192, 64)-triton_tutorial_flash_v2]_speedup | 0.733803464153566 | 0.7363038084201633 | 1.0034073759374813 | 0.3407375937481305 | |
| tritonbench_softmax_fwd[x_(4096, 4992)-triton_softmax]_speedup | 4.731550879906828 | 4.748532020150231 | 1.0035889163351315 | 0.35889163351314757 | |
| tritonbench_layer_norm_bwd[x_(4096, 12800)-liger_layer_norm]_speedup | 0.7947606970832846 | 0.797740430106766 | 1.0037492204061131 | 0.37492204061131407 | |
| tritonbench_layer_norm_bwd[x_(4096, 14336)-liger_layer_norm]_speedup | 0.842127752068173 | 0.8454350806203466 | 1.0039273477736022 | 0.3927347773602152 | |
| tritonbench_softmax_fwd[x_(4096, 12032)-triton_softmax]_speedup | 4.7164388297008975 | 4.734987872991382 | 1.0039328493298112 | 0.39328493298111944 | |
| tritonbench_layer_norm_fwd[x_(4096, 11264)-liger_layer_norm]_speedup | 1.482790675512065 | 1.4886762650375007 | 1.0039692652662542 | 0.3969265266254185 | |
| tritonbench_layer_norm_bwd[x_(4096, 12288)-liger_layer_norm]_speedup | 0.8321095839575321 | 0.8354403010126406 | 1.0040027384845966 | 0.4002738484596646 | |
| tritonbench_softmax_fwd[x_(4096, 11520)-triton_softmax]_speedup | 4.741840585917306 | 4.7608962850411265 | 1.004018629217611 | 0.40186292176109806 | |
| tritonbench_softmax_fwd[x_(4096, 10368)-triton_softmax]_speedup | 4.766377893215445 | 4.785597906548868 | 1.0040324149205166 | 0.4032414920516647 | |
| tritonbench_softmax_fwd[x_(4096, 7424)-triton_softmax]_speedup | 4.704832889050918 | 4.724151126299808 | 1.0041060411080376 | 0.41060411080375836 | |
| tritonbench_addmm_fwd[x_(20116, 512, 1536)-triton_addmm]_speedup | 0.9349881433425802 | 0.9389131288678308 | 1.0041978987147568 | 0.41978987147568425 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1.9230117175791341 | 1.9311369627068171 | 1.0042252707320534 | 0.42252707320533656 | |
| tritonbench_layer_norm_bwd[x_(4096, 10752)-liger_layer_norm]_speedup | 0.7610238132335864 | 0.7642799956253006 | 1.0042786865996725 | 0.42786865996724543 | |
| tritonbench_layer_norm_bwd[x_(4096, 9216)-liger_layer_norm]_speedup | 0.7849627250786703 | 0.7883650876654468 | 1.0043344256715319 | 0.4334425671531861 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 2304, 13312)-_triton]_speedup | 1.0467104845623845 | 1.0513010722901142 | 1.0043857282366375 | 0.43857282366375205 | |
| tritonbench_softmax_fwd[x_(4096, 8576)-triton_softmax]_speedup | 4.805144853604173 | 4.8263395333571975 | 1.0044108305574029 | 0.44108305574028783 | |
| tritonbench_jsd_fwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 0.5803724078855633 | 0.582991093573832 | 1.0045120781978751 | 0.45120781978751356 | |
| tritonbench_softmax_fwd[x_(4096, 2048)-triton_softmax]_speedup | 4.44495959761037 | 4.465358007018123 | 1.0045891101954492 | 0.4589110195449164 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 0.8608665137784937 | 0.864849888095587 | 1.0046271683859669 | 0.46271683859668844 | |
| tritonbench_softmax_fwd[x_(4096, 12288)-triton_softmax]_speedup | 4.699630279131025 | 4.721539409487938 | 1.0046618838197128 | 0.46618838197127754 | |
| tritonbench_rope_fwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 2.7608437436638824 | 2.773923236296803 | 1.0047374983328694 | 0.47374983328694054 | |
| tritonbench_softmax_fwd[x_(4096, 10752)-triton_softmax]_speedup | 4.767417369666216 | 4.790636509485369 | 1.004870381176796 | 0.4870381176796057 | |
| tritonbench_softmax_fwd[x_(4096, 9216)-triton_softmax]_speedup | 4.783775327704205 | 4.807331407443259 | 1.0049241609660542 | 0.4924160966054192 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 1.0078739839898685 | 1.0128928325216937 | 1.004979638934579 | 0.4979638934579089 | |
| tritonbench_softmax_fwd[x_(4096, 1664)-triton_softmax]_speedup | 4.291609329719174 | 4.313698454825873 | 1.0051470493723027 | 0.5147049372302703 | |
| tritonbench_jsd_fwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 4.40278497730151 | 4.42577579513686 | 1.0052218806854931 | 0.5221880685493119 | |
| tritonbench_welford_fwd[x_average-test_welford]_speedup | 0.6227756046563793 | 0.6260622761513978 | 1.0052774570333916 | 0.5277457033391553 | |
| tritonbench_layer_norm_bwd[x_(4096, 13824)-liger_layer_norm]_speedup | 0.8296436567564022 | 0.8340657754385716 | 1.005330142219683 | 0.5330142219682976 | |
| tritonbench_softmax_fwd[x_(4096, 2944)-triton_softmax]_speedup | 4.6239386860860865 | 4.6489001657884135 | 1.0053983154615433 | 0.5398315461543346 | |
| tritonbench_softmax_fwd[x_(4096, 11904)-triton_softmax]_speedup | 4.711164124480445 | 4.73671675400913 | 1.0054238461776162 | 0.5423846177616154 | |
| tritonbench_softmax_fwd[x_(4096, 10240)-triton_softmax]_speedup | 4.735487501259944 | 4.76140740556327 | 1.0054735450777623 | 0.5473545077762321 | |
| tritonbench_softmax_fwd[x_(4096, 7936)-triton_softmax]_speedup | 4.725209592297598 | 4.751226174732133 | 1.0055059107805384 | 0.5505910780538414 | |
| tritonbench_softmax_fwd[x_(4096, 3968)-triton_softmax]_speedup | 4.797603053522679 | 4.825449614425635 | 1.0058042652950434 | 0.5804265295043409 | |
| tritonbench_softmax_fwd[x_(4096, 8704)-triton_softmax]_speedup | 4.80324234065995 | 4.831205961652928 | 1.0058218218048802 | 0.5821821804880223 | |
| tritonbench_softmax_fwd[x_(4096, 8448)-triton_softmax]_speedup | 4.822759842576639 | 4.851329415634804 | 1.0059239053966453 | 0.5923905396645335 | |
| tritonbench_softmax_fwd[x_(4096, 1280)-triton_softmax]_speedup | 3.949664436740285 | 3.9733331315467852 | 1.0059925837208679 | 0.5992583720867861 | |
| tritonbench_softmax_fwd[x_(4096, 11008)-triton_softmax]_speedup | 4.7446319204623855 | 4.773812596621726 | 1.00615025077783 | 0.6150250777829935 | |
| tritonbench_softmax_fwd[x_(4096, 896)-triton_softmax]_speedup | 3.640496008656105 | 3.663244187765001 | 1.006248648276171 | 0.6248648276170998 | |
| tritonbench_softmax_fwd[x_(4096, 9088)-triton_softmax]_speedup | 4.801837657779311 | 4.833025637001571 | 1.0064950090871425 | 0.6495009087142511 | |
| tritonbench_softmax_fwd[x_(4096, 6784)-triton_softmax]_speedup | 4.703628843923576 | 4.73419768095767 | 1.006498990045438 | 0.6498990045437969 | |
| tritonbench_addmm_fwd[x_(35901, 512, 1536)-triton_addmm]_speedup | 0.997324288184425 | 1.0039594463274348 | 1.0066529595454743 | 0.6652959545474291 | |
| tritonbench_softmax_fwd[x_(4096, 6400)-triton_softmax]_speedup | 4.723123133145658 | 4.754611789620588 | 1.0066669141555828 | 0.6666914155582848 | |
| tritonbench_softmax_fwd[x_(4096, 12672)-triton_softmax]_speedup | 4.721836564039016 | 4.753655545795351 | 1.0067386876535849 | 0.6738687653584874 | |
| tritonbench_softmax_fwd[x_(4096, 5888)-triton_softmax]_speedup | 4.715533067140289 | 4.747577411623739 | 1.0067954871755112 | 0.679548717551115 | |
| tritonbench_layer_norm_bwd[x_(4096, 8704)-liger_layer_norm]_speedup | 0.7473509424429097 | 0.7524342867577843 | 1.0068018169593236 | 0.6801816959323581 | |
| tritonbench_embedding_fwd[x_average-liger_embedding]_speedup | 1.0631471799655985 | 1.0705210641286214 | 1.0069359015402377 | 0.693590154023771 | |
| tritonbench_addmm_fwd[x_(34579, 512, 1536)-triton_addmm]_speedup | 0.9780111861096646 | 0.984882223476552 | 1.007025520223566 | 0.7025520223566062 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 1.0816326238366767 | 1.089476605368574 | 1.007251983121657 | 0.7251983121657091 | |
| tritonbench_softmax_fwd[x_(4096, 5504)-triton_softmax]_speedup | 4.747796403849664 | 4.783144026000836 | 1.0074450585375798 | 0.7445058537579818 | |
| tritonbench_swiglu_bwd[x_average-liger_swiglu]_speedup | 0.9790424839901848 | 0.9863691719658316 | 1.0074835240507503 | 0.7483524050750257 | |
| tritonbench_addmm_fwd[x_(34181, 512, 1536)-triton_addmm]_speedup | 0.978531411689928 | 0.986080049985375 | 1.007714252404438 | 0.7714252404438016 | |
| tritonbench_softmax_fwd[x_(4096, 6016)-triton_softmax]_speedup | 4.695456670666228 | 4.731828199794541 | 1.0077461111196138 | 0.7746111119613763 | |
| tritonbench_softmax_fwd[x_(4096, 5120)-triton_softmax]_speedup | 4.726419981844305 | 4.763750622401199 | 1.007898291032175 | 0.7898291032174987 | |
| tritonbench_layer_norm_fwd[x_(4096, 10752)-liger_layer_norm]_speedup | 1.433584486263541 | 1.4456081357819957 | 1.0083871230706416 | 0.83871230706416 | |
| tritonbench_addmm_fwd[x_(15168, 512, 1536)-triton_addmm]_speedup | 1.062910319441614 | 1.071862874208646 | 1.0084226812020558 | 0.8422681202055804 | |
| tritonbench_addmm_fwd[x_(19410, 512, 1536)-triton_addmm]_speedup | 0.9295607581120797 | 0.9374248705539081 | 1.0084600305824014 | 0.8460030582401412 | |
| tritonbench_low_mem_dropout_fwd[x_512-triton_dropout]_speedup | 1.1328671293090333 | 1.142857083461572 | 1.0088182928907399 | 0.8818292890739876 | |
| tritonbench_softmax_fwd[x_(4096, 1408)-triton_softmax]_speedup | 4.0820436115963 | 4.1182793958742305 | 1.008876873381507 | 0.8876873381507 | |
| tritonbench_addmm_fwd[x_(20203, 512, 1536)-triton_addmm]_speedup | 0.9681853302614809 | 0.9771112957135731 | 1.0092192735968035 | 0.9219273596803523 | |
| tritonbench_layer_norm_fwd[x_(4096, 8192)-liger_layer_norm]_speedup | 1.548311658650188 | 1.5626054338570805 | 1.0092318462675363 | 0.9231846267536259 | |
| tritonbench_rms_norm_fwd[x_average-liger_rms]_speedup | 3.847226719983393 | 3.8838816763777557 | 1.0095276309565975 | 0.9527630956597477 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1.0298136740277062 | 1.0396927052908154 | 1.0095930278576233 | 0.9593027857623282 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 4096, 4096, 64)-triton_tutorial_flash_v2]_speedup | 0.7921999801376013 | 0.7998079457312222 | 1.0096035922549498 | 0.9603592254949778 | |
| tritonbench_layer_norm_fwd[x_(4096, 9216)-liger_layer_norm]_speedup | 1.3074408646456794 | 1.3200911291638657 | 1.0096755921130052 | 0.9675592113005171 | |
| tritonbench_cross_entropy_fwd[x_average-liger_cross_entropy_loss]_speedup | 1.1053884899686743 | 1.1165040485382238 | 1.0100557936602583 | 1.0055793660258328 | |
| tritonbench_addmm_fwd[x_(36032, 512, 1536)-triton_addmm]_speedup | 1.0095308075570502 | 1.0198020155756784 | 1.0101742393018034 | 1.017423930180339 | |
| tritonbench_softmax_fwd[x_(4096, 4352)-triton_softmax]_speedup | 4.742286622266863 | 4.79169217029805 | 1.0104180856127947 | 1.041808561279467 | |
| tritonbench_addmm_fwd[x_(34308, 512, 1536)-triton_addmm]_speedup | 0.9368729894359722 | 0.9466334719440221 | 1.0104181491174444 | 1.041814911744443 | |
| tritonbench_geglu_fwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 0.9632461574230026 | 0.973783891725435 | 1.010939814523241 | 1.093981452324111 | |
| tritonbench_layer_norm_fwd[x_(4096, 9728)-liger_layer_norm]_speedup | 1.3652754675847865 | 1.3803695401246143 | 1.0110556974751255 | 1.105569747512547 | |
| tritonbench_addmm_fwd[x_(20224, 512, 1536)-triton_addmm]_speedup | 0.9512961469455794 | 0.9619047504656151 | 1.011151736033093 | 1.1151736033093052 | |
| tritonbench_rope_bwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 3.63594888262531 | 3.6789117578881734 | 1.0118161384138717 | 1.1816138413871746 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 1024, 1024, 64)-triton_tutorial_flash_v2]_speedup | 1.0373973436104333 | 1.0498217593139236 | 1.0119765254654016 | 1.1976525465401622 | |
| tritonbench_jsd_fwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 0.6077141979069992 | 0.6150820445847508 | 1.012123867935169 | 1.2123867935168953 | |
| tritonbench_swiglu_fwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1.2279386245518242 | 1.2428757863871474 | 1.0121644205473013 | 1.216442054730127 | |
| tritonbench_rope_bwd[x_average-liger_rotary_pos_emb]_speedup | 3.3778462099213566 | 3.4192723817871746 | 1.0122640787328154 | 1.226407873281543 | |
| tritonbench_layer_norm_fwd[x_(4096, 7680)-liger_layer_norm]_speedup | 1.5377525265343155 | 1.5572193562701542 | 1.0126592734526092 | 1.2659273452609199 | |
| tritonbench_addmm_fwd[x_(19747, 512, 1536)-triton_addmm]_speedup | 0.9435514858719904 | 0.955617786474094 | 1.0127881740241789 | 1.2788174024178867 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 2048, 2048, 64)-triton_tutorial_flash_v2]_speedup | 0.8909412769322257 | 0.9025480255041217 | 1.0130275124437622 | 1.3027512443762213 | |
| tritonbench_addmm_fwd[x_(35884, 512, 1536)-triton_addmm]_speedup | 0.9721679841265889 | 0.9848747263821526 | 1.0130705212093356 | 1.3070521209335606 | |
| tritonbench_softmax_fwd[x_(4096, 4480)-triton_softmax]_speedup | 4.695550208266197 | 4.757378851953863 | 1.0131674970866718 | 1.3167497086671753 | |
| tritonbench_flash_attention_bwd[x_average-triton_tutorial_flash_v2]_speedup | 0.9229942897273619 | 0.9352210650263427 | 1.013246859092262 | 1.3246859092262087 | |
| tritonbench_addmm_fwd[x_(35678, 512, 1536)-triton_addmm]_speedup | 0.9883297103910513 | 1.0017228207040139 | 1.0135512574115204 | 1.3551257411520412 | |
| tritonbench_softmax_fwd[x_(4096, 512)-triton_softmax]_speedup | 3.8666667137031587 | 3.919161412365276 | 1.0135762150060879 | 1.3576215006087855 | |
| tritonbench_addmm_fwd[x_(27456, 512, 1536)-triton_addmm]_speedup | 0.9739365000114044 | 0.9872340865204358 | 1.0136534430210549 | 1.3653443021054867 | |
| tritonbench_addmm_fwd[x_(35656, 512, 1536)-triton_addmm]_speedup | 0.9676084153816562 | 0.9812114584684797 | 1.0140584175071048 | 1.4058417507104792 | |
| tritonbench_addmm_fwd[x_(34533, 512, 1536)-triton_addmm]_speedup | 0.9951219140995763 | 1.0092269453233496 | 1.0141741740624173 | 1.4174174062417277 | |
| tritonbench_softmax_fwd[x_(4096, 640)-triton_softmax]_speedup | 3.7427823262592943 | 3.796344619742883 | 1.0143108224883388 | 1.4310822488338815 | |
| tritonbench_addmm_fwd[x_(20067, 512, 1536)-triton_addmm]_speedup | 0.9601263039619264 | 0.9740829229863445 | 1.0145362323340446 | 1.4536232334044552 | |
| tritonbench_addmm_fwd[x_(35917, 512, 1536)-triton_addmm]_speedup | 1.0124360297632633 | 1.0274820877767536 | 1.014861243151341 | 1.4861243151341075 | |
| tritonbench_addmm_fwd[x_(33894, 512, 1536)-triton_addmm]_speedup | 0.957884447357298 | 0.9721385178670049 | 1.0148807829055293 | 1.48807829055293 | |
| tritonbench_layer_norm_fwd[x_(4096, 4608)-liger_layer_norm]_speedup | 1.2568759951486166 | 1.2757718150038089 | 1.0150339571510059 | 1.5033957151005861 | |
| tritonbench_addmm_fwd[x_(19632, 512, 1536)-triton_addmm]_speedup | 0.9706227567365846 | 0.9856401909341301 | 1.0154719576615296 | 1.5471957661529645 | |
| tritonbench_addmm_fwd[x_average-triton_addmm]_speedup | 0.9740244352307735 | 0.9892682240810557 | 1.0156503146111222 | 1.5650314611122207 | |
| tritonbench_addmm_fwd[x_(35503, 512, 1536)-triton_addmm]_speedup | 0.9799031627423144 | 0.9953177977503902 | 1.015730773809258 | 1.5730773809258025 | |
| tritonbench_layer_norm_fwd[x_(4096, 8704)-liger_layer_norm]_speedup | 1.2585900524837856 | 1.2784646708286294 | 1.0157911770442025 | 1.579117704420252 | |
| tritonbench_addmm_fwd[x_(35405, 512, 1536)-triton_addmm]_speedup | 0.9916911550414265 | 1.007442262797223 | 1.0158830777865904 | 1.5883077786590372 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 2048, 2048, 64)-triton_tutorial_flash_v2]_speedup | 0.8595862685933638 | 0.8733226103174755 | 1.0159801781694233 | 1.598017816942332 | |
| tritonbench_rms_norm_fwd[x_(2048, 1024)-liger_rms]_speedup | 3.790908916125762 | 3.851851798621251 | 1.016076060871906 | 1.607606087190594 | |
| tritonbench_addmm_fwd[x_(35504, 512, 1536)-triton_addmm]_speedup | 0.9707799517133658 | 0.9866831204153033 | 1.016381847064177 | 1.6381847064177002 | |
| tritonbench_softmax_fwd[x_(4096, 4224)-triton_softmax]_speedup | 4.6997534033538235 | 4.777500119143809 | 1.0165427223765622 | 1.654272237656218 | |
| tritonbench_welford_fwd[x_5120-test_welford]_speedup | 0.6651642838158686 | 0.676330828342283 | 1.0167876490035739 | 1.678764900357388 | |
| tritonbench_welford_fwd[x_8192-test_welford]_speedup | 0.6786546582257925 | 0.6903228050317051 | 1.0171930549131079 | 1.7193054913107852 | |
| tritonbench_addmm_fwd[x_(34516, 512, 1536)-triton_addmm]_speedup | 0.9671729316858216 | 0.9840398940521813 | 1.017439448327984 | 1.7439448327984053 | |
| tritonbench_welford_fwd[x_2560-test_welford]_speedup | 0.6243278675589694 | 0.6353740248782801 | 1.0176928788434505 | 1.76928788434505 | |
| tritonbench_addmm_fwd[x_(35380, 512, 1536)-triton_addmm]_speedup | 0.962448236198496 | 0.979561700465237 | 1.0177811788967854 | 1.7781178896785432 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 16384, 16384, 64)-triton_tutorial_flash_v2]_speedup | 0.7171653746765837 | 0.7299916500526769 | 1.017884682988045 | 1.788468298804502 | |
| tritonbench_addmm_fwd[x_(35844, 512, 1536)-triton_addmm]_speedup | 0.9670972775894111 | 0.9844443838893157 | 1.017937292040718 | 1.7937292040717923 | |
| tritonbench_addmm_fwd[x_(33887, 512, 1536)-triton_addmm]_speedup | 0.9566074532897889 | 0.9738154929117872 | 1.0179886112770913 | 1.7988611277091282 | |
| tritonbench_addmm_fwd[x_(20068, 512, 1536)-triton_addmm]_speedup | 0.9229264954723729 | 0.9397686534541081 | 1.0182486450051638 | 1.8248645005163766 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1.0897284506061946 | 1.1096849668291047 | 1.0183132928316303 | 1.8313292831630301 | |
| tritonbench_addmm_fwd[x_(20120, 512, 1536)-triton_addmm]_speedup | 0.9250097949755752 | 0.9419768132723346 | 1.0183425282509657 | 1.8342528250965717 | |
| tritonbench_low_mem_dropout_fwd[x_32768-triton_dropout]_speedup | 1.1412429490007971 | 1.1633986996484713 | 1.0194137021105563 | 1.9413702110556263 | |
| tritonbench_layer_norm_fwd[x_(4096, 5632)-liger_layer_norm]_speedup | 1.4241586108050595 | 1.4519589158171715 | 1.0195205118314714 | 1.952051183147141 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_speedup | 33.75424574773952 | 34.41452670770589 | 1.0195614194700406 | 1.9561419470040642 | |
| tritonbench_low_mem_dropout_fwd[x_average-triton_dropout]_speedup | 1.1309832684928391 | 1.1532152554267951 | 1.0196572200078455 | 1.9657220007845533 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 512, 512, 64)-triton_tutorial_flash_v2]_speedup | 1.1834707176637906 | 1.2068437876858717 | 1.0197495972424397 | 1.9749597242439743 | |
| tritonbench_rms_norm_bwd[x_(2048, 8192)-liger_rms]_speedup | 1.251090082277544 | 1.275799403540684 | 1.0197502335068935 | 1.9750233506893533 | |
| tritonbench_softmax_fwd[x_(4096, 384)-triton_softmax]_speedup | 3.7785233653950767 | 3.8533329996094268 | 1.0197986427448036 | 1.9798642744803585 | |
| tritonbench_addmm_fwd[x_(35605, 512, 1536)-triton_addmm]_speedup | 0.9858983764689899 | 1.0054266652990116 | 1.019807608264822 | 1.9807608264821974 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 13312, 2304)-_triton]_speedup | 1.1024444558335045 | 1.1261183518136761 | 1.0214740033884728 | 2.147400338847283 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 1.0005964853196214 | 1.0220877729956892 | 1.0214784760803979 | 2.147847608039788 | |
| tritonbench_addmm_fwd[x_(35916, 512, 1536)-triton_addmm]_speedup | 0.987603268367134 | 1.0094152189554142 | 1.022085741599806 | 2.208574159980592 | |
| tritonbench_addmm_fwd[x_(34839, 512, 1536)-triton_addmm]_speedup | 0.980745818496503 | 1.0024771059466575 | 1.0221579200648225 | 2.2157920064822534 | |
| tritonbench_welford_fwd[x_1024-test_welford]_speedup | 0.5970861890354304 | 0.6106643179322852 | 1.022740651427208 | 2.2740651427207936 | |
| tritonbench_welford_fwd[x_3072-test_welford]_speedup | 0.6314564635200584 | 0.6458466985689951 | 1.022788958353072 | 2.278895835307204 | |
| tritonbench_addmm_fwd[x_(35541, 512, 1536)-triton_addmm]_speedup | 0.977724025654443 | 1.0002475915751519 | 1.0230367315619893 | 2.3036731561989265 | |
| tritonbench_softmax_fwd[x_(4096, 2432)-triton_softmax]_speedup | 4.410058014013073 | 4.511810769195866 | 1.023072883590073 | 2.307288359007309 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1.0423011112738387 | 1.0671834731553886 | 1.0238725274418448 | 2.387252744184476 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 1.0745573396983834 | 1.1015843549962028 | 1.0251517664989431 | 2.515176649894313 | |
| tritonbench_rms_norm_bwd[x_(2048, 16384)-liger_rms]_speedup | 1.0398610995367572 | 1.0670877385212463 | 1.026182957509054 | 2.6182957509053972 | |
| tritonbench_layer_norm_fwd[x_(4096, 1024)-liger_layer_norm]_speedup | 1.2779782357994327 | 1.3117117908903853 | 1.0263960325348191 | 2.6396032534819147 | |
| tritonbench_welford_fwd[x_7168-test_welford]_speedup | 0.6446894570653673 | 0.6628415565030984 | 1.0281563460341971 | 2.8156346034197144 | |
| tritonbench_addmm_fwd[x_(34238, 512, 1536)-triton_addmm]_speedup | 0.9480106875908492 | 0.9750872360890065 | 1.0285614380223562 | 2.8561438022356223 | |
| tritonbench_addmm_fwd[x_(35791, 512, 1536)-triton_addmm]_speedup | 0.9834911256985909 | 1.0119017846153278 | 1.0288875600138805 | 2.8887560013880487 | |
| tritonbench_addmm_fwd[x_(35561, 512, 1536)-triton_addmm]_speedup | 0.9846714731370093 | 1.0131383666231903 | 1.0289100418391222 | 2.891004183912216 | |
| tritonbench_addmm_fwd[x_(33660, 512, 1536)-triton_addmm]_speedup | 1.0714285856267163 | 1.1030390497760243 | 1.0295030994817242 | 2.950309948172425 | |
| tritonbench_rope_fwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 2.8344004655420574 | 2.920879033381363 | 1.0305103562078222 | 3.0510356207822165 | |
| tritonbench_addmm_fwd[x_(35249, 512, 1536)-triton_addmm]_speedup | 0.9769024975658357 | 1.008231483720672 | 1.0320697165099888 | 3.20697165099888 | |
| tritonbench_rms_norm_fwd[x_(2048, 2048)-liger_rms]_speedup | 3.4503675375024785 | 3.566794127379919 | 1.0337432428899198 | 3.374324288991981 | |
| tritonbench_addmm_fwd[x_(33961, 512, 1536)-triton_addmm]_speedup | 0.9677419171293598 | 1.000498756939776 | 1.0338487351127497 | 3.384873511274966 | |
| tritonbench_rms_norm_bwd[x_(2048, 1024)-liger_rms]_speedup | 0.37369613521198625 | 0.38715769196902494 | 1.0360227347532036 | 3.602273475320361 | |
| tritonbench_addmm_fwd[x_(35410, 512, 1536)-triton_addmm]_speedup | 0.9786769555207153 | 1.014345777535752 | 1.0364459608595349 | 3.6445960859534887 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 0.6053642375147344 | 0.6278791520845198 | 1.0371923433439316 | 3.719234334393162 | |
| tritonbench_grouped_gemm_fwd[x_512-triton]_speedup | 0.2094707526130375 | 0.21734036949282884 | 1.03756904857419 | 3.7569048574189967 | |
| tritonbench_flex_attention_bwd[x_average-compiled]_speedup | 12.163566977108449 | 12.63428161781806 | 1.038698733816773 | 3.8698733816773023 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_speedup | 27.4930922339489 | 28.569285680023714 | 1.039144139804904 | 3.9144139804903944 | |
| tritonbench_rms_norm_fwd[x_(2048, 32768)-liger_rms]_speedup | 3.4977050721101732 | 3.6406501100512374 | 1.0408682364562045 | 4.086823645620452 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 8192, 8192, 64)-triton_tutorial_flash_v2]_speedup | 0.8232426925500342 | 0.8573266584936939 | 1.0414020874428693 | 4.140208744286933 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_speedup | 30.108392348824133 | 31.36907620385871 | 1.0418715101234495 | 4.187151012344947 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 16384, 16384, 64)-triton_tutorial_flash_v2]_speedup | 0.8722730039719995 | 0.9098174590839943 | 1.0430420922589965 | 4.304209225899647 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1.019785613466018 | 1.064760307798593 | 1.0441021070886816 | 4.410210708868156 | |
| tritonbench_swiglu_bwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 0.8397575807420454 | 0.8790748703443867 | 1.0468198090782328 | 4.68198090782328 | |
| tritonbench_gemm_fwd[x_(1024, 1024, 1024)-triton_tutorial_matmul]_speedup | 0.6286307179214503 | 0.6587472818876933 | 1.047908196509745 | 4.790819650974498 | |
| tritonbench_grouped_gemm_fwd[x_256-triton]_speedup | 0.18229469443114404 | 0.1913846180187703 | 1.0498638954687718 | 4.986389546877179 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1.015305757169131 | 1.0668318049153616 | 1.050749291415322 | 5.074929141532203 | |
| tritonbench_gemm_fwd[x_(1152, 1152, 1152)-triton_tutorial_matmul]_speedup | 0.6487985282176437 | 0.6852886373662823 | 1.0562425892809635 | 5.624258928096348 | |
| tritonbench_grouped_gemm_fwd[x_average-triton]_speedup | 0.1766186330633104 | 0.1867622298116646 | 1.0574322005126047 | 5.743220051260467 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_average-_triton]_speedup | 0.7490762171158447 | 0.7955222829950928 | 1.0620044593834237 | 6.200445938342369 | |
| tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 1024)-triton]_speedup | 0.46376812205499474 | 0.49293562790301565 | 1.0628924336558048 | 6.289243365580477 | |
| tritonbench_softmax_fwd[x_(4096, 768)-triton_softmax]_speedup | 3.6040725346487035 | 3.836492754042984 | 1.064488219135394 | 6.448821913539393 | |
| tritonbench_grouped_gemm_fwd[x_1024-triton]_speedup | 0.1502182176822012 | 0.16030534723584264 | 1.06714984180535 | 6.714984180535 | |
| tritonbench_rope_bwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 2.3423039086195816 | 2.505056012250436 | 1.069483768964366 | 6.948376896436592 | |
| tritonbench_rms_norm_bwd[x_average-liger_rms]_speedup | 0.7417710056318009 | 0.7957658492801478 | 1.0727917959025064 | 7.2791795902506395 | |
| tritonbench_gemm_fwd[x_(384, 384, 384)-triton_tutorial_matmul]_speedup | 0.8941176933832611 | 0.9617021455777318 | 1.0755878702486439 | 7.5587870248643885 | |
| tritonbench_grouped_gemm_fwd[x_128-triton]_speedup | 0.16449086752685893 | 0.1780185844992166 | 1.0822399272114531 | 8.223992721145311 | |
| tritonbench_low_mem_dropout_fwd[x_524288-triton_dropout]_speedup | 0.9678714756167762 | 1.0486725498470697 | 1.083483268456489 | 8.348326845648902 | |
| tritonbench_flash_attention_fwd[x_average-triton_tutorial_flash_v2]_speedup | 1.0137224294991736 | 1.099296923022081 | 1.0844160995482612 | 8.441609954826124 | |
| tritonbench_gemm_fwd[x_(1408, 1408, 1408)-triton_tutorial_matmul]_speedup | 0.7003105432871565 | 0.7605863047047715 | 1.0860700470603932 | 8.607004706039323 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_speedup | 21.133578614875113 | 23.182881310623575 | 1.09696903364516 | 9.696903364516007 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_speedup | 21.43795374600298 | 23.793965348831318 | 1.1098990897518664 | 10.989908975186635 | |
| tritonbench_rope_bwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 2.3189012301652254 | 2.5975147498715248 | 1.1201489378167468 | 12.014893781674685 | |
| tritonbench_low_mem_dropout_fwd[x_8192-triton_dropout]_speedup | 1.142011791011773 | 1.3013698534562392 | 1.1395415211109876 | 13.954152111098761 | |
| tritonbench_low_mem_dropout_fwd[x_2048-triton_dropout]_speedup | 1.156462589747948 | 1.32167833448748 | 1.142863025751262 | 14.286302575126197 | |
| tritonbench_gemm_fwd[x_(640, 640, 640)-triton_tutorial_matmul]_speedup | 0.7739938791848584 | 0.8862876381142197 | 1.1450835232025671 | 14.508352320256712 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 128, 128, 64)-triton_tutorial_flash_v2]_speedup | 0.8574108463957293 | 0.9961766913578586 | 1.1618428849430291 | 16.184288494302912 | |
| tritonbench_gemm_fwd[x_(256, 256, 256)-triton_tutorial_matmul]_speedup | 0.8888888817027578 | 1.0338164006689101 | 1.1630434601550295 | 16.304346015502947 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4, 4096, 2304)-_triton]_speedup | 0.45796851179520687 | 0.5385405598811152 | 1.175933598076582 | 17.593359807658192 | |
| tritonbench_rms_norm_bwd[x_(2048, 2048)-liger_rms]_speedup | 0.5401493724877225 | 0.6363636188707251 | 1.1781252580927315 | 17.81252580927315 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4, 13312, 2048)-_triton]_speedup | 0.5509157536024145 | 0.6530612582193145 | 1.1854103897900448 | 18.541038979004476 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(128, 2304, 6656)-_triton]_speedup | 0.7800324847897045 | 0.9375609547782697 | 1.2019511662146156 | 20.195116621461562 | |
| tritonbench_rms_norm_bwd[x_(2048, 4096)-liger_rms]_speedup | 0.8354541628057371 | 1.0247978436995997 | 1.2266356304431862 | 22.663563044318625 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(128, 8192, 2304)-_triton]_speedup | 0.46483180210391334 | 0.6297827945382797 | 1.3548616761757009 | 35.48616761757009 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(8, 2304, 2304)-_triton]_speedup | 0.4835466514737925 | 0.6571428571428571 | 1.3590061168657959 | 35.90061168657959 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(64, 4096, 2048)-_triton]_speedup | 0.3534421589365737 | 0.5155279476907165 | 1.4585921194059637 | 45.85921194059637 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(64, 13312, 2048)-_triton]_speedup | 0.48988137172834756 | 0.7657952003408762 | 1.563225802277557 | 56.322580227755694 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 128, 128, 64)-triton_tutorial_flash_v2]_speedup | 0.8108107706545644 | 1.4835163982891106 | 1.82967031517289 | 82.96703151728899 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment