Last active
September 19, 2025 18:16
-
-
Save davidberard98/50a197b08511070eb1fef76013350f30 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| metric | side_a_speedup | side_b_speedup | ratio_b_over_a | improvement_percent | |
|---|---|---|---|---|---|
| tritonbench_rope_bwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 3.872456642947213 | 2.7721369338334196 | 0.7158600313530245 | -28.413996864697555 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 256, 16, 256, 128) \| noop-compiled]_speedup | 80.91770255783995 | 65.44677234875213 | 0.8088066057235227 | -19.11933942764773 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 128, 16, 128, 128) \| noop-compiled]_speedup | 26.4510668357185 | 22.165953910167254 | 0.8379984840624729 | -16.20015159375271 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 128, 128, 64)-triton_tutorial_flash_v2]_speedup | 1.5028248392486048 | 1.2918659460892634 | 0.8596250955867761 | -14.037490441322387 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 512, 16, 512, 128) \| noop-compiled]_speedup | 68.91569269467183 | 61.62544280408594 | 0.8942149515512374 | -10.578504844876257 | |
| tritonbench_gemm_fwd[x_(1280, 1280, 1280)-triton_tutorial_matmul]_speedup | 0.7517605254921877 | 0.6761268526603815 | 0.8993912685395554 | -10.060873146044457 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 128, 16, 128, 128) \| noop-compiled]_speedup | 74.69016768218381 | 67.24201043480133 | 0.9002792806802183 | -9.972071931978165 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(64, 13312, 2048)-_triton]_speedup | 0.7259858665437773 | 0.6570363458826082 | 0.9050263595496436 | -9.497364045035638 | |
| tritonbench_gemm_fwd[x_(384, 384, 384)-triton_tutorial_matmul]_speedup | 0.89019609476801 | 0.8134920854904238 | 0.9138347048157118 | -8.616529518428816 | |
| tritonbench_gemm_fwd[x_(896, 896, 896)-triton_tutorial_matmul]_speedup | 0.7415458998327725 | 0.6811593897250264 | 0.9185667264543382 | -8.143327354566177 | |
| tritonbench_flex_attention_fwd[x_average-compiled]_speedup | 39.41781812574853 | 36.34228982561435 | 0.9219761913172667 | -7.802380868273328 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 256, 16, 256, 128) \| noop-compiled]_speedup | 23.819493344175164 | 21.989429822120723 | 0.9231695025745807 | -7.683049742541925 | |
| tritonbench_rms_norm_bwd[x_(2048, 32768)-liger_rms]_speedup | 0.41033445340348207 | 0.38039704742117975 | 0.9270414518352305 | -7.295854816476954 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(1, 2304, 2048)-_triton]_speedup | 0.5301339302390289 | 0.4935834243893425 | 0.9310542039193637 | -6.8945796080636335 | |
| tritonbench_swiglu_fwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1.0282033889499047 | 0.960015832888271 | 0.9336828133475876 | -6.631718665241237 | |
| tritonbench_flex_attention_bwd[x_average-compiled]_speedup | 13.099646110268003 | 12.240713832015242 | 0.9344308791991337 | -6.556912080086629 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(8, 2304, 2304)-_triton]_speedup | 0.5310853440839427 | 0.4965787209404082 | 0.9350262184262415 | -6.497378157375855 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 13312, 2304)-_triton]_speedup | 1.1652799755934045 | 1.101271991190709 | 0.9450707248529692 | -5.492927514703084 | |
| tritonbench_gemm_fwd[x_(768, 768, 768)-triton_tutorial_matmul]_speedup | 0.6883117275814891 | 0.6530612441873078 | 0.9487870364812171 | -5.121296351878291 | |
| tritonbench_welford_fwd[x_1536-test_welford]_speedup | 0.5786131110905074 | 0.5509859878398601 | 0.9522528564923483 | -4.774714350765175 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(64, 4096, 2048)-_triton]_speedup | 0.48101262737814 | 0.45898618455916523 | 0.954208181728961 | -4.579181827103895 | |
| tritonbench_rms_norm_fwd[x_(2048, 2048)-liger_rms]_speedup | 3.62355216867367 | 3.4634146820911513 | 0.9558064906676551 | -4.419350933234489 | |
| tritonbench_layer_norm_fwd[x_(4096, 1024)-liger_layer_norm]_speedup | 1.3716980744576996 | 1.3121596542105796 | 0.9565950981810202 | -4.340490181897982 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 13312, 6656)-_triton]_speedup | 1.0943520171180818 | 1.0476445539650099 | 0.9573195256896646 | -4.268047431033539 | |
| tritonbench_swiglu_bwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1.0125297698478177 | 0.9733073502658477 | 0.9612629467794659 | -3.8737053220534112 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(32, 2304, 16384)-_triton]_speedup | 0.7418017809921026 | 0.7142857385908088 | 0.9629064756834458 | -3.7093524316554216 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 512, 16, 512, 128) \| noop-compiled]_speedup | 14.46750965910237 | 13.952769297870766 | 0.964420942279603 | -3.5579057720397045 | |
| tritonbench_welford_fwd[x_2048-test_welford]_speedup | 0.573423364009273 | 0.5550247932852954 | 0.967914508060261 | -3.2085491939738975 | |
| tritonbench_softmax_fwd[x_(4096, 1152)-triton_softmax]_speedup | 3.8399280905049586 | 3.7268040474126396 | 0.9705400620985475 | -2.9459937901452515 | |
| tritonbench_softmax_fwd[x_(4096, 1664)-triton_softmax]_speedup | 4.27961420988512 | 4.162913917189848 | 0.9727311185139735 | -2.7268881486026464 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16, 13312, 13312)-_triton]_speedup | 0.7964477277481811 | 0.7764127871887845 | 0.9748446258789112 | -2.5155374121088814 | |
| tritonbench_softmax_fwd[x_(4096, 2304)-triton_softmax]_speedup | 4.371457742591337 | 4.264560418975628 | 0.9755465270602519 | -2.445347293974809 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 2048, 2048, 64)-triton_tutorial_flash_v2]_speedup | 0.8602836245386501 | 0.8394570627967098 | 0.9757910517556241 | -2.4208948244375916 | |
| tritonbench_layer_norm_fwd[x_(4096, 1536)-liger_layer_norm]_speedup | 1.3276836548361237 | 1.2963482786678653 | 0.9763984620477035 | -2.360153795229647 | |
| tritonbench_rms_norm_bwd[x_(2048, 16384)-liger_rms]_speedup | 1.026982534867518 | 1.0035272889300886 | 0.9771610079615861 | -2.2838992038413886 | |
| tritonbench_softmax_fwd[x_(4096, 2688)-triton_softmax]_speedup | 4.60295484235613 | 4.505405708986644 | 0.9788072799515989 | -2.1192720048401115 | |
| tritonbench_gemm_fwd[x_(3712, 3712, 3712)-triton_tutorial_matmul]_speedup | 0.9031034318033014 | 0.8840483070543161 | 0.9789003960366573 | -2.109960396334265 | |
| tritonbench_swiglu_bwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 0.8317691716450402 | 0.814533313135485 | 0.9792780748588377 | -2.0721925141162334 | |
| tritonbench_rope_bwd[x_average-liger_rotary_pos_emb]_speedup | 3.5458751006000697 | 3.472904093845866 | 0.9794208750494752 | -2.057912495052483 | |
| tritonbench_welford_fwd[x_6144-test_welford]_speedup | 0.6534903344689879 | 0.6400794047059176 | 0.9794779982875069 | -2.0522001712493076 | |
| tritonbench_gemm_fwd[x_(2688, 2688, 2688)-triton_tutorial_matmul]_speedup | 0.9456165236626565 | 0.9272812389394656 | 0.9806102323041344 | -1.9389767695865645 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 2.0761310621157105 | 2.036851046961825 | 0.9810801852201678 | -1.8919814779832178 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 0.7860623482826301 | 0.7714934331802106 | 0.9814659547881293 | -1.8534045211870742 | |
| tritonbench_gemm_fwd[x_(2176, 2176, 2176)-triton_tutorial_matmul]_speedup | 0.6874999909951017 | 0.6747868596457358 | 0.9815081723405339 | -1.849182765946611 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1.595283276447927 | 1.5666847009839169 | 0.9820730425208944 | -1.7926957479105554 | |
| tritonbench_rms_norm_bwd[x_(2048, 8192)-liger_rms]_speedup | 1.270231263588272 | 1.2490090643698293 | 0.9832926492782958 | -1.670735072170415 | |
| tritonbench_swiglu_fwd[x_average-liger_swiglu]_speedup | 1.082621605023014 | 1.065415528210507 | 0.9841070261920911 | -1.5892973807908883 | |
| tritonbench_swiglu_bwd[x_average-liger_swiglu]_speedup | 0.9784946601845061 | 0.9632974446018294 | 0.9844687802590449 | -1.553121974095506 | |
| tritonbench_flash_attention_fwd[x_average-triton_tutorial_flash_v2]_speedup | 1.0857202236476184 | 1.0689801371189696 | 0.9845815835755473 | -1.541841642445274 | |
| tritonbench_gemm_fwd[x_(512, 512, 512)-triton_tutorial_matmul]_speedup | 0.8625429106470411 | 0.8494208787064684 | 0.9847868068027721 | -1.5213193197227914 | |
| tritonbench_gemm_fwd[x_(1408, 1408, 1408)-triton_tutorial_matmul]_speedup | 0.7433071044420512 | 0.7324633044858829 | 0.9854114135444623 | -1.4588586455537733 | |
| tritonbench_softmax_fwd[x_(4096, 3712)-triton_softmax]_speedup | 4.768472862818265 | 4.70628912272967 | 0.986959401494456 | -1.3040598505543954 | |
| tritonbench_gemm_fwd[x_(2304, 2304, 2304)-triton_tutorial_matmul]_speedup | 0.681637319614989 | 0.6734348639529706 | 0.9879665396450837 | -1.2033460354916259 | |
| tritonbench_gemm_fwd[x_(1152, 1152, 1152)-triton_tutorial_matmul]_speedup | 0.6261180374292956 | 0.619389609587523 | 0.9892537390083855 | -1.0746260991614465 | |
| tritonbench_welford_fwd[x_4096-test_welford]_speedup | 0.5862376091293723 | 0.5803147217893215 | 0.9898967803364801 | -1.0103219663519902 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 8192, 13312)-_triton]_speedup | 1.1589687544326976 | 1.1474460847701715 | 0.9900578254431317 | -0.99421745568683 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 4096, 16, 4096, 128) \| noop-compiled]_speedup | 13.958826780459077 | 13.82357734417826 | 0.990310830673094 | -0.9689169326906 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1.8492561671923893 | 1.8317803677285527 | 0.9905498222615804 | -0.9450177738419607 | |
| tritonbench_gemm_fwd[x_(1536, 1536, 1536)-triton_tutorial_matmul]_speedup | 0.6968010721219631 | 0.6909090864685826 | 0.9915442356662316 | -0.8455764333768379 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1.5996391411486017 | 1.5864827395160452 | 0.9917753940284871 | -0.8224605971512866 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 8192, 8192, 64)-triton_tutorial_flash_v2]_speedup | 0.748064081684358 | 0.7421104759515026 | 0.992041315873033 | -0.7958684126966986 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1.9464324876929522 | 1.9310420636305135 | 0.9920930090513026 | -0.7906990948697401 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 16384, 16384, 64)-triton_tutorial_flash_v2]_speedup | 0.7128300880706262 | 0.7072219817239129 | 0.9921326183608602 | -0.7867381639139803 | |
| tritonbench_layer_norm_bwd[x_(4096, 1024)-liger_layer_norm]_speedup | 0.5068161329841744 | 0.5028674783247186 | 0.9922089010146425 | -0.7791098985357459 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 0.6984162670443088 | 0.6931901400125959 | 0.9925171745299837 | -0.748282547001633 | |
| tritonbench_rms_norm_fwd[x_(2048, 1024)-liger_rms]_speedup | 3.573065664827788 | 3.546511559043127 | 0.992568256988375 | -0.7431743011624969 | |
| tritonbench_layer_norm_fwd[x_(4096, 11776)-liger_layer_norm]_speedup | 1.5105322777640664 | 1.4996615998558793 | 0.9928034123678058 | -0.7196587632194218 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 0.33129807766263464 | 0.3290352983427091 | 0.993169959403659 | -0.6830040596340958 | |
| tritonbench_softmax_fwd[x_(4096, 10496)-triton_softmax]_speedup | 4.8010782255092135 | 4.769148231434905 | 0.9933494118248987 | -0.6650588175101335 | |
| tritonbench_layer_norm_fwd[x_(4096, 9216)-liger_layer_norm]_speedup | 1.316494054026588 | 1.307750436484222 | 0.9933584071150015 | -0.6641592884998548 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 8192, 2048)-_triton]_speedup | 1.257534304247788 | 1.249207062395763 | 0.9933781195281141 | -0.6621880471885944 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1.5512598486379368 | 1.5410141733222444 | 0.9933952552664285 | -0.6604744733571533 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 1.4758064430952629 | 1.4662991781095551 | 0.993557918770318 | -0.644208122968204 | |
| tritonbench_embedding_bwd[x_average-liger_embedding]_speedup | 1.3788076289243911 | 1.3701926454156363 | 0.9937518597025204 | -0.6248140297479554 | |
| tritonbench_gemm_fwd[x_(1792, 1792, 1792)-triton_tutorial_matmul]_speedup | 0.8901234821633761 | 0.8848485246157024 | 0.9940739036174472 | -0.5926096382552815 | |
| tritonbench_softmax_fwd[x_(4096, 7936)-triton_softmax]_speedup | 4.752804805062711 | 4.724799614997022 | 0.9941076498584883 | -0.5892350141511682 | |
| tritonbench_softmax_fwd[x_(4096, 9088)-triton_softmax]_speedup | 4.831076714901533 | 4.802812588485966 | 0.9941495181957294 | -0.5850481804270613 | |
| tritonbench_layer_norm_fwd[x_(4096, 4096)-liger_layer_norm]_speedup | 1.3825974961544958 | 1.3746835142719656 | 0.9942760044737953 | -0.5723995526204728 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 0.6206148682877406 | 0.617080355298678 | 0.9943048206389025 | -0.5695179361097535 | |
| tritonbench_kl_div_bwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 0.9225020140104235 | 0.9173714649789185 | 0.9944384413761865 | -0.5561558623813467 | |
| tritonbench_softmax_fwd[x_(4096, 7680)-triton_softmax]_speedup | 4.728850123065216 | 4.703344121181108 | 0.994606299370813 | -0.5393700629186982 | |
| tritonbench_layer_norm_fwd[x_(4096, 14336)-liger_layer_norm]_speedup | 1.590535839211541 | 1.5822611550161025 | 0.9947975493595037 | -0.5202450640496292 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 1024, 1024, 64)-triton_tutorial_flash_v2]_speedup | 1.0564868009346586 | 1.051018089412294 | 0.994823682115549 | -0.5176317884451054 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 2048, 16, 2048, 128) \| noop-compiled]_speedup | 13.530005919819493 | 13.461528084988394 | 0.9949388170827932 | -0.5061182917206808 | |
| tritonbench_swiglu_bwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1.0405088367894777 | 1.035281034078711 | 0.9949757248319994 | -0.5024275168000636 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 512, 512, 64)-triton_tutorial_flash_v2]_speedup | 1.269646764682159 | 1.2633093492643095 | 0.9950085207995344 | -0.49914792004656094 | |
| tritonbench_softmax_fwd[x_(4096, 8192)-triton_softmax]_speedup | 4.828960863746952 | 4.80623323621277 | 0.9952934744812683 | -0.4706525518731741 | |
| tritonbench_rope_bwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 3.660442954472158 | 3.643227109121236 | 0.9952967863274885 | -0.4703213672511475 | |
| tritonbench_softmax_fwd[x_(4096, 9984)-triton_softmax]_speedup | 4.775740741336714 | 4.7540616393377615 | 0.9954605781232412 | -0.453942187675882 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 1.3724070616233144 | 1.366375712362822 | 0.9956052767220839 | -0.4394723277916124 | |
| tritonbench_layer_norm_fwd[x_(4096, 14848)-liger_layer_norm]_speedup | 1.5761075390136778 | 1.5693789158030425 | 0.9957308603353003 | -0.42691396646996616 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 0.30486699890441094 | 0.30357620321176726 | 0.9957660366740829 | -0.42339633259170784 | |
| tritonbench_softmax_fwd[x_(4096, 11392)-triton_softmax]_speedup | 4.767256156174415 | 4.747089616790845 | 0.9957697806195182 | -0.4230219380481848 | |
| tritonbench_softmax_fwd[x_(4096, 6016)-triton_softmax]_speedup | 4.721392923233391 | 4.701573056188471 | 0.9958021144676631 | -0.4197885532336909 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1.3652479735431031 | 1.3596798650252062 | 0.9959215405363712 | -0.4078459463628836 | |
| tritonbench_softmax_fwd[x_(4096, 7424)-triton_softmax]_speedup | 4.720852055783301 | 4.701819511380732 | 0.995968409054621 | -0.4031590945378971 | |
| tritonbench_layer_norm_fwd[x_(4096, 7680)-liger_layer_norm]_speedup | 1.553024983805427 | 1.5468416746686708 | 0.996018538528849 | -0.3981461471150971 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 85.64285174915061 | 85.3046579242822 | 0.996051114390037 | -0.3948885609962982 | |
| tritonbench_swiglu_fwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1.0368469134979423 | 1.0330299756990873 | 0.9963187065041472 | -0.36812934958527777 | |
| tritonbench_layer_norm_fwd[x_(4096, 9728)-liger_layer_norm]_speedup | 1.3681481248970226 | 1.3631202916596163 | 0.9963250812204382 | -0.3674918779561831 | |
| tritonbench_softmax_fwd[x_(4096, 7552)-triton_softmax]_speedup | 4.7266372050522705 | 4.709476607293861 | 0.9963693854607528 | -0.3630614539247179 | |
| tritonbench_gemm_fwd[x_(1024, 1024, 1024)-triton_tutorial_matmul]_speedup | 0.6487068647842971 | 0.6464208379895781 | 0.9964760249677963 | -0.35239750322036967 | |
| tritonbench_layer_norm_fwd[x_(4096, 8704)-liger_layer_norm]_speedup | 1.2627420186677483 | 1.2583440277247029 | 0.996517110480187 | -0.3482889519813015 | |
| tritonbench_gemm_fwd[x_(3200, 3200, 3200)-triton_tutorial_matmul]_speedup | 0.7927434146792091 | 0.7901010354658564 | 0.9966667913420361 | -0.33332086579639375 | |
| tritonbench_softmax_fwd[x_(4096, 6144)-triton_softmax]_speedup | 4.75322383494346 | 4.737516527323237 | 0.996695441206713 | -0.3304558793286949 | |
| tritonbench_gemm_fwd[x_average-triton_tutorial_matmul]_speedup | 0.8100316891520294 | 0.8074681757352743 | 0.9968352924322768 | -0.3164707567723246 | |
| tritonbench_layer_norm_bwd[x_(4096, 8192)-liger_layer_norm]_speedup | 1.0535418855482095 | 1.0502839163434845 | 0.9969076035329818 | -0.3092396467018199 | |
| tritonbench_flex_attention_bwd[x_ (8, 16, 1024, 16, 1024, 128) \| noop-compiled]_speedup | 12.570266342869415 | 12.532452196796532 | 0.9969917784523051 | -0.3008221547694867 | |
| tritonbench_layer_norm_fwd[x_(4096, 8192)-liger_layer_norm]_speedup | 1.5622476868330748 | 1.5576408012932401 | 0.9970511170676312 | -0.2948882932368835 | |
| tritonbench_softmax_fwd[x_(4096, 9472)-triton_softmax]_speedup | 4.7890072008932885 | 4.774970624283648 | 0.9970690007300422 | -0.2930999269957768 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_average-liger_lm_head_ce]_speedup | 0.3074590956700557 | 0.30656587213808706 | 0.99709482157286 | -0.29051784271399983 | |
| tritonbench_softmax_fwd[x_(4096, 6656)-triton_softmax]_speedup | 4.735732118043402 | 4.722039642482404 | 0.9971086887476533 | -0.2891311252346651 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 1.086724679813746 | 1.0836383921795132 | 0.9971600096219755 | -0.2839990378024515 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1.232110230330958 | 1.2286176508507851 | 0.9971653676804269 | -0.28346323195731493 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(32, 8192, 13312)-_triton]_speedup | 0.516865874461181 | 0.5154721231020777 | 0.9973034564130273 | -0.26965435869726884 | |
| tritonbench_softmax_fwd[x_(4096, 3456)-triton_softmax]_speedup | 4.694013224022822 | 4.681383425216347 | 0.9973093815028389 | -0.26906184971611236 | |
| tritonbench_layer_norm_bwd[x_(4096, 9728)-liger_layer_norm]_speedup | 0.8093814523063336 | 0.8073455195568475 | 0.997484581906733 | -0.2515418093267052 | |
| tritonbench_layer_norm_fwd[x_(4096, 12288)-liger_layer_norm]_speedup | 1.5294116805342963 | 1.525576305681538 | 0.9974922547659513 | -0.25077452340487305 | |
| tritonbench_layer_norm_bwd[x_(4096, 14848)-liger_layer_norm]_speedup | 0.8189254484018068 | 0.8168957313158525 | 0.9975214873467231 | -0.24785126532769208 | |
| tritonbench_layer_norm_bwd[x_(4096, 12800)-liger_layer_norm]_speedup | 0.8097976628269815 | 0.8078085647125293 | 0.997543709736691 | -0.24562902633089578 | |
| tritonbench_layer_norm_fwd[x_(4096, 5120)-liger_layer_norm]_speedup | 1.353982382938206 | 1.350706226478775 | 0.9975803551798645 | -0.2419644820135458 | |
| tritonbench_geglu_fwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1.0056968057805804 | 1.0033373442991351 | 0.9976539037731019 | -0.23460962268980756 | |
| tritonbench_softmax_fwd[x_(4096, 2816)-triton_softmax]_speedup | 4.560869571819394 | 4.550737137142392 | 0.9977783985011087 | -0.2221601498891279 | |
| tritonbench_rope_bwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 3.9495622296723334 | 3.942054787043776 | 0.9980991709480723 | -0.19008290519276771 | |
| tritonbench_jsd_fwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 4.312912571954319 | 4.304769696203691 | 0.9981119775523439 | -0.1888022447656068 | |
| tritonbench_gemm_fwd[x_(2944, 2944, 2944)-triton_tutorial_matmul]_speedup | 0.6941931548606403 | 0.6929388051060109 | 0.9981930825075894 | -0.18069174924105758 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1.145863575426287 | 1.1438362646126634 | 0.9982307572584552 | -0.17692427415447742 | |
| tritonbench_jsd_bwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 6.045495949378903 | 6.0348060814293625 | 0.998231763276487 | -0.1768236723513006 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 1.8301455684489667 | 1.8269875863707186 | 0.9982744639919957 | -0.17255360080042914 | |
| tritonbench_layer_norm_bwd[x_(4096, 5120)-liger_layer_norm]_speedup | 0.8940909713450915 | 0.8925575411894171 | 0.9982849282625375 | -0.1715071737462548 | |
| tritonbench_layer_norm_bwd[x_(4096, 2048)-liger_layer_norm]_speedup | 0.6915506509477481 | 0.6903735571302598 | 0.9982978921126382 | -0.17021078873618034 | |
| tritonbench_layer_norm_fwd[x_(4096, 15360)-liger_layer_norm]_speedup | 1.5932422708692313 | 1.5905816543728648 | 0.9983300615700367 | -0.16699384299633246 | |
| tritonbench_layer_norm_fwd[x_average-liger_layer_norm]_speedup | 1.4432388991739507 | 1.4408733342284876 | 0.9983609332129164 | -0.16390667870835873 | |
| tritonbench_softmax_fwd[x_(4096, 10624)-triton_softmax]_speedup | 4.807671776019647 | 4.799946880670175 | 0.9983932149053929 | -0.16067850946070994 | |
| tritonbench_layer_norm_fwd[x_(4096, 3584)-liger_layer_norm]_speedup | 1.3280462142819194 | 1.326133966127583 | 0.9985601042088958 | -0.1439895791104151 | |
| tritonbench_softmax_fwd[x_(4096, 8576)-triton_softmax]_speedup | 4.80920217446447 | 4.80250488091991 | 0.9986074002918569 | -0.1392599708143094 | |
| tritonbench_geglu_fwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1.0122192460713488 | 1.0108586085748021 | 0.9986557877635427 | -0.13442122364573095 | |
| tritonbench_rope_bwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 3.6799477862867565 | 3.6754025950315388 | 0.9987648761560816 | -0.12351238439184398 | |
| tritonbench_softmax_fwd[x_(4096, 4992)-triton_softmax]_speedup | 4.7787371972204316 | 4.772996253668429 | 0.9987986484054111 | -0.12013515945888953 | |
| tritonbench_rope_fwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 3.058421423562507 | 3.0548653659873573 | 0.9988372898686384 | -0.11627101313615995 | |
| tritonbench_kl_div_bwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 1.0200919757227167 | 1.018925272263689 | 0.998856276211563 | -0.11437237884369678 | |
| tritonbench_swiglu_fwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1.2400881439021776 | 1.2386811897519423 | 0.9988654402050744 | -0.11345597949256048 | |
| tritonbench_layer_norm_fwd[x_(4096, 10240)-liger_layer_norm]_speedup | 1.3920863490112292 | 1.390512647991649 | 0.9988695377835592 | -0.1130462216440753 | |
| tritonbench_fused_linear_jsd_fwd[x_(4096, 4096)-liger_lm_head_jsd]_speedup | 0.3717545343344134 | 0.3713412407951508 | 0.9988882622776812 | -0.11117377223187619 | |
| tritonbench_rms_norm_fwd[x_(2048, 16384)-liger_rms]_speedup | 3.992656975337204 | 3.9882982165919745 | 0.9989083062301235 | -0.10916937698765006 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 0.8633951689084083 | 0.8624601592779951 | 0.9989170548271711 | -0.10829451728289197 | |
| tritonbench_softmax_fwd[x_(4096, 4224)-triton_softmax]_speedup | 4.715607639143241 | 4.710672376403699 | 0.9989534195553983 | -0.1046580444601708 | |
| tritonbench_layer_norm_bwd[x_(4096, 4608)-liger_layer_norm]_speedup | 0.8777714573325273 | 0.8768641001791483 | 0.9989662945339595 | -0.10337054660405354 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1.3170397871856907 | 1.3156913315887218 | 0.9989761466509297 | -0.10238533490702917 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1.0153899148973713 | 1.0143564701967627 | 0.9989822188644516 | -0.10177811355484012 | |
| tritonbench_embedding_bwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1.118684666217797 | 1.1176017210913967 | 0.9990319478230969 | -0.09680521769031003 | |
| tritonbench_layer_norm_bwd[x_(4096, 10240)-liger_layer_norm]_speedup | 0.8222178949502688 | 0.8214493456073374 | 0.9990652729067909 | -0.09347270932090757 | |
| tritonbench_softmax_fwd[x_(4096, 11904)-triton_softmax]_speedup | 4.734621241039573 | 4.7303481611370035 | 0.9990974822092356 | -0.09025177907644455 | |
| tritonbench_jsd_fwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 0.6087472947033401 | 0.6082043930405621 | 0.9991081657898084 | -0.08918342101915666 | |
| tritonbench_jsd_bwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 5.811029960415806 | 5.806049348948052 | 0.9991429038394773 | -0.08570961605226524 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1.168778296560491 | 1.1677973853256465 | 0.9991607379793661 | -0.08392620206338908 | |
| tritonbench_kl_div_bwd[x_average-liger_kl_div]_speedup | 1.010764280007287 | 1.0099473117545639 | 0.9991917321685357 | -0.080826783146426 | |
| tritonbench_layer_norm_bwd[x_(4096, 7680)-liger_layer_norm]_speedup | 1.0064029284606988 | 1.0056621520748075 | 0.9992639365755579 | -0.07360634244421016 | |
| tritonbench_softmax_fwd[x_(4096, 4864)-triton_softmax]_speedup | 4.79613259306054 | 4.792723156593179 | 0.9992891279794277 | -0.07108720205722951 | |
| tritonbench_layer_norm_fwd[x_(4096, 15872)-liger_layer_norm]_speedup | 1.603117736297034 | 1.602006138588575 | 0.9993066025761611 | -0.06933974238388929 | |
| tritonbench_geglu_bwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1.0028581137786496 | 1.0021648729698867 | 0.9993087349055282 | -0.06912650944718157 | |
| tritonbench_layer_norm_bwd[x_(4096, 15360)-liger_layer_norm]_speedup | 0.8259637394059474 | 0.8254207346724636 | 0.9993425804214185 | -0.0657419578581453 | |
| tritonbench_layer_norm_bwd[x_(4096, 11776)-liger_layer_norm]_speedup | 0.823121498475445 | 0.822600770845257 | 0.9993673745235029 | -0.06326254764971173 | |
| tritonbench_geglu_fwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 0.9455933746434811 | 0.9450191824295952 | 0.9993927704769479 | -0.060722952305214406 | |
| tritonbench_softmax_fwd[x_(4096, 11520)-triton_softmax]_speedup | 4.739759763553262 | 4.737073905955592 | 0.9994333346558357 | -0.05666653441642744 | |
| tritonbench_softmax_fwd[x_(4096, 11136)-triton_softmax]_speedup | 4.781281553821182 | 4.77876546650166 | 0.9994737629877684 | -0.05262370122316451 | |
| tritonbench_softmax_fwd[x_(4096, 3072)-triton_softmax]_speedup | 4.664489906078971 | 4.662052019066326 | 0.9994773518515997 | -0.05226481484003065 | |
| tritonbench_kl_div_bwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 1.0491676041213225 | 1.0486588755866386 | 0.9995151122349895 | -0.04848877650105443 | |
| tritonbench_layer_norm_bwd[x_(4096, 12288)-liger_layer_norm]_speedup | 0.830807532916175 | 0.8304109925223229 | 0.9995227048646751 | -0.047729513532490664 | |
| tritonbench_layer_norm_bwd[x_average-liger_layer_norm]_speedup | 0.8245709273827535 | 0.8241876952942216 | 0.9995352345373754 | -0.04647654626246389 | |
| tritonbench_layer_norm_bwd[x_(4096, 10752)-liger_layer_norm]_speedup | 0.7753725016942574 | 0.7750146417019049 | 0.9995384670057675 | -0.046153299423246086 | |
| tritonbench_layer_norm_fwd[x_(4096, 12800)-liger_layer_norm]_speedup | 1.536606194974814 | 1.535935117663141 | 0.999563273066406 | -0.04367269335939783 | |
| tritonbench_softmax_fwd[x_(4096, 3840)-triton_softmax]_speedup | 4.733108195536526 | 4.731081310361444 | 0.9995717644534318 | -0.042823554656823415 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 1.1653020137586945 | 1.1648238581420198 | 0.9995896723673099 | -0.04103276326901106 | |
| tritonbench_softmax_fwd[x_(4096, 12544)-triton_softmax]_speedup | 4.71202900530713 | 4.710194201981066 | 0.9996106128964831 | -0.038938710351688055 | |
| tritonbench_softmax_fwd[x_(4096, 12416)-triton_softmax]_speedup | 4.722057049946883 | 4.720319779612497 | 0.999632094590131 | -0.03679054098689738 | |
| tritonbench_fused_linear_jsd_bwd[x_(8192, 4096)-liger_lm_head_jsd]_speedup | 278.4738465208261 | 278.3737789332258 | 0.9996406571430297 | -0.035934285697025725 | |
| tritonbench_welford_fwd[x_5120-test_welford]_speedup | 0.6678498838780406 | 0.6676428193351723 | 0.9996899534642937 | -0.03100465357063209 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 0.2440582650790855 | 0.24399030082625134 | 0.999721524477722 | -0.027847552227799888 | |
| tritonbench_layer_norm_fwd[x_(4096, 6656)-liger_layer_norm]_speedup | 1.5185034074796486 | 1.5180967350214107 | 0.9997321886429528 | -0.02678113570472318 | |
| tritonbench_jsd_bwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 5.839072768844507 | 5.8376351181951005 | 0.9997537878518868 | -0.024621214811315628 | |
| tritonbench_softmax_fwd[x_(4096, 4736)-triton_softmax]_speedup | 4.758445813518913 | 4.757439239505333 | 0.9997884658031158 | -0.021153419688424435 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 1.5369960296655287 | 1.5366741836896918 | 0.9997906006459191 | -0.020939935408093024 | |
| tritonbench_softmax_fwd[x_(4096, 6528)-triton_softmax]_speedup | 4.735801664724452 | 4.73487417655724 | 0.9998041539251696 | -0.019584607483036365 | |
| tritonbench_jsd_bwd[x_average-liger_jsd]_speedup | 5.935903350659348 | 5.934793560392529 | 0.9998130376791434 | -0.018696232085657627 | |
| tritonbench_jsd_bwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 6.230539310543682 | 6.229525043010322 | 0.9998372103147406 | -0.01627896852594013 | |
| tritonbench_layer_norm_bwd[x_(4096, 3584)-liger_layer_norm]_speedup | 0.8469750824808776 | 0.8468788479889653 | 0.9998863786032165 | -0.011362139678350314 | |
| tritonbench_layer_norm_bwd[x_(4096, 7168)-liger_layer_norm]_speedup | 0.964829539803206 | 0.9647260701422596 | 0.9998927586100158 | -0.010724138998419175 | |
| tritonbench_layer_norm_bwd[x_(4096, 15872)-liger_layer_norm]_speedup | 0.8385049460067674 | 0.8384167483974786 | 0.9998948156362001 | -0.010518436379991503 | |
| tritonbench_softmax_fwd[x_(4096, 5632)-triton_softmax]_speedup | 4.740278482436041 | 4.739817731653933 | 0.9999028009042474 | -0.00971990957525648 | |
| tritonbench_kl_div_fwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 4.606177431922028 | 4.605788730153373 | 0.9999156129405782 | -0.00843870594218199 | |
| tritonbench_geglu_bwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 1.0049163941036896 | 1.0048615893819843 | 0.999945463401705 | -0.005453659829501856 | |
| tritonbench_fused_linear_jsd_fwd[x_(2048, 4096)-liger_lm_head_jsd]_speedup | 0.29443544579373526 | 0.2944339194004598 | 0.9999948158644034 | -0.000518413559658093 | |
| tritonbench_jsd_fwd[x_average-liger_jsd]_speedup | 1.8500953664338733 | 1.85012355233838 | 1.0000152348387106 | 0.001523483871057607 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_average-liger_lm_head_ce]_speedup | 274.75736848423674 | 274.77246200413526 | 1.000054933994971 | 0.005493399497091822 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 546.4995953339925 | 546.5323976201926 | 1.0000600225260552 | 0.0060022526055192316 | |
| tritonbench_fused_linear_jsd_fwd[x_average-liger_lm_head_jsd]_speedup | 0.3226564725420707 | 0.3226796220922283 | 1.0000717467403495 | 0.007174674034948758 | |
| tritonbench_layer_norm_fwd[x_(4096, 13312)-liger_layer_norm]_speedup | 1.5477865387090335 | 1.5478999089296361 | 1.0000732466769593 | 0.007324667695929321 | |
| tritonbench_softmax_fwd[x_(4096, 3968)-triton_softmax]_speedup | 4.814963249463494 | 4.815333170558845 | 1.000076827397466 | 0.00768273974660616 | |
| tritonbench_softmax_fwd[x_(4096, 11008)-triton_softmax]_speedup | 4.75019160454774 | 4.7508300647681745 | 1.0001344072562932 | 0.013440725629321193 | |
| tritonbench_fused_linear_cross_entropy_fwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 0.3496130410340915 | 0.3496616861716205 | 1.0001391399399322 | 0.013913993993219798 | |
| tritonbench_cross_entropy_bwd[x_average-liger_cross_entropy_loss]_speedup | 1.8539619767060653 | 1.8542330588149405 | 1.0001462177284546 | 0.014621772845457848 | |
| tritonbench_layer_norm_bwd[x_(4096, 4096)-liger_layer_norm]_speedup | 0.6794676895397839 | 0.6795684974718376 | 1.0001483630989458 | 0.014836309894583621 | |
| tritonbench_layer_norm_bwd[x_(4096, 3072)-liger_layer_norm]_speedup | 0.8417222331936004 | 0.841860396351664 | 1.000164143410516 | 0.016414341051596004 | |
| tritonbench_softmax_fwd[x_(4096, 9600)-triton_softmax]_speedup | 4.79830461721342 | 4.7991239857506285 | 1.0001707620925668 | 0.017076209256683406 | |
| tritonbench_softmax_fwd[x_(4096, 5888)-triton_softmax]_speedup | 4.716451977771094 | 4.717301293102251 | 1.000180075051153 | 0.0180075051152917 | |
| tritonbench_layer_norm_bwd[x_(4096, 8704)-liger_layer_norm]_speedup | 0.7799972108794566 | 0.7801772814683975 | 1.0002308605549215 | 0.023086055492149704 | |
| tritonbench_layer_norm_bwd[x_(4096, 11264)-liger_layer_norm]_speedup | 0.7937575743235263 | 0.7939519291076897 | 1.0002448540844842 | 0.024485408448415846 | |
| tritonbench_softmax_fwd[x_(4096, 12672)-triton_softmax]_speedup | 4.751350171537092 | 4.7525308350003375 | 1.0002484900966295 | 0.02484900966295278 | |
| tritonbench_addmm_fwd[x_(35901, 512, 1536)-triton_addmm]_speedup | 1.0024183980688852 | 1.0026901077216068 | 1.0002710541359228 | 0.027105413592276584 | |
| tritonbench_layer_norm_fwd[x_(4096, 6144)-liger_layer_norm]_speedup | 1.5019641719267829 | 1.5024006658692226 | 1.0002906154158655 | 0.029061541586550277 | |
| tritonbench_jsd_fwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 0.5968401452435037 | 0.5970274795421712 | 1.0003138768398212 | 0.03138768398212033 | |
| tritonbench_jsd_fwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 0.5811393535470355 | 0.5813228993817452 | 1.0003158379028876 | 0.03158379028875835 | |
| tritonbench_softmax_fwd[x_(4096, 8064)-triton_softmax]_speedup | 4.719322610085519 | 4.7208983804798645 | 1.0003338975790674 | 0.033389757906743256 | |
| tritonbench_layer_norm_bwd[x_(4096, 13824)-liger_layer_norm]_speedup | 0.8239021020553735 | 0.8241977641173305 | 1.000358855816995 | 0.03588558169949074 | |
| tritonbench_softmax_fwd[x_(4096, 5120)-triton_softmax]_speedup | 4.725899237106533 | 4.727604510948316 | 1.0003608358444027 | 0.03608358444027182 | |
| tritonbench_jsd_bwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 5.832342907344473 | 5.8344601887043055 | 1.0003630241557242 | 0.03630241557242009 | |
| tritonbench_kl_div_bwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 0.9809439173222789 | 0.9813287655113241 | 1.0003923243543786 | 0.0392324354378637 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 1.2906769833897092 | 1.2911915644223544 | 1.0003986907950382 | 0.03986907950381813 | |
| tritonbench_layer_norm_bwd[x_(4096, 14336)-liger_layer_norm]_speedup | 0.8402587902742877 | 0.8406082186713911 | 1.0004158580679523 | 0.04158580679523016 | |
| tritonbench_kl_div_bwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 1.052250229404886 | 1.052688556518659 | 1.0004165616709066 | 0.04165616709066455 | |
| tritonbench_fused_linear_jsd_bwd[x_average-liger_lm_head_jsd]_speedup | 132.66779222704378 | 132.73292379231944 | 1.000490937281629 | 0.0490937281629078 | |
| tritonbench_softmax_fwd[x_(4096, 10880)-triton_softmax]_speedup | 4.761572095100628 | 4.76420478922783 | 1.0005529043926293 | 0.055290439262933866 | |
| tritonbench_welford_fwd[x_average-test_welford]_speedup | 0.6241323950778657 | 0.624489383755402 | 1.000571975882604 | 0.05719758826039367 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(1, 8192, 16384)-_triton]_speedup | 0.6607601723998078 | 0.6611523124844976 | 1.0005934681009383 | 0.059346810093829916 | |
| tritonbench_softmax_fwd[x_(4096, 11648)-triton_softmax]_speedup | 4.75219274383081 | 4.755052515869277 | 1.0006017794716302 | 0.060177947163020384 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16, 4096, 6656)-_triton]_speedup | 0.6817548815393024 | 0.6821651730605418 | 1.000601816770733 | 0.06018167707328992 | |
| tritonbench_layer_norm_bwd[x_(4096, 6144)-liger_layer_norm]_speedup | 0.953681119421572 | 0.954279462644334 | 1.0006274038675789 | 0.06274038675788685 | |
| tritonbench_softmax_fwd[x_(4096, 640)-triton_softmax]_speedup | 3.598503874402092 | 3.600985057118775 | 1.000689503972563 | 0.06895039725629548 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 162.3205156621521 | 162.43469045018367 | 1.0007033909889074 | 0.07033909889073708 | |
| tritonbench_softmax_fwd[x_(4096, 12032)-triton_softmax]_speedup | 4.709313445258512 | 4.712673519869383 | 1.0007134956400607 | 0.07134956400607351 | |
| tritonbench_softmax_fwd[x_(4096, 1536)-triton_softmax]_speedup | 4.2254758476138825 | 4.228529887982024 | 1.0007227683882907 | 0.07227683882906621 | |
| tritonbench_softmax_fwd[x_(4096, 4480)-triton_softmax]_speedup | 4.711020041177997 | 4.714453033944093 | 1.000728715381401 | 0.07287153814010683 | |
| tritonbench_rope_fwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 2.7782865692045577 | 2.7803489445779532 | 1.0007423191675962 | 0.07423191675961505 | |
| tritonbench_softmax_fwd[x_(4096, 5760)-triton_softmax]_speedup | 4.744207746137763 | 4.747756104352998 | 1.0007479348302408 | 0.07479348302408262 | |
| tritonbench_fused_linear_jsd_fwd[x_(1024, 4096)-liger_lm_head_jsd]_speedup | 0.18093557760265308 | 0.18108129339309353 | 1.0008053462584372 | 0.08053462584372006 | |
| tritonbench_fused_linear_jsd_fwd[x_(8192, 4096)-liger_lm_head_jsd]_speedup | 0.44350033243748105 | 0.44386203478020897 | 1.000815562731915 | 0.0815562731915076 | |
| tritonbench_fused_linear_cross_entropy_bwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 304.56651119165184 | 304.81810202188257 | 1.0008260620290994 | 0.08260620290994058 | |
| tritonbench_kl_div_fwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 4.42930805617185 | 4.433021988912163 | 1.0008384905030794 | 0.0838490503079381 | |
| tritonbench_layer_norm_bwd[x_(4096, 5632)-liger_layer_norm]_speedup | 0.9024844561689532 | 0.9032699926645171 | 1.000870415540339 | 0.08704155403389535 | |
| tritonbench_swiglu_bwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1.0291708624556895 | 1.0300680809272738 | 1.0008717876732767 | 0.08717876732766694 | |
| tritonbench_layer_norm_fwd[x_(4096, 2048)-liger_layer_norm]_speedup | 1.2957907384923155 | 1.296928271143072 | 1.0008778675575967 | 0.08778675575966677 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1.021507216691437 | 1.0224168195220864 | 1.000890451693132 | 0.0890451693132066 | |
| tritonbench_geglu_fwd[x_average-liger_geglu]_speedup | 0.9795143663275336 | 0.9804472685764447 | 1.0009524130334186 | 0.0952413033418642 | |
| tritonbench_softmax_fwd[x_(4096, 8832)-triton_softmax]_speedup | 4.829291283311924 | 4.83396217131708 | 1.000967199477343 | 0.09671994773430281 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 1.372374166809067 | 1.3737238807194068 | 1.000983488281099 | 0.09834882810990653 | |
| tritonbench_kl_div_bwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 1.0396299394620934 | 1.0407109356681534 | 1.0010397894145098 | 0.1039789414509773 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 2.1155103413392733 | 2.117737797288932 | 1.001052916597065 | 0.10529165970649679 | |
| tritonbench_fused_linear_jsd_bwd[x_(2048, 4096)-liger_lm_head_jsd]_speedup | 71.8774387858073 | 71.95404878193854 | 1.0010658420420284 | 0.10658420420284198 | |
| tritonbench_fused_linear_jsd_bwd[x_(1024, 4096)-liger_lm_head_jsd]_speedup | 36.48583862561098 | 36.526825037851054 | 1.0011233512448663 | 0.11233512448662886 | |
| tritonbench_softmax_fwd[x_(4096, 4608)-triton_softmax]_speedup | 4.737780474572858 | 4.743251054023396 | 1.0011546713656105 | 0.11546713656105378 | |
| tritonbench_rope_fwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 2.762818928836894 | 2.766066368751536 | 1.0011754081603925 | 0.11754081603925215 | |
| tritonbench_softmax_fwd[x_(4096, 3200)-triton_softmax]_speedup | 4.6703648365937465 | 4.675889521212759 | 1.0011829235642844 | 0.11829235642843994 | |
| tritonbench_softmax_fwd[x_(4096, 4352)-triton_softmax]_speedup | 4.796080916990599 | 4.801956118966497 | 1.001225000594774 | 0.12250005947740394 | |
| tritonbench_geglu_bwd[x_average-liger_geglu]_speedup | 1.002764451281167 | 1.0039962519361727 | 1.0012284047898106 | 0.12284047898105666 | |
| tritonbench_jsd_fwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 0.5832776076697858 | 0.5840057103951928 | 1.0012482953499893 | 0.12482953499892702 | |
| tritonbench_gemm_fwd[x_(4096, 4096, 4096)-triton_tutorial_matmul]_speedup | 0.9068648900137336 | 0.9080325756896083 | 1.0012876071052403 | 0.12876071052403404 | |
| tritonbench_layer_norm_bwd[x_(4096, 9216)-liger_layer_norm]_speedup | 0.7856755294396502 | 0.7867362919796942 | 1.001350128011242 | 0.13501280112420133 | |
| tritonbench_softmax_fwd[x_(4096, 5248)-triton_softmax]_speedup | 4.691878087517371 | 4.698629866747876 | 1.0014390355215042 | 0.14390355215041506 | |
| tritonbench_layer_norm_bwd[x_(4096, 13312)-liger_layer_norm]_speedup | 0.813214212282992 | 0.8143985189662989 | 1.001456328068815 | 0.1456328068814905 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1.0986306471481364 | 1.1002790811321956 | 1.0015004442014597 | 0.1500444201459672 | |
| tritonbench_softmax_fwd[x_(4096, 9856)-triton_softmax]_speedup | 4.774046472705178 | 4.781455387521872 | 1.0015519151015921 | 0.15519151015921384 | |
| tritonbench_layer_norm_bwd[x_(4096, 6656)-liger_layer_norm]_speedup | 0.9352037135943546 | 0.9366566672652529 | 1.0015536226490311 | 0.1553622649031139 | |
| tritonbench_softmax_fwd[x_(4096, 5504)-triton_softmax]_speedup | 4.737640772509983 | 4.745117178729238 | 1.001578086346824 | 0.15780863468239925 | |
| tritonbench_jsd_bwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 5.8569392074287165 | 5.866285582068034 | 1.0015957779837399 | 0.15957779837398878 | |
| tritonbench_fused_linear_jsd_bwd[x_(4096, 4096)-liger_lm_head_jsd]_speedup | 143.83404497593068 | 144.07704241626234 | 1.0016894292333385 | 0.16894292333384797 | |
| tritonbench_jsd_fwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 4.417655225485256 | 4.425411135466918 | 1.0017556621297468 | 0.175566212974676 | |
| tritonbench_softmax_fwd[x_(4096, 9216)-triton_softmax]_speedup | 4.803214051280067 | 4.81168611045731 | 1.0017638312777222 | 0.1763831277722172 | |
| tritonbench_layer_norm_fwd[x_(4096, 10752)-liger_layer_norm]_speedup | 1.433993338475147 | 1.4365229329910985 | 1.0017640211067098 | 0.17640211067098122 | |
| tritonbench_layer_norm_bwd[x_(4096, 1536)-liger_layer_norm]_speedup | 0.6562133984349304 | 0.6573857403373233 | 1.0017865253973615 | 0.17865253973614603 | |
| tritonbench_softmax_fwd[x_(4096, 2048)-triton_softmax]_speedup | 4.322033987486602 | 4.329954841867878 | 1.0018326682307934 | 0.18326682307934217 | |
| tritonbench_kl_div_fwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 4.548820331922483 | 4.557575726239857 | 1.001924761515844 | 0.19247615158439757 | |
| tritonbench_softmax_fwd[x_(4096, 12288)-triton_softmax]_speedup | 4.7203158462319825 | 4.729529104208536 | 1.0019518308258775 | 0.19518308258774653 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1.0535452591351346 | 1.0556678715000651 | 1.0020147329661688 | 0.20147329661688307 | |
| tritonbench_layer_norm_bwd[x_(4096, 2560)-liger_layer_norm]_speedup | 0.7354784659876081 | 0.7369633850758623 | 1.0020189837730467 | 0.20189837730466564 | |
| tritonbench_kl_div_fwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 3.868215014136774 | 3.8764998651190323 | 1.0021417762332188 | 0.21417762332187618 | |
| tritonbench_kl_div_fwd[x_average-liger_kl_div]_speedup | 4.1746045332607595 | 4.183618747769876 | 1.0021592978298415 | 0.21592978298414778 | |
| tritonbench_softmax_fwd[x_(4096, 7808)-triton_softmax]_speedup | 4.709918097082515 | 4.7213116712876 | 1.0024190599433442 | 0.24190599433442372 | |
| tritonbench_softmax_fwd[x_(4096, 2176)-triton_softmax]_speedup | 4.237798664838354 | 4.248447163051464 | 1.0025127428307208 | 0.2512742830720782 | |
| tritonbench_geglu_bwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1.0031404314980366 | 1.0056660201532144 | 1.0025176820471748 | 0.25176820471748407 | |
| tritonbench_softmax_fwd[x_(4096, 3328)-triton_softmax]_speedup | 4.773967174689948 | 4.786604549481771 | 1.0026471432101214 | 0.2647143210121383 | |
| tritonbench_softmax_fwd[x_(4096, 7040)-triton_softmax]_speedup | 4.735270741519857 | 4.747839572191535 | 1.0026543003257389 | 0.26543003257388786 | |
| tritonbench_softmax_fwd[x_(4096, 2944)-triton_softmax]_speedup | 4.732699131638342 | 4.746758934951704 | 1.0029707790253075 | 0.2970779025307513 | |
| tritonbench_addmm_fwd[x_(20203, 512, 1536)-triton_addmm]_speedup | 0.9656384328215393 | 0.9685781062199709 | 1.0030442796170012 | 0.30442796170011555 | |
| tritonbench_geglu_bwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 1.0001428657442923 | 1.003292525239605 | 1.0031492095811418 | 0.3149209581141843 | |
| tritonbench_softmax_fwd[x_(4096, 8960)-triton_softmax]_speedup | 4.825668094786236 | 4.8410719351643925 | 1.0031920637879757 | 0.3192063787975652 | |
| tritonbench_softmax_fwd[x_average-triton_softmax]_speedup | 4.598279266841951 | 4.612989688000419 | 1.0031991143436076 | 0.3199114343607645 | |
| tritonbench_embedding_bwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1.7234042999595416 | 1.7290340240187698 | 1.0032666299250619 | 0.32666299250618813 | |
| tritonbench_welford_fwd[x_1024-test_welford]_speedup | 0.595690544867554 | 0.5976483581592229 | 1.0032866281134347 | 0.32866281134347197 | |
| tritonbench_softmax_fwd[x_(4096, 1920)-triton_softmax]_speedup | 4.288438598548208 | 4.3028504470998135 | 1.003360628401322 | 0.3360628401321897 | |
| tritonbench_layer_norm_fwd[x_(4096, 2560)-liger_layer_norm]_speedup | 1.2916666804468893 | 1.2960152272125567 | 1.0033666168149225 | 0.3366616814922452 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 2.0957948047944557 | 2.1029817137159093 | 1.003429204474127 | 0.342920447412709 | |
| tritonbench_gemm_fwd[x_(2560, 2560, 2560)-triton_tutorial_matmul]_speedup | 0.7873936596456409 | 0.7902414329625181 | 1.003616708468492 | 0.3616708468491936 | |
| tritonbench_softmax_fwd[x_(4096, 8320)-triton_softmax]_speedup | 4.823275901000963 | 4.840931579791913 | 1.0036605160379246 | 0.36605160379246104 | |
| tritonbench_addmm_fwd[x_(20116, 512, 1536)-triton_addmm]_speedup | 0.9334638339271351 | 0.9369333643651827 | 1.0037168343453127 | 0.37168343453126695 | |
| tritonbench_kl_div_fwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 4.216765946916615 | 4.2325194302025855 | 1.0037359159802288 | 0.3735915980228821 | |
| tritonbench_softmax_fwd[x_(4096, 10368)-triton_softmax]_speedup | 4.766170734240107 | 4.784026085127714 | 1.0037462675768902 | 0.37462675768902276 | |
| tritonbench_layer_norm_fwd[x_(4096, 13824)-liger_layer_norm]_speedup | 1.5595980467872133 | 1.5654630366189963 | 1.0037605778257193 | 0.3760577825719258 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 1.5672431964551443 | 1.573185119612579 | 1.0037913217111896 | 0.3791321711189566 | |
| tritonbench_addmm_fwd[x_(20067, 512, 1536)-triton_addmm]_speedup | 0.9643555725955517 | 0.9680599689479571 | 1.003841317930517 | 0.38413179305170075 | |
| tritonbench_softmax_fwd[x_(4096, 10112)-triton_softmax]_speedup | 4.723815039281187 | 4.742738630635089 | 1.0040059975245732 | 0.40059975245732016 | |
| tritonbench_softmax_fwd[x_(4096, 11264)-triton_softmax]_speedup | 4.74410991006552 | 4.763389958154881 | 1.0040639969256309 | 0.4063996925630864 | |
| tritonbench_softmax_fwd[x_(4096, 9728)-triton_softmax]_speedup | 4.759988411338069 | 4.780213519193115 | 1.0042489825829977 | 0.42489825829976624 | |
| tritonbench_softmax_fwd[x_(4096, 8448)-triton_softmax]_speedup | 4.829355993790675 | 4.850016638947823 | 1.0042781367088514 | 0.42781367088513544 | |
| tritonbench_swiglu_fwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 1.0253479737420317 | 1.029935114502727 | 1.0044737405038744 | 0.44737405038743905 | |
| tritonbench_softmax_fwd[x_(4096, 6400)-triton_softmax]_speedup | 4.736075201701759 | 4.757289835405113 | 1.0044793701112962 | 0.4479370111296177 | |
| tritonbench_softmax_fwd[x_(4096, 9344)-triton_softmax]_speedup | 4.778900354366162 | 4.80077550035501 | 1.0045774434214476 | 0.45774434214476134 | |
| tritonbench_softmax_fwd[x_(4096, 11776)-triton_softmax]_speedup | 4.724915907675665 | 4.746988041032503 | 1.0046714341139875 | 0.4671434113987516 | |
| tritonbench_gemm_fwd[x_(640, 640, 640)-triton_tutorial_matmul]_speedup | 0.7801857869680611 | 0.7838709671362366 | 1.0047234648845589 | 0.4723464884558881 | |
| tritonbench_softmax_fwd[x_(4096, 1280)-triton_softmax]_speedup | 3.8212558063546456 | 3.839743856433038 | 1.0048382131464864 | 0.48382131464863587 | |
| tritonbench_softmax_fwd[x_(4096, 10240)-triton_softmax]_speedup | 4.7278449154269415 | 4.750892570752123 | 1.0048748754955936 | 0.48748754955936224 | |
| tritonbench_softmax_fwd[x_(4096, 6912)-triton_softmax]_speedup | 4.71230134788611 | 4.735458010338793 | 1.0049140877764684 | 0.4914087776468401 | |
| tritonbench_softmax_fwd[x_(4096, 8704)-triton_softmax]_speedup | 4.806738796801383 | 4.830622124987059 | 1.0049687177097222 | 0.49687177097221813 | |
| tritonbench_addmm_fwd[x_(20224, 512, 1536)-triton_addmm]_speedup | 0.9456732125776249 | 0.9505688411409998 | 1.0051768713528755 | 0.5176871352875478 | |
| tritonbench_kl_div_fwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 3.378340418494809 | 3.3963067459922427 | 1.0053180926939975 | 0.5318092693997523 | |
| tritonbench_softmax_fwd[x_(4096, 10752)-triton_softmax]_speedup | 4.765968692811354 | 4.7917434325137975 | 1.0054080799442346 | 0.5408079944234645 | |
| tritonbench_addmm_fwd[x_(35916, 512, 1536)-triton_addmm]_speedup | 0.9936724913049736 | 0.9992676293895483 | 1.005630766810528 | 0.5630766810527987 | |
| tritonbench_softmax_fwd[x_(4096, 12160)-triton_softmax]_speedup | 4.713819820437702 | 4.740697181991932 | 1.0057018219995804 | 0.5701821999580359 | |
| tritonbench_softmax_fwd[x_(4096, 7168)-triton_softmax]_speedup | 4.698772099813405 | 4.726291597497385 | 1.005856742378519 | 0.5856742378518964 | |
| tritonbench_gemm_fwd[x_(3328, 3328, 3328)-triton_tutorial_matmul]_speedup | 0.8480888249786462 | 0.8531017721820877 | 1.0059108752005637 | 0.5910875200563659 | |
| tritonbench_rms_norm_fwd[x_average-liger_rms]_speedup | 3.823401588007052 | 3.8461253503667536 | 1.005943336538589 | 0.5943336538589028 | |
| tritonbench_addmm_fwd[x_(20068, 512, 1536)-triton_addmm]_speedup | 0.9354207960694176 | 0.941293062469078 | 1.0062776735607495 | 0.6277673560749486 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(8, 8192, 6656)-_triton]_speedup | 0.6842993402782463 | 0.6886632750171143 | 1.006377230667932 | 0.6377230667931943 | |
| tritonbench_layer_norm_fwd[x_(4096, 5632)-liger_layer_norm]_speedup | 1.4376743793168425 | 1.4469272967907743 | 1.0064360314178573 | 0.6436031417857313 | |
| tritonbench_softmax_fwd[x_(4096, 7296)-triton_softmax]_speedup | 4.713908058993972 | 4.744495155284071 | 1.0064886917409726 | 0.6488691740972552 | |
| tritonbench_softmax_fwd[x_(4096, 6272)-triton_softmax]_speedup | 4.694673098802581 | 4.727826146374548 | 1.0070618436841583 | 0.7061843684158298 | |
| tritonbench_gemm_fwd[x_(3584, 3584, 3584)-triton_tutorial_matmul]_speedup | 0.9203578001767542 | 0.9269044054406707 | 1.0071131089046665 | 0.7113108904666543 | |
| tritonbench_rope_fwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 2.7812572340747423 | 2.8015301502456733 | 1.0072891194394233 | 0.7289119439423342 | |
| tritonbench_addmm_fwd[x_(35380, 512, 1536)-triton_addmm]_speedup | 0.9708029246700546 | 0.9779032117366308 | 1.007313829497361 | 0.7313829497360924 | |
| tritonbench_cross_entropy_bwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 2.0552548539204967 | 2.0703779040822834 | 1.0073582359545041 | 0.7358235954504133 | |
| tritonbench_softmax_fwd[x_(4096, 5376)-triton_softmax]_speedup | 4.688833915949177 | 4.72355309805354 | 1.0074046517165527 | 0.740465171655269 | |
| tritonbench_softmax_fwd[x_(4096, 6784)-triton_softmax]_speedup | 4.695564642000827 | 4.731657909012764 | 1.007686672373561 | 0.768667237356091 | |
| tritonbench_addmm_fwd[x_(34181, 512, 1536)-triton_addmm]_speedup | 0.9731969042601993 | 0.9809594741113515 | 1.0079763610192052 | 0.797636101920518 | |
| tritonbench_geglu_fwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 0.954548038814724 | 0.9625739390022466 | 1.0084080631473389 | 0.8408063147338884 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 2304, 13312)-_triton]_speedup | 1.0539714623873773 | 1.0628738835015592 | 1.0084465485374878 | 0.844654853748783 | |
| tritonbench_addmm_fwd[x_(34238, 512, 1536)-triton_addmm]_speedup | 0.9607605158251642 | 0.9688888827567239 | 1.0084603465667805 | 0.8460346566780519 | |
| tritonbench_rope_bwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 3.622706284472685 | 3.6538819615315137 | 1.0086056319808345 | 0.8605631980834527 | |
| tritonbench_gemm_fwd[x_(2048, 2048, 2048)-triton_tutorial_matmul]_speedup | 0.948164129761847 | 0.9563794834815925 | 1.0086644848311326 | 0.866448483113258 | |
| tritonbench_welford_fwd[x_3072-test_welford]_speedup | 0.6320777366511622 | 0.6375825919542389 | 1.0087091428536026 | 0.8709142853602581 | |
| tritonbench_addmm_fwd[x_(19632, 512, 1536)-triton_addmm]_speedup | 0.9677419617551467 | 0.9763500231086938 | 1.0088949964905263 | 0.8894996490526275 | |
| tritonbench_rope_bwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 3.6390519976385236 | 3.6717788002594793 | 1.0089932220375508 | 0.8993222037550819 | |
| tritonbench_layer_norm_fwd[x_(4096, 7168)-liger_layer_norm]_speedup | 1.529836614127413 | 1.543926730737488 | 1.0092102100838474 | 0.921021008384737 | |
| tritonbench_addmm_fwd[x_(33660, 512, 1536)-triton_addmm]_speedup | 1.0754609716402375 | 1.0853970846130292 | 1.0092389340336894 | 0.9238934033689361 | |
| tritonbench_layer_norm_fwd[x_(4096, 11264)-liger_layer_norm]_speedup | 1.4705882796996042 | 1.4842767991442158 | 1.0093081929412682 | 0.9308192941268167 | |
| tritonbench_gemm_fwd[x_(3840, 3840, 3840)-triton_tutorial_matmul]_speedup | 0.8356968585206377 | 0.8437144094564455 | 1.0095938507534905 | 0.9593850753490507 | |
| tritonbench_addmm_fwd[x_(36032, 512, 1536)-triton_addmm]_speedup | 1.0209654286796948 | 1.0307768695450366 | 1.0096099638535556 | 0.9609963853555614 | |
| tritonbench_addmm_fwd[x_(27456, 512, 1536)-triton_addmm]_speedup | 0.9811715444777841 | 0.9906400958542367 | 1.0096502506923926 | 0.9650250692392559 | |
| tritonbench_cross_entropy_fwd[x_average-liger_cross_entropy_loss]_speedup | 1.1071305427328388 | 1.1180387949988726 | 1.0098527245387954 | 0.9852724538795377 | |
| tritonbench_embedding_fwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 1.0056649210152915 | 1.0163735433788625 | 1.0106483005818279 | 1.0648300581827863 | |
| tritonbench_gemm_fwd[x_(2816, 2816, 2816)-triton_tutorial_matmul]_speedup | 0.8953938922197581 | 0.9050966584712193 | 1.0108363105173828 | 1.0836310517382763 | |
| tritonbench_addmm_fwd[x_(19410, 512, 1536)-triton_addmm]_speedup | 0.9406250011368684 | 0.9509569358001994 | 1.0109841165723252 | 1.098411657232523 | |
| tritonbench_gemm_fwd[x_(3968, 3968, 3968)-triton_tutorial_matmul]_speedup | 0.9233261606744085 | 0.9335506901344728 | 1.0110735836322413 | 1.1073583632241313 | |
| tritonbench_addmm_fwd[x_(35410, 512, 1536)-triton_addmm]_speedup | 0.9987959858720636 | 1.0107003570485003 | 1.0119187214854921 | 1.1918721485492112 | |
| tritonbench_addmm_fwd[x_(35561, 512, 1536)-triton_addmm]_speedup | 0.9832442229393471 | 0.9950762798319264 | 1.0120336907316965 | 1.2033690731696467 | |
| tritonbench_addmm_fwd[x_(20120, 512, 1536)-triton_addmm]_speedup | 0.9256942419076779 | 0.9370048255167637 | 1.0122184875923792 | 1.2218487592379201 | |
| tritonbench_addmm_fwd[x_(35678, 512, 1536)-triton_addmm]_speedup | 0.9997579237684245 | 1.0120600887587325 | 1.0123051437731416 | 1.2305143773141625 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 512, 512, 64)-triton_tutorial_flash_v2]_speedup | 1.187992438988762 | 1.202978791738349 | 1.0126148553288299 | 1.2614855328829888 | |
| tritonbench_addmm_fwd[x_(35917, 512, 1536)-triton_addmm]_speedup | 1.0231046437803402 | 1.0362694199317053 | 1.0128674776635962 | 1.2867477663596194 | |
| tritonbench_addmm_fwd[x_(35656, 512, 1536)-triton_addmm]_speedup | 0.9672170169227955 | 0.9803294459145921 | 1.0135568634157346 | 1.3556863415734588 | |
| tritonbench_addmm_fwd[x_(35405, 512, 1536)-triton_addmm]_speedup | 0.9934513769083727 | 1.007358293402241 | 1.0139985879703006 | 1.3998587970300624 | |
| tritonbench_addmm_fwd[x_(33894, 512, 1536)-triton_addmm]_speedup | 0.9668874512566172 | 0.9804853316109574 | 1.0140635606933026 | 1.4063560693302612 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 1024, 1024, 64)-triton_tutorial_flash_v2]_speedup | 1.0355204247925078 | 1.0505172483618674 | 1.014482402481211 | 1.448240248121091 | |
| tritonbench_addmm_fwd[x_(20211, 512, 1536)-triton_addmm]_speedup | 0.9688958359887372 | 0.9830441266404071 | 1.0146024888601486 | 1.4602488860148588 | |
| tritonbench_addmm_fwd[x_average-triton_addmm]_speedup | 0.976674213890852 | 0.9911525119898416 | 1.0148240814522085 | 1.4824081452208482 | |
| tritonbench_softmax_fwd[x_(4096, 4096)-triton_softmax]_speedup | 4.770261391391121 | 4.842377041394896 | 1.0151177564680045 | 1.5117756468004506 | |
| tritonbench_addmm_fwd[x_(33961, 512, 1536)-triton_addmm]_speedup | 0.9787490455686836 | 0.9935531374577089 | 1.015125523704009 | 1.5125523704009103 | |
| tritonbench_softmax_fwd[x_(4096, 256)-triton_softmax]_speedup | 3.4013843353309237 | 3.453608094825279 | 1.0153536778987002 | 1.5353677898700235 | |
| tritonbench_rms_norm_fwd[x_(2048, 8192)-liger_rms]_speedup | 4.0773583474471655 | 4.143132265590126 | 1.0161315029335456 | 1.6131502933545594 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 256, 256, 64)-triton_tutorial_flash_v2]_speedup | 1.53685906973334 | 1.5619967345596684 | 1.016356519163914 | 1.6356519163913896 | |
| tritonbench_layer_norm_fwd[x_(4096, 4608)-liger_layer_norm]_speedup | 1.2598958786185914 | 1.2809047154389495 | 1.0166750579765313 | 1.667505797653135 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 256, 256, 64)-triton_tutorial_flash_v2]_speedup | 1.1660351178870558 | 1.1857693065401007 | 1.0169241803701459 | 1.692418037014587 | |
| tritonbench_addmm_fwd[x_(35844, 512, 1536)-triton_addmm]_speedup | 0.9718855037594868 | 0.9884264650184711 | 1.017019454652837 | 1.7019454652837052 | |
| tritonbench_welford_fwd[x_2560-test_welford]_speedup | 0.6247007314039265 | 0.6353532249754771 | 1.0170521547935611 | 1.7052154793561147 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 2048, 2048, 64)-triton_tutorial_flash_v2]_speedup | 0.8925026471777469 | 0.9083892772478239 | 1.0178000929412516 | 1.7800092941251622 | |
| tritonbench_addmm_fwd[x_(35605, 512, 1536)-triton_addmm]_speedup | 1.0019441217758047 | 1.0200049199779049 | 1.0180257539413375 | 1.8025753941337541 | |
| tritonbench_softmax_fwd[x_(4096, 896)-triton_softmax]_speedup | 3.641237019616522 | 3.707216423825704 | 1.018120052019061 | 1.8120052019060973 | |
| tritonbench_softmax_fwd[x_(4096, 3584)-triton_softmax]_speedup | 4.704447641458184 | 4.79024044470145 | 1.0182365305731562 | 1.8236530573156218 | |
| tritonbench_addmm_fwd[x_(34533, 512, 1536)-triton_addmm]_speedup | 0.9953894899082237 | 1.0136476889854003 | 1.0183427685969038 | 1.8342768596903758 | |
| tritonbench_addmm_fwd[x_(35503, 512, 1536)-triton_addmm]_speedup | 0.9831284491089135 | 1.0014673625430912 | 1.018653629086616 | 1.8653629086615942 | |
| tritonbench_addmm_fwd[x_(34839, 512, 1536)-triton_addmm]_speedup | 0.9865853205617965 | 1.0057001093005653 | 1.0193746940486446 | 1.9374694048644558 | |
| tritonbench_embedding_fwd[x_average-liger_embedding]_speedup | 1.0554445315973007 | 1.0759858979400547 | 1.0194622888534624 | 1.946228885346235 | |
| tritonbench_gemm_fwd[x_(3072, 3072, 3072)-triton_tutorial_matmul]_speedup | 0.7108400254661341 | 0.7247222556736624 | 1.0195293310874343 | 1.9529331087434265 | |
| tritonbench_addmm_fwd[x_(15168, 512, 1536)-triton_addmm]_speedup | 1.0528301505510524 | 1.0733996056912702 | 1.019537296808466 | 1.9537296808465943 | |
| tritonbench_layer_norm_fwd[x_(4096, 3072)-liger_layer_norm]_speedup | 1.2915643225648366 | 1.317195300397192 | 1.0198449100711116 | 1.9844910071111554 | |
| tritonbench_addmm_fwd[x_(33887, 512, 1536)-triton_addmm]_speedup | 0.951427847907326 | 0.9703666585587187 | 1.019905671978226 | 1.9905671978226014 | |
| tritonbench_addmm_fwd[x_(34308, 512, 1536)-triton_addmm]_speedup | 0.9288480337079285 | 0.9474858153242829 | 1.0200654799708764 | 2.0065479970876376 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_average-_triton]_speedup | 0.7893782863407978 | 0.8052217833653433 | 1.0200708548723691 | 2.007085487236915 | |
| tritonbench_softmax_fwd[x_(4096, 2432)-triton_softmax]_speedup | 4.396335690154272 | 4.488188543312112 | 1.0208930481272271 | 2.0893048127227143 | |
| tritonbench_addmm_fwd[x_(34579, 512, 1536)-triton_addmm]_speedup | 0.9865853205617965 | 1.0074570623383048 | 1.0211555365172302 | 2.1155536517230233 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1.021826978973435 | 1.0439469775502799 | 1.0216474990698203 | 2.164749906982033 | |
| tritonbench_addmm_fwd[x_(35504, 512, 1536)-triton_addmm]_speedup | 0.9699355696334175 | 0.9921530703386081 | 1.0229061613995531 | 2.290616139955315 | |
| tritonbench_gemm_fwd[x_(2432, 2432, 2432)-triton_tutorial_matmul]_speedup | 0.7775446078052627 | 0.7956359681092646 | 1.0232672956926132 | 2.326729569261321 | |
| tritonbench_rms_norm_fwd[x_(2048, 4096)-liger_rms]_speedup | 4.187429980134267 | 4.286210915292205 | 1.023589871502704 | 2.3589871502704085 | |
| tritonbench_addmm_fwd[x_(19735, 512, 1536)-triton_addmm]_speedup | 0.9394177786802789 | 0.9619652124805717 | 1.0240014978553718 | 2.4001497855371756 | |
| tritonbench_gemm_fwd[x_(3456, 3456, 3456)-triton_tutorial_matmul]_speedup | 0.9290246842066845 | 0.9513232119492733 | 1.0240020831756802 | 2.40020831756802 | |
| tritonbench_softmax_fwd[x_(4096, 2560)-triton_softmax]_speedup | 4.475117508427814 | 4.583173957871456 | 1.0241460585649749 | 2.414605856497487 | |
| tritonbench_softmax_fwd[x_(4096, 512)-triton_softmax]_speedup | 3.837349559712514 | 3.9307230645141638 | 1.0243328118402237 | 2.4332811840223734 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 4096, 16384)-_triton]_speedup | 1.2194818878551292 | 1.2494677504980742 | 1.0245890184524882 | 2.4589018452488176 | |
| tritonbench_addmm_fwd[x_(35791, 512, 1536)-triton_addmm]_speedup | 0.988625444897008 | 1.0130541139681863 | 1.0247097312711015 | 2.4709731271101543 | |
| tritonbench_rope_fwd[x_average-liger_rotary_pos_emb]_speedup | 2.8457374437689253 | 2.9181759066525537 | 1.0254550759917225 | 2.5455075991722476 | |
| tritonbench_addmm_fwd[x_(35249, 512, 1536)-triton_addmm]_speedup | 0.9779820968841367 | 1.0029629854475677 | 1.0255432984336015 | 2.554329843360148 | |
| tritonbench_addmm_fwd[x_(34516, 512, 1536)-triton_addmm]_speedup | 0.9625332333799332 | 0.9871889797853208 | 1.0256154754457765 | 2.561547544577647 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 1.073636395578419 | 1.10205994081131 | 1.0264740887603554 | 2.6474088760355396 | |
| tritonbench_rope_fwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 3.0217040638397648 | 3.1063478403326963 | 1.028011934558996 | 2.8011934558995977 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1.0875763279052024 | 1.1188222148338822 | 1.0287298336006108 | 2.8729833600610766 | |
| tritonbench_welford_fwd[x_7168-test_welford]_speedup | 0.6535596026749115 | 0.6730924202432981 | 1.0298868190268216 | 2.9886819026821554 | |
| tritonbench_addmm_fwd[x_(19747, 512, 1536)-triton_addmm]_speedup | 0.9352686987937017 | 0.9634485501182053 | 1.0301302196479467 | 3.013021964794671 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 4096, 4096, 64)-triton_tutorial_flash_v2]_speedup | 0.8091591199657246 | 0.8339146685645856 | 1.0305941662004743 | 3.059416620047428 | |
| tritonbench_addmm_fwd[x_(35884, 512, 1536)-triton_addmm]_speedup | 0.9643202187036429 | 0.9942899844440457 | 1.0310786449968785 | 3.1078644996878513 | |
| tritonbench_softmax_fwd[x_(4096, 1792)-triton_softmax]_speedup | 4.220402943636216 | 4.355297148975286 | 1.0319623995956289 | 3.1962399595628854 | |
| tritonbench_rope_fwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 2.8334756794862375 | 2.924669544603786 | 1.0321844531003999 | 3.2184453100399857 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 16384, 16384, 64)-triton_tutorial_flash_v2]_speedup | 0.8471208324311976 | 0.874834584203284 | 1.0327152287030283 | 3.2715228703028343 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 0.9932156880461214 | 1.0259209119083823 | 1.032928621905479 | 3.292862190547896 | |
| tritonbench_addmm_fwd[x_(35541, 512, 1536)-triton_addmm]_speedup | 0.9864439698779935 | 1.0202419553590478 | 1.0342624482617442 | 3.4262448261744183 | |
| tritonbench_flash_attention_bwd[x_average-triton_tutorial_flash_v2]_speedup | 0.9270976882359618 | 0.9588675771972968 | 1.0342681136674876 | 3.426811366748761 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1.044725786397817 | 1.081650472774965 | 1.0353439025416067 | 3.5343902541606687 | |
| tritonbench_softmax_fwd[x_(4096, 1408)-triton_softmax]_speedup | 3.956780941059147 | 4.09861328542786 | 1.035845387066272 | 3.584538706627205 | |
| tritonbench_gemm_fwd[x_(1664, 1664, 1664)-triton_tutorial_matmul]_speedup | 0.8002662671032473 | 0.8290712869917317 | 1.0359942947398633 | 3.5994294739863264 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_speedup | 28.56819643229996 | 29.63050695034608 | 1.0371850746883358 | 3.718507468833576 | |
| tritonbench_flash_attention_fwd[x_(4, 48, 8192, 8192, 64)-triton_tutorial_flash_v2]_speedup | 0.8033807376466122 | 0.8354446620616406 | 1.0399112437136033 | 3.991124371360333 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 0.6066079613770747 | 0.6319600871162818 | 1.041793262458433 | 4.179326245843296 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_speedup | 31.275174513001716 | 32.626690198489314 | 1.0432136896606523 | 4.321368966065231 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 0.989763793313489 | 1.0340357278066306 | 1.0447297979500036 | 4.472979795000365 | |
| tritonbench_cross_entropy_fwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1.0198186000834084 | 1.0666786849424028 | 1.045949431452968 | 4.594943145296804 | |
| tritonbench_welford_fwd[x_8192-test_welford]_speedup | 0.6756810326049216 | 0.7071695152662154 | 1.046602584861525 | 4.660258486152502 | |
| tritonbench_rms_norm_fwd[x_(2048, 32768)-liger_rms]_speedup | 3.486346391622217 | 3.6491844635919373 | 1.0467073703178273 | 4.670737031782735 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1.0583523795810728 | 1.1116333308686848 | 1.0503432999401412 | 5.0343299940141195 | |
| tritonbench_gemm_fwd[x_(1920, 1920, 1920)-triton_tutorial_matmul]_speedup | 0.9384098409927033 | 0.9895349390481165 | 1.0544805646979685 | 5.448056469796847 | |
| tritonbench_softmax_fwd[x_(4096, 768)-triton_softmax]_speedup | 3.646258458506749 | 3.8601893404120466 | 1.058671343334479 | 5.867134333447899 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 4096, 4096, 64)-triton_tutorial_flash_v2]_speedup | 0.7650221036392292 | 0.810132938705313 | 1.0589667080878977 | 5.896670808789772 | |
| tritonbench_softmax_fwd[x_(4096, 1024)-triton_softmax]_speedup | 3.6956520638796606 | 3.927165261853107 | 1.062644749552101 | 6.264474955210098 | |
| tritonbench_embedding_fwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1.0024722067409508 | 1.072351369109586 | 1.0697068326670254 | 6.970683266702538 | |
| tritonbench_softmax_fwd[x_(4096, 384)-triton_softmax]_speedup | 3.5155278874343403 | 3.8394648321585056 | 1.0921446095996061 | 9.214460959960613 | |
| tritonbench_gemm_fwd[x_(256, 256, 256)-triton_tutorial_matmul]_speedup | 0.8652173803010095 | 0.9504504669853755 | 1.0985106039533248 | 9.851060395332478 | |
| tritonbench_flex_attention_fwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_speedup | 30.97561112599094 | 34.16689586844004 | 1.1030257233495342 | 10.30257233495342 | |
| tritonbench_rope_fwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 2.7627119388936148 | 3.1051724131009792 | 1.123958082413945 | 12.395808241394501 | |
| tritonbench_rms_norm_bwd[x_average-liger_rms]_speedup | 0.7644359463282543 | 0.8662195324343752 | 1.1331486131637436 | 13.31486131637436 | |
| tritonbench_flash_attention_bwd[x_(4, 48, 128, 128, 64)-triton_tutorial_flash_v2]_speedup | 0.9088146036474087 | 1.0638205973095058 | 1.1705584318737845 | 17.05584318737845 | |
| tritonbench_rms_norm_bwd[x_(2048, 4096)-liger_rms]_speedup | 0.9124579177369803 | 1.0770083158592108 | 1.1803375201459567 | 18.03375201459567 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(128, 8192, 2304)-_triton]_speedup | 0.5843780401856269 | 0.6916099466462786 | 1.1834974949205648 | 18.34974949205648 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4, 13312, 2048)-_triton]_speedup | 0.6565566384407119 | 0.7775446078052627 | 1.1842765152019343 | 18.427651520193432 | |
| tritonbench_rope_bwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 2.2880597028756555 | 2.802905985395061 | 1.2250143568685474 | 22.50143568685474 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(4, 4096, 2304)-_triton]_speedup | 0.5023298795703591 | 0.6164705490810757 | 1.2272225367289333 | 22.722253672893334 | |
| tritonbench_fp8_gemm_blockwise_fwd[x_(128, 2304, 6656)-_triton]_speedup | 0.7445652213210681 | 1.0165631522376146 | 1.365311087770049 | 36.531108777004896 | |
| tritonbench_rms_norm_bwd[x_(2048, 1024)-liger_rms]_speedup | 0.4071146095013766 | 0.598566305701288 | 1.470264863337615 | 47.0264863337615 | |
| tritonbench_rms_norm_bwd[x_(2048, 2048)-liger_rms]_speedup | 0.5594948988718965 | 0.8888091723246544 | 1.5885920928264952 | 58.859209282649516 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment