Created
February 17, 2026 13:49
-
-
Save sohaibiftikhar/8922927e42f0e4ba3c126c42f2a96f85 to your computer and use it in GitHub Desktop.
Reproducer for LLVM #149706
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; *** IR Dump After LoopVectorizePass on broadcast_add_fusion *** | |
| ; Post-vectorization form of the kernel: a single <4 x i64> load, an add of a splatted scalar, and a single <4 x i64> store. | |
| ; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, target_mem0: none, target_mem1: none) uwtable | |
| define noalias noundef ptr @broadcast_add_fusion(ptr readonly captures(none) %0) local_unnamed_addr #0 { | |
| ; Load the pointer stored at byte offset 24 of the argument, then three buffer pointers from it at byte offsets 0, 16 and 32. | |
| %2 = getelementptr inbounds nuw i8, ptr %0, i64 24 | |
| %3 = load ptr, ptr %2, align 8, !invariant.load !3 | |
| %4 = load ptr, ptr %3, align 8, !invariant.load !3, !dereferenceable !4 | |
| %5 = getelementptr inbounds nuw i8, ptr %3, i64 16 | |
| %6 = load ptr, ptr %5, align 8, !invariant.load !3, !dereferenceable !5 | |
| %7 = getelementptr inbounds nuw i8, ptr %3, i64 32 | |
| %8 = load ptr, ptr %7, align 8, !invariant.load !3, !dereferenceable !4 | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !6) | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !9) | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !11) | |
| ; Scalar addend: one i64 read from the buffer %6. | |
| %9 = load i64, ptr %6, align 4, !invariant.load !3, !alias.scope !9, !noalias !13 | |
| br label %vector.ph | |
| vector.ph: ; preds = %1 | |
| ; Broadcast the scalar addend into all four lanes. | |
| %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %9, i64 0 | |
| %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer | |
| br label %vector.body | |
| vector.body: ; preds = %vector.ph | |
| ; Straight-line body with no back-edge: 4 x i64 are read from %4, the splat is added, and 4 x i64 are written to %8. | |
| ; NOTE(review): the scalar loop nest in the input module covers 16 elements (4 x 4), but this body touches only 4, and the | |
| ; wide load/store carry no !invariant.load / !alias.scope / !noalias metadata. Presumably this is the behaviour being | |
| ; reported -- confirm against the linked LLVM report. | |
| %wide.load = load <4 x i64>, ptr %4, align 4 | |
| %10 = add <4 x i64> %wide.load, %broadcast.splat | |
| store <4 x i64> %10, ptr %8, align 4 | |
| br label %middle.block | |
| middle.block: ; preds = %vector.body | |
| br label %broadcast_add_fusion_wrapped.exit | |
| broadcast_add_fusion_wrapped.exit: ; preds = %middle.block | |
| ret ptr null | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; *** IR Dump After LoopVectorizePass on broadcast_add_fusion *** | |
| ; Post-vectorization form that covers 16 i64 elements: one <16 x i64> load, four stride-4 sub-vectors, four adds, and one <16 x i64> store. | |
| ; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, target_mem0: none, target_mem1: none) uwtable | |
| define noalias noundef ptr @broadcast_add_fusion(ptr readonly captures(none) %0) local_unnamed_addr #0 { | |
| ; Load the pointer stored at byte offset 24 of the argument, then three buffer pointers from it at byte offsets 0, 16 and 32. | |
| %2 = getelementptr inbounds nuw i8, ptr %0, i64 24 | |
| %3 = load ptr, ptr %2, align 8, !invariant.load !3 | |
| %4 = load ptr, ptr %3, align 8, !invariant.load !3, !dereferenceable !4 | |
| %5 = getelementptr inbounds nuw i8, ptr %3, i64 16 | |
| %6 = load ptr, ptr %5, align 8, !invariant.load !3, !dereferenceable !5 | |
| %7 = getelementptr inbounds nuw i8, ptr %3, i64 32 | |
| %8 = load ptr, ptr %7, align 8, !invariant.load !3, !dereferenceable !4 | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !6) | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !9) | |
| tail call void @llvm.experimental.noalias.scope.decl(metadata !11) | |
| ; Scalar addend: one i64 read from the buffer %6. | |
| %9 = load i64, ptr %6, align 4, !invariant.load !3, !alias.scope !9, !noalias !13 | |
| br label %vector.ph | |
| vector.ph: ; preds = %1 | |
| ; Broadcast the scalar addend into all four lanes. | |
| %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %9, i64 0 | |
| %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer | |
| br label %vector.body | |
| vector.body: ; preds = %vector.ph | |
| ; Read all 16 input elements with one wide load. | |
| %wide.vec = load <16 x i64>, ptr %4, align 4, !invariant.load !3, !alias.scope !6, !noalias !14 | |
| ; De-interleave: sub-vector k holds input elements k, k+4, k+8, k+12. | |
| %strided.vec = shufflevector <16 x i64> %wide.vec, <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> | |
| %strided.vec2 = shufflevector <16 x i64> %wide.vec, <16 x i64> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> | |
| %strided.vec3 = shufflevector <16 x i64> %wide.vec, <16 x i64> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> | |
| %strided.vec4 = shufflevector <16 x i64> %wide.vec, <16 x i64> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> | |
| ; Add the broadcast scalar to each sub-vector. | |
| %10 = add <4 x i64> %strided.vec, %broadcast.splat | |
| %11 = add <4 x i64> %strided.vec2, %broadcast.splat | |
| %12 = add <4 x i64> %strided.vec3, %broadcast.splat | |
| %13 = add <4 x i64> %strided.vec4, %broadcast.splat | |
| ; Concatenate the four results pairwise into one <16 x i64>. | |
| %14 = shufflevector <4 x i64> %10, <4 x i64> %11, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
| %15 = shufflevector <4 x i64> %12, <4 x i64> %13, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | |
| %16 = shufflevector <8 x i64> %14, <8 x i64> %15, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | |
| ; Re-interleave (inverse of the stride-4 split above) so every element returns to its original index before the store. | |
| %interleaved.vec = shufflevector <16 x i64> %16, <16 x i64> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> | |
| store <16 x i64> %interleaved.vec, ptr %8, align 4, !alias.scope !11, !noalias !15 | |
| br label %middle.block | |
| middle.block: ; preds = %vector.body | |
| br label %broadcast_add_fusion_wrapped.exit | |
| broadcast_add_fusion_wrapped.exit: ; preds = %middle.block | |
| ret ptr null | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; ModuleID = '__compute_module_broadcast_add_fusion_kernel_module' | |
| ; Input module: target description plus the struct layouts that the GEPs in @broadcast_add_fusion index into. | |
| source_filename = "__compute_module_broadcast_add_fusion_kernel_module" | |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | |
| target triple = "x86_64-grtev4-linux-gnu" | |
| ; Call frame passed to the kernel entry point; fields 1 and 3 are read by @broadcast_add_fusion. | |
| %XLA_CPU_KernelCallFrame = type { ptr, ptr, i64, ptr } | |
| ; One kernel argument; only the leading ptr field is read by @broadcast_add_fusion. | |
| %XLA_CPU_KernelArg = type { ptr, i64 } | |
| ; Three i64 values, all of which are loaded by @broadcast_add_fusion. | |
| %kernel_dim3 = type { i64, i64, i64 } | |
| ; Entry point: unpacks three buffer pointers and three i64 values from the call frame %0, | |
| ; forwards them to @broadcast_add_fusion_wrapped, and always returns null. | |
| ; Function Attrs: uwtable | |
| define ptr @broadcast_add_fusion(ptr %0) #0 { | |
| ; Field 3 of the call frame points to an array of %XLA_CPU_KernelArg; take the ptr field of entries 0, 1 and 2. | |
| %2 = getelementptr inbounds %XLA_CPU_KernelCallFrame, ptr %0, i32 0, i32 3 | |
| %3 = load ptr, ptr %2, align 8, !invariant.load !3 | |
| %4 = getelementptr inbounds %XLA_CPU_KernelArg, ptr %3, i32 0, i32 0 | |
| %5 = load ptr, ptr %4, align 8, !invariant.load !3, !dereferenceable !4 | |
| %6 = getelementptr inbounds %XLA_CPU_KernelArg, ptr %3, i32 1, i32 0 | |
| %7 = load ptr, ptr %6, align 8, !invariant.load !3, !dereferenceable !5 | |
| %8 = getelementptr inbounds %XLA_CPU_KernelArg, ptr %3, i32 2, i32 0 | |
| %9 = load ptr, ptr %8, align 8, !invariant.load !3, !dereferenceable !4 | |
| ; Field 1 of the call frame points to a %kernel_dim3; load its three i64 members. | |
| %10 = getelementptr inbounds %XLA_CPU_KernelCallFrame, ptr %0, i32 0, i32 1 | |
| %11 = load ptr, ptr %10, align 8 | |
| %12 = getelementptr inbounds %kernel_dim3, ptr %11, i32 0, i32 0 | |
| %13 = load i64, ptr %12, align 4, !invariant.load !3 | |
| %14 = getelementptr inbounds %kernel_dim3, ptr %11, i32 0, i32 1 | |
| %15 = load i64, ptr %14, align 4, !invariant.load !3 | |
| %16 = getelementptr inbounds %kernel_dim3, ptr %11, i32 0, i32 2 | |
| %17 = load i64, ptr %16, align 4, !invariant.load !3 | |
| ; Hand the three buffers and the three i64 values to the loop nest. | |
| call void @broadcast_add_fusion_wrapped(ptr %5, ptr %7, ptr %9, i64 %13, i64 %15, i64 %17) | |
| ret ptr null | |
| } | |
| ; Scalar loop nest: for i in [0, 4) and j in [0, 4), stores %0[i*4 + j] + %1[0] into %2[i*4 + j] | |
| ; (16 i64 elements in total). The i64 parameters %3, %4 and %5 are not referenced in the body. | |
| ; Function Attrs: alwaysinline | |
| define internal void @broadcast_add_fusion_wrapped(ptr noalias align 32 dereferenceable(128) %0, ptr noalias align 32 dereferenceable(8) %1, ptr noalias align 32 dereferenceable(128) %2, i64 %3, i64 %4, i64 %5) #1 { | |
| ; Load the scalar addend once, before either loop. | |
| %7 = getelementptr inbounds [1 x i64], ptr %1, i32 0, i32 0 | |
| %8 = load i64, ptr %7, align 4, !invariant.load !3 | |
| br label %9 | |
| ; Outer loop header: %10 is the outer index i, starting at 0 and running while i < 4. | |
| 9: ; preds = %24, %6 | |
| %10 = phi i64 [ %25, %24 ], [ 0, %6 ] | |
| %11 = icmp slt i64 %10, 4 | |
| br i1 %11, label %12, label %26 | |
| ; Outer loop body: %13 = i * 4 is the base element offset for this outer iteration. | |
| 12: ; preds = %9 | |
| %13 = mul nsw i64 %10, 4 | |
| br label %14 | |
| ; Inner loop header: %15 is the inner index j, starting at 0 and running while j < 4. | |
| 14: ; preds = %17, %12 | |
| %15 = phi i64 [ %23, %17 ], [ 0, %12 ] | |
| %16 = icmp slt i64 %15, 4 | |
| br i1 %16, label %17, label %24 | |
| ; Inner loop body: load element i*4 + j from %0, add the scalar, store to the same index of %2, then j += 1. | |
| 17: ; preds = %14 | |
| %18 = add nsw i64 %13, %15 | |
| %19 = getelementptr inbounds [16 x i64], ptr %0, i32 0, i64 %18 | |
| %20 = load i64, ptr %19, align 4, !invariant.load !3 | |
| %21 = add i64 %20, %8 | |
| %22 = getelementptr inbounds [16 x i64], ptr %2, i32 0, i64 %18 | |
| store i64 %21, ptr %22, align 4 | |
| %23 = add i64 %15, 1 | |
| br label %14 | |
| ; Outer latch: i += 1. The back-edge carries loop metadata !6, which disables unrolling of the outer loop. | |
| 24: ; preds = %14 | |
| %25 = add i64 %10, 1 | |
| br label %9, !llvm.loop !6 | |
| ; Exit once the outer loop condition fails. | |
| 26: ; preds = %9 | |
| ret void | |
| } | |
| ; Attribute groups: #0 is attached to @broadcast_add_fusion, #1 to @broadcast_add_fusion_wrapped. | |
| attributes #0 = { uwtable "frame-pointer"="all" "prefer-vector-width"="256" } | |
| attributes #1 = { alwaysinline } | |
| ; Module-level named metadata. | |
| !llvm.module.flags = !{!0, !1} | |
| !xla_cpu_memory_region_name = !{!2} | |
| !0 = !{i32 2, !"Debug Info Version", i32 3} | |
| !1 = !{i32 1, !"xla_dylib_index", i64 0} | |
| !2 = !{!"xla_cpu_emitter__loop_fusion_kernel_emitter__hlo_opcode__fusion"} | |
| ; !3 is the empty node referenced by every !invariant.load; !4 and !5 are the !dereferenceable byte counts (128 and 8). | |
| !3 = !{} | |
| !4 = !{i64 128} | |
| !5 = !{i64 8} | |
| ; !6 is the loop ID on the outer-loop back-edge of @broadcast_add_fusion_wrapped; !7 disables unrolling for that loop. | |
| !6 = distinct !{!6, !7} | |
| !7 = !{!"llvm.loop.unroll.disable"} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment