Last active
September 21, 2023 15:05
-
-
Save banach-space/3e2c8154a19d075efb609a03f25bd743 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- // | |
| func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() { | |
| %c0_i32 = arith.constant 0 : i32 | |
| %c10 = arith.constant 10 : index | |
| %c20 = arith.constant 20 : index | |
| %c0 = arith.constant 0 : index | |
| %c1 = arith.constant 1 : index | |
| %c2 = arith.constant 2 : index | |
| %c5 = arith.constant 5 : index | |
| %c3 = arith.constant 3 : index | |
| %c9 = arith.constant 9 : index | |
| %cst = arith.constant dense<0> : vector<1xi32> | |
| %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index | |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index | |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index | |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index | |
| %3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y] | |
| %4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y] | |
| %5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x] | |
| %6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x] | |
| scf.for %arg0 = %3 to %c10 step %4 { | |
| scf.for %arg1 = %5 to %c20 step %6 { | |
| %subview = memref.subview %2[0, %arg0, %arg1, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_1 = memref.subview %0[0, %arg0, %arg1, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| scf.for %arg2 = %c0 to %c2 step %c1 { | |
| scf.for %arg3 = %c0 to %c5 step %c1 { | |
| %subview_2 = memref.subview %subview_1[0, %arg2, %arg3, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_3 = memref.subview %subview[0, %arg2, %arg3, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| vector.transfer_write %cst, %subview_3[%c0, %c0, %c0, %c0] {in_bounds = [true]} : vector<1xi32>, memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_4 = memref.subview %subview_3[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %cast = memref.cast %subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %7 = scf.for %arg4 = %c0 to %c9 step %c3 iter_args(%arg5 = %cast) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
| %subview_5 = memref.subview %subview_2[0, 0, %arg4, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_6 = memref.subview %1[0, %arg4, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_7 = memref.subview %subview_5[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_8 = memref.subview %subview_6[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_7 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| %collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>> | |
| %collapse_shape_9 = memref.collapse_shape %subview_8 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg5 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| %collapse_shape_10 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
| %8 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
| %9 = vector.transfer_read %collapse_shape_9[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
| %10 = vector.transfer_read %collapse_shape_10[%c0], %c0_i32 {in_bounds = [true]} : memref<1xi32, #hal.descriptor_type<storage_buffer>>, vector<1xi32> | |
| %11 = vector.extract_strided_slice %8 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %12 = vector.extract_strided_slice %8 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %13 = vector.extract_strided_slice %8 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %14 = vector.extract %9[0] : vector<3xi32> | |
| %15 = vector.extract %9[1] : vector<3xi32> | |
| %16 = vector.extract %9[2] : vector<3xi32> | |
| %17 = vector.outerproduct %11, %14, %10 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| %18 = vector.outerproduct %12, %15, %17 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| %19 = vector.outerproduct %13, %16, %18 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| vector.transfer_write %19, %collapse_shape_10[%c0] {in_bounds = [true]} : vector<1xi32>, memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
| %cast_11 = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| scf.yield %cast_11 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| } | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%7 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| return | |
| } | |
| // -----// IR Dump After OptimizeVectorTransfer (iree-codegen-optimize-vector-transfer) //----- // | |
| func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() { | |
| %c0_i32 = arith.constant 0 : i32 | |
| %c10 = arith.constant 10 : index | |
| %c20 = arith.constant 20 : index | |
| %c0 = arith.constant 0 : index | |
| %c1 = arith.constant 1 : index | |
| %c2 = arith.constant 2 : index | |
| %c5 = arith.constant 5 : index | |
| %c3 = arith.constant 3 : index | |
| %c9 = arith.constant 9 : index | |
| %cst = arith.constant dense<0> : vector<1xi32> | |
| %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
| memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index | |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index | |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index | |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index | |
| %3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y] | |
| %4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y] | |
| %5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x] | |
| %6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x] | |
| %collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>> | |
| %collapse_shape_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
| %cast = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>> | |
| %7 = vector.transfer_read %subview[], %c0_i32 : memref<i32, #hal.descriptor_type<storage_buffer>>, vector<i32> | |
| %8 = vector.shape_cast %7 : vector<i32> to vector<1xi32> | |
| %9 = scf.for %arg0 = %3 to %c10 step %4 iter_args(%arg1 = %8) -> (vector<1xi32>) { | |
| %11 = scf.for %arg2 = %5 to %c20 step %6 iter_args(%arg3 = %arg1) -> (vector<1xi32>) { | |
| %subview_3 = memref.subview %2[0, %arg0, %arg2, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_4 = memref.subview %0[0, %arg0, %arg2, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %12 = scf.for %arg4 = %c0 to %c2 step %c1 iter_args(%arg5 = %arg3) -> (vector<1xi32>) { | |
| %13 = scf.for %arg6 = %c0 to %c5 step %c1 iter_args(%arg7 = %arg5) -> (vector<1xi32>) { | |
| %subview_5 = memref.subview %subview_4[0, %arg4, %arg6, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_6 = memref.subview %subview_3[0, %arg4, %arg6, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_7 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %14 = vector.shape_cast %cst : vector<1xi32> to vector<i32> | |
| vector.transfer_write %14, %subview_7[] : vector<i32>, memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_8 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %cast_9 = memref.cast %subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %15:2 = scf.for %arg8 = %c0 to %c9 step %c3 iter_args(%arg9 = %cast_9, %arg10 = %arg7) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32>) { | |
| %subview_10 = memref.subview %subview_5[0, 0, %arg8, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_11 = memref.subview %1[0, %arg8, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_12 = memref.subview %subview_10[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| %subview_13 = memref.subview %subview_11[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_12 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| %collapse_shape_14 = memref.collapse_shape %subview_13 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg9 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| %16 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
| %17 = vector.transfer_read %collapse_shape_14[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
| %18 = vector.extract_strided_slice %16 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %19 = vector.extract_strided_slice %16 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %20 = vector.extract_strided_slice %16 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
| %21 = vector.extract %17[0] : vector<3xi32> | |
| %22 = vector.extract %17[1] : vector<3xi32> | |
| %23 = vector.extract %17[2] : vector<3xi32> | |
| %24 = vector.outerproduct %18, %21, %arg10 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| %25 = vector.outerproduct %19, %22, %24 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| %26 = vector.outerproduct %20, %23, %25 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
| scf.yield %cast, %26 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32> | |
| } | |
| linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%15#0 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
| ^bb0(%in: i32, %out: i32): | |
| linalg.yield %in : i32 | |
| } | |
| scf.yield %15#1 : vector<1xi32> | |
| } | |
| scf.yield %13 : vector<1xi32> | |
| } | |
| scf.yield %12 : vector<1xi32> | |
| } | |
| scf.yield %11 : vector<1xi32> | |
| } | |
| %subview_2 = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>> | |
| %10 = vector.shape_cast %9 : vector<1xi32> to vector<i32> | |
| vector.transfer_write %10, %subview_2[] : vector<i32>, memref<i32, #hal.descriptor_type<storage_buffer>> | |
| return | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc' | |
| source_filename = "pipeline_dispatch_0" | |
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |
| target triple = "aarch64-unknown-unknown-eabi-elf" | |
| %iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 } | |
| %iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 } | |
| %iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr } | |
| %iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t } | |
| %iree_hal_executable_import_table_v0_t = type { i32, ptr } | |
| %iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr } | |
| %iree_hal_executable_constant_table_v0_t = type { i32 } | |
| %iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr } | |
| %iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 } | |
| @0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1 | |
| @iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 } | |
| @iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32] | |
| @iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer | |
| @1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1 | |
| @iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1] | |
| @2 = private constant [1 x i8] zeroinitializer, align 1 | |
| @iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2] | |
| @3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1 | |
| @iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }] | |
| @iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer } | |
| declare ptr @malloc(i64) #0 | |
| declare void @free(ptr) #0 | |
| define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 { | |
| %4 = alloca i32, i64 1, align 64, !dbg !79 | |
| %5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79 | |
| %6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79 | |
| %7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79 | |
| %8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79 | |
| %9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79 | |
| %10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79 | |
| %11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79 | |
| %12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79 | |
| %13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79 | |
| %14 = alloca i32, i64 3, align 64, !dbg !79 | |
| %15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
| %16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80 | |
| %17 = load ptr, ptr %16, align 8, !dbg !80 | |
| %18 = ptrtoint ptr %17 to i64, !dbg !80 | |
| %19 = and i64 %18, 63, !dbg !80 | |
| %20 = icmp eq i64 %19, 0, !dbg !80 | |
| call void @llvm.assume(i1 %20), !dbg !80 | |
| %21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
| %22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80 | |
| %23 = getelementptr ptr, ptr %22, i32 1, !dbg !80 | |
| %24 = load ptr, ptr %23, align 8, !dbg !80 | |
| %25 = ptrtoint ptr %24 to i64, !dbg !80 | |
| %26 = and i64 %25, 63, !dbg !80 | |
| %27 = icmp eq i64 %26, 0, !dbg !80 | |
| call void @llvm.assume(i1 %27), !dbg !80 | |
| %28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79 | |
| %29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79 | |
| %30 = getelementptr ptr, ptr %29, i32 2, !dbg !79 | |
| %31 = load ptr, ptr %30, align 8, !dbg !79 | |
| %32 = ptrtoint ptr %31 to i64, !dbg !79 | |
| %33 = and i64 %32, 63, !dbg !79 | |
| %34 = icmp eq i64 %33, 0, !dbg !79 | |
| call void @llvm.assume(i1 %34), !dbg !79 | |
| %35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79 | |
| %36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79 | |
| %37 = zext i32 %36 to i64, !dbg !79 | |
| %38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79 | |
| %39 = zext i32 %38 to i64, !dbg !79 | |
| %40 = getelementptr i32, ptr %4, i64 0, !dbg !79 | |
| %41 = load i32, ptr %40, align 4, !dbg !79 | |
| %42 = insertelement <1 x i32> undef, i32 %41, i32 0, !dbg !79 | |
| %43 = extractelement <1 x i32> %42, i64 0, !dbg !79 | |
| %44 = insertelement <1 x i32> zeroinitializer, i32 %43, i64 0, !dbg !79 | |
| br label %45, !dbg !79 | |
| 45: ; preds = %159, %3 | |
| %46 = phi i64 [ %160, %159 ], [ 0, %3 ] | |
| %47 = phi <1 x i32> [ %51, %159 ], [ %44, %3 ] | |
| %48 = icmp slt i64 %46, 2, !dbg !79 | |
| br i1 %48, label %49, label %161, !dbg !79 | |
| 49: ; preds = %144, %45 | |
| %50 = phi i64 [ %158, %144 ], [ 0, %45 ] | |
| %51 = phi <1 x i32> [ %80, %144 ], [ %47, %45 ] | |
| %52 = icmp slt i64 %50, 5, !dbg !79 | |
| br i1 %52, label %53, label %159, !dbg !79 | |
| 53: ; preds = %49 | |
| %54 = mul i64 %39, 2, !dbg !79 | |
| %55 = add i64 %46, %54, !dbg !79 | |
| %56 = mul i64 %37, 5, !dbg !79 | |
| %57 = add i64 %50, %56, !dbg !79 | |
| %58 = mul i64 %55, 20, !dbg !79 | |
| %59 = add i64 0, %58, !dbg !79 | |
| %60 = add i64 %59, %57, !dbg !79 | |
| %61 = add i64 %60, 0, !dbg !79 | |
| %62 = getelementptr i32, ptr %31, i64 %61, !dbg !79 | |
| store i32 0, ptr %62, align 4, !dbg !79 | |
| %63 = mul i64 %46, 20, !dbg !79 | |
| %64 = mul i64 %39, 40, !dbg !79 | |
| %65 = add i64 %63, %64, !dbg !79 | |
| %66 = add i64 %65, %50, !dbg !79 | |
| %67 = add i64 %66, %56, !dbg !79 | |
| %68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79 | |
| %69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, ptr %31, 1, !dbg !79 | |
| %70 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %69, i64 %67, 2, !dbg !79 | |
| %71 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %70, i64 1, 3, 0, !dbg !79 | |
| %72 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %71, i64 200, 4, 0, !dbg !79 | |
| %73 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, i64 1, 3, 1, !dbg !79 | |
| %74 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %73, i64 20, 4, 1, !dbg !79 | |
| %75 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %74, i64 1, 3, 2, !dbg !79 | |
| %76 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %75, i64 1, 4, 2, !dbg !79 | |
| br label %77, !dbg !79 | |
| 77: ; preds = %99, %53 | |
| %78 = phi i64 [ %143, %99 ], [ 0, %53 ] | |
| %79 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %99 ], [ %76, %53 ] | |
| %80 = phi <1 x i32> [ %142, %99 ], [ %51, %53 ] | |
| %81 = icmp slt i64 %78, 9, !dbg !79 | |
| br i1 %81, label %82, label %144, !dbg !79 | |
| 82: ; preds = %85, %77 | |
| %83 = phi i64 [ %98, %85 ], [ 0, %77 ] | |
| %84 = icmp slt i64 %83, 3, !dbg !79 | |
| br i1 %84, label %85, label %99, !dbg !79 | |
| 85: ; preds = %82 | |
| %86 = add i64 %56, %50, !dbg !79 | |
| %87 = add i64 %86, %78, !dbg !79 | |
| %88 = add i64 %87, %83, !dbg !79 | |
| %89 = mul i64 %55, 28, !dbg !79 | |
| %90 = add i64 0, %89, !dbg !79 | |
| %91 = add i64 %90, %88, !dbg !79 | |
| %92 = add i64 %91, 0, !dbg !79 | |
| %93 = getelementptr i32, ptr %17, i64 %92, !dbg !79 | |
| %94 = load i32, ptr %93, align 4, !dbg !79 | |
| %95 = add i64 0, %83, !dbg !79 | |
| %96 = add i64 %95, 0, !dbg !79 | |
| %97 = getelementptr i32, ptr %14, i64 %96, !dbg !79 | |
| store i32 %94, ptr %97, align 4, !dbg !79 | |
| %98 = add i64 %83, 1, !dbg !79 | |
| br label %82, !dbg !79 | |
| 99: ; preds = %82 | |
| %100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79 | |
| %101 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79 | |
| %102 = getelementptr i32, ptr %100, i64 %101, !dbg !79 | |
| %103 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79 | |
| %104 = mul i64 %103, 0, !dbg !79 | |
| %105 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79 | |
| %106 = mul i64 %105, 0, !dbg !79 | |
| %107 = add i64 %104, %106, !dbg !79 | |
| %108 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79 | |
| %109 = mul i64 %108, 0, !dbg !79 | |
| %110 = add i64 %107, %109, !dbg !79 | |
| %111 = getelementptr i32, ptr %102, i64 %110, !dbg !79 | |
| %112 = load i32, ptr %111, align 4, !dbg !79 | |
| store i32 %112, ptr %40, align 4, !dbg !79 | |
| %113 = load <3 x i32>, ptr %14, align 4, !dbg !79 | |
| %114 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> zeroinitializer, !dbg !79 | |
| %115 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 1>, !dbg !79 | |
| %116 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 2>, !dbg !79 | |
| %117 = add i64 0, %78, !dbg !79 | |
| %118 = add i64 %117, 0, !dbg !79 | |
| %119 = getelementptr i32, ptr %24, i64 %118, !dbg !79 | |
| %120 = load i32, ptr %119, align 4, !dbg !79 | |
| %121 = add i64 %78, 1, !dbg !79 | |
| %122 = add i64 0, %121, !dbg !79 | |
| %123 = add i64 %122, 0, !dbg !79 | |
| %124 = getelementptr i32, ptr %24, i64 %123, !dbg !79 | |
| %125 = load i32, ptr %124, align 4, !dbg !79 | |
| %126 = add i64 %78, 2, !dbg !79 | |
| %127 = add i64 0, %126, !dbg !79 | |
| %128 = add i64 %127, 0, !dbg !79 | |
| %129 = getelementptr i32, ptr %24, i64 %128, !dbg !79 | |
| %130 = load i32, ptr %129, align 4, !dbg !79 | |
| %131 = insertelement <1 x i32> undef, i32 %120, i32 0, !dbg !79 | |
| %132 = shufflevector <1 x i32> %131, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %133 = mul <1 x i32> %114, %132, !dbg !79 | |
| %134 = add <1 x i32> %133, %80, !dbg !79 | |
| %135 = insertelement <1 x i32> undef, i32 %125, i32 0, !dbg !79 | |
| %136 = shufflevector <1 x i32> %135, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %137 = mul <1 x i32> %115, %136, !dbg !79 | |
| %138 = add <1 x i32> %137, %134, !dbg !79 | |
| %139 = insertelement <1 x i32> undef, i32 %130, i32 0, !dbg !79 | |
| %140 = shufflevector <1 x i32> %139, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %141 = mul <1 x i32> %116, %140, !dbg !79 | |
| %142 = add <1 x i32> %141, %138, !dbg !79 | |
| %143 = add i64 %78, 3, !dbg !79 | |
| br label %77, !dbg !79 | |
| 144: ; preds = %77 | |
| %145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79 | |
| %146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79 | |
| %147 = getelementptr i32, ptr %145, i64 %146, !dbg !79 | |
| %148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79 | |
| %149 = mul i64 %148, 0, !dbg !79 | |
| %150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79 | |
| %151 = mul i64 %150, 0, !dbg !79 | |
| %152 = add i64 %149, %151, !dbg !79 | |
| %153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79 | |
| %154 = mul i64 %153, 0, !dbg !79 | |
| %155 = add i64 %152, %154, !dbg !79 | |
| %156 = getelementptr i32, ptr %147, i64 %155, !dbg !79 | |
| %157 = load i32, ptr %156, align 4, !dbg !79 | |
| store i32 %157, ptr %62, align 4, !dbg !79 | |
| %158 = add i64 %50, 1, !dbg !79 | |
| br label %49, !dbg !79 | |
| 159: ; preds = %49 | |
| %160 = add i64 %46, 1, !dbg !79 | |
| br label %45, !dbg !79 | |
| 161: ; preds = %45 | |
| %162 = extractelement <1 x i32> %47, i64 0, !dbg !79 | |
| %163 = insertelement <1 x i32> zeroinitializer, i32 %162, i64 0, !dbg !79 | |
| %164 = extractelement <1 x i32> %163, i64 0, !dbg !79 | |
| store i32 %164, ptr %40, align 4, !dbg !79 | |
| ret i32 0, !dbg !79 | |
| } | |
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | |
| declare void @llvm.assume(i1 noundef) #1 | |
| ; Function Attrs: uwtable | |
| define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 { | |
| entry: | |
| %2 = icmp eq i32 %0, 3 | |
| %3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null | |
| ret ptr %3 | |
| } | |
| attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" } | |
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | |
| attributes #2 = { uwtable "nonlazybind" } | |
| !llvm.module.flags = !{!0} | |
| !llvm.dbg.cu = !{!1} | |
| !0 = !{i32 2, !"Debug Info Version", i32 3} | |
| !1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) | |
| !2 = !DIFile(filename: "-", directory: "") | |
| !3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) | |
| !4 = !DISubroutineType(cc: DW_CC_normal, types: !5) | |
| !5 = !{!6, !7, !38, !67} | |
| !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |
| !7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) | |
| !8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9) | |
| !9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10) | |
| !10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12) | |
| !11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".") | |
| !12 = !{!13, !21, !24, !27, !29} | |
| !13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64) | |
| !14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) | |
| !15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16) | |
| !16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19) | |
| !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18) | |
| !18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) | |
| !19 = !{!20} | |
| !20 = !DISubrange(count: 64) | |
| !21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64) | |
| !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) | |
| !23 = !DIBasicType(name: "void", encoding: DW_ATE_address) | |
| !24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128) | |
| !25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64) | |
| !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) | |
| !27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192) | |
| !28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64) | |
| !29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256) | |
| !30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31) | |
| !31 = !{!32} | |
| !32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33) | |
| !33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36) | |
| !34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35) | |
| !35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) | |
| !36 = !{!37} | |
| !37 = !DISubrange(count: 8) | |
| !38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64) | |
| !39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40) | |
| !40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41) | |
| !41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42) | |
| !42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62} | |
| !43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32) | |
| !44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32) | |
| !45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64) | |
| !46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47) | |
| !47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) | |
| !48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80) | |
| !49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96) | |
| !50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128) | |
| !51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160) | |
| !52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176) | |
| !53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54) | |
| !54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) | |
| !55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184) | |
| !56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192) | |
| !57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256) | |
| !58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64) | |
| !59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60) | |
| !60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19) | |
| !61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64) | |
| !62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320) | |
| !63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64) | |
| !64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65) | |
| !65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19) | |
| !66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34) | |
| !67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64) | |
| !68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69) | |
| !69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70) | |
| !70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71) | |
| !71 = !{!72, !73, !74, !75, !76, !77, !78} | |
| !72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32) | |
| !73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32) | |
| !74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64) | |
| !75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80) | |
| !76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96) | |
| !77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128) | |
| !78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192) | |
| !79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80) | |
| !80 = !DILocation(line: 2, column: 3, scope: !3) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc' | |
| source_filename = "pipeline_dispatch_0" | |
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |
| target triple = "aarch64-unknown-unknown-eabi-elf" | |
| %iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 } | |
| %iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 } | |
| %iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr } | |
| %iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t } | |
| %iree_hal_executable_import_table_v0_t = type { i32, ptr } | |
| %iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr } | |
| %iree_hal_executable_constant_table_v0_t = type { i32 } | |
| %iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr } | |
| %iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 } | |
| @0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1 | |
| @iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 } | |
| @iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32] | |
| @iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer | |
| @1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1 | |
| @iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1] | |
| @2 = private constant [1 x i8] zeroinitializer, align 1 | |
| @iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2] | |
| @3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1 | |
| @iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }] | |
| @iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer } | |
| declare ptr @malloc(i64) #0 | |
| declare void @free(ptr) #0 | |
| define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 { | |
| %4 = alloca i32, i64 1, align 64, !dbg !79 | |
| %5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79 | |
| %6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79 | |
| %7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79 | |
| %8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79 | |
| %9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79 | |
| %10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79 | |
| %11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79 | |
| %12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79 | |
| %13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79 | |
| %14 = alloca i32, i64 3, align 64, !dbg !79 | |
| %15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
| %16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80 | |
| %17 = load ptr, ptr %16, align 8, !dbg !80 | |
| %18 = ptrtoint ptr %17 to i64, !dbg !80 | |
| %19 = and i64 %18, 63, !dbg !80 | |
| %20 = icmp eq i64 %19, 0, !dbg !80 | |
| call void @llvm.assume(i1 %20), !dbg !80 | |
| %21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
| %22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80 | |
| %23 = getelementptr ptr, ptr %22, i32 1, !dbg !80 | |
| %24 = load ptr, ptr %23, align 8, !dbg !80 | |
| %25 = ptrtoint ptr %24 to i64, !dbg !80 | |
| %26 = and i64 %25, 63, !dbg !80 | |
| %27 = icmp eq i64 %26, 0, !dbg !80 | |
| call void @llvm.assume(i1 %27), !dbg !80 | |
| %28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79 | |
| %29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79 | |
| %30 = getelementptr ptr, ptr %29, i32 2, !dbg !79 | |
| %31 = load ptr, ptr %30, align 8, !dbg !79 | |
| %32 = ptrtoint ptr %31 to i64, !dbg !79 | |
| %33 = and i64 %32, 63, !dbg !79 | |
| %34 = icmp eq i64 %33, 0, !dbg !79 | |
| call void @llvm.assume(i1 %34), !dbg !79 | |
| %35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79 | |
| %36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79 | |
| %37 = zext i32 %36 to i64, !dbg !79 | |
| %38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79 | |
| %39 = zext i32 %38 to i64, !dbg !79 | |
| br label %40, !dbg !79 | |
| 40: ; preds = %159, %3 | |
| %41 = phi i64 [ %160, %159 ], [ 0, %3 ] | |
| %42 = icmp slt i64 %41, 2, !dbg !79 | |
| br i1 %42, label %43, label %161, !dbg !79 | |
| 43: ; preds = %144, %40 | |
| %44 = phi i64 [ %158, %144 ], [ 0, %40 ] | |
| %45 = icmp slt i64 %44, 5, !dbg !79 | |
| br i1 %45, label %46, label %159, !dbg !79 | |
| 46: ; preds = %43 | |
| %47 = mul i64 %39, 2, !dbg !79 | |
| %48 = add i64 %41, %47, !dbg !79 | |
| %49 = mul i64 %37, 5, !dbg !79 | |
| %50 = add i64 %44, %49, !dbg !79 | |
| %51 = mul i64 %48, 20, !dbg !79 | |
| %52 = add i64 0, %51, !dbg !79 | |
| %53 = add i64 %52, %50, !dbg !79 | |
| %54 = add i64 %53, 0, !dbg !79 | |
| %55 = getelementptr i32, ptr %31, i64 %54, !dbg !79 | |
| store i32 0, ptr %55, align 4, !dbg !79 | |
| %56 = mul i64 %41, 20, !dbg !79 | |
| %57 = mul i64 %39, 40, !dbg !79 | |
| %58 = add i64 %56, %57, !dbg !79 | |
| %59 = add i64 %58, %44, !dbg !79 | |
| %60 = add i64 %59, %49, !dbg !79 | |
| %61 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79 | |
| %62 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %61, ptr %31, 1, !dbg !79 | |
| %63 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %62, i64 %60, 2, !dbg !79 | |
| %64 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %63, i64 1, 3, 0, !dbg !79 | |
| %65 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %64, i64 200, 4, 0, !dbg !79 | |
| %66 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %65, i64 1, 3, 1, !dbg !79 | |
| %67 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %66, i64 20, 4, 1, !dbg !79 | |
| %68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %67, i64 1, 3, 2, !dbg !79 | |
| %69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, i64 1, 4, 2, !dbg !79 | |
| br label %70, !dbg !79 | |
| 70: ; preds = %91, %46 | |
| %71 = phi i64 [ %143, %91 ], [ 0, %46 ] | |
| %72 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %91 ], [ %69, %46 ] | |
| %73 = icmp slt i64 %71, 9, !dbg !79 | |
| br i1 %73, label %74, label %144, !dbg !79 | |
| 74: ; preds = %77, %70 | |
| %75 = phi i64 [ %90, %77 ], [ 0, %70 ] | |
| %76 = icmp slt i64 %75, 3, !dbg !79 | |
| br i1 %76, label %77, label %91, !dbg !79 | |
| 77: ; preds = %74 | |
| %78 = add i64 %49, %44, !dbg !79 | |
| %79 = add i64 %78, %71, !dbg !79 | |
| %80 = add i64 %79, %75, !dbg !79 | |
| %81 = mul i64 %48, 28, !dbg !79 | |
| %82 = add i64 0, %81, !dbg !79 | |
| %83 = add i64 %82, %80, !dbg !79 | |
| %84 = add i64 %83, 0, !dbg !79 | |
| %85 = getelementptr i32, ptr %17, i64 %84, !dbg !79 | |
| %86 = load i32, ptr %85, align 4, !dbg !79 | |
| %87 = add i64 0, %75, !dbg !79 | |
| %88 = add i64 %87, 0, !dbg !79 | |
| %89 = getelementptr i32, ptr %14, i64 %88, !dbg !79 | |
| store i32 %86, ptr %89, align 4, !dbg !79 | |
| %90 = add i64 %75, 1, !dbg !79 | |
| br label %74, !dbg !79 | |
| 91: ; preds = %74 | |
| %92 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79 | |
| %93 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79 | |
| %94 = getelementptr i32, ptr %92, i64 %93, !dbg !79 | |
| %95 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79 | |
| %96 = mul i64 %95, 0, !dbg !79 | |
| %97 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79 | |
| %98 = mul i64 %97, 0, !dbg !79 | |
| %99 = add i64 %96, %98, !dbg !79 | |
| %100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79 | |
| %101 = mul i64 %100, 0, !dbg !79 | |
| %102 = add i64 %99, %101, !dbg !79 | |
| %103 = getelementptr i32, ptr %94, i64 %102, !dbg !79 | |
| %104 = load i32, ptr %103, align 4, !dbg !79 | |
| %105 = getelementptr i32, ptr %4, i64 0, !dbg !79 | |
| store i32 %104, ptr %105, align 4, !dbg !79 | |
| %106 = load <3 x i32>, ptr %14, align 4, !dbg !79 | |
| %107 = load i32, ptr %105, align 4, !dbg !79 | |
| %108 = insertelement <1 x i32> undef, i32 %107, i32 0, !dbg !79 | |
| %109 = extractelement <1 x i32> %108, i64 0, !dbg !79 | |
| %110 = insertelement <1 x i32> zeroinitializer, i32 %109, i64 0, !dbg !79 | |
| %111 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> zeroinitializer, !dbg !79 | |
| %112 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 1>, !dbg !79 | |
| %113 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 2>, !dbg !79 | |
| %114 = add i64 0, %71, !dbg !79 | |
| %115 = add i64 %114, 0, !dbg !79 | |
| %116 = getelementptr i32, ptr %24, i64 %115, !dbg !79 | |
| %117 = load i32, ptr %116, align 4, !dbg !79 | |
| %118 = add i64 %71, 1, !dbg !79 | |
| %119 = add i64 0, %118, !dbg !79 | |
| %120 = add i64 %119, 0, !dbg !79 | |
| %121 = getelementptr i32, ptr %24, i64 %120, !dbg !79 | |
| %122 = load i32, ptr %121, align 4, !dbg !79 | |
| %123 = add i64 %71, 2, !dbg !79 | |
| %124 = add i64 0, %123, !dbg !79 | |
| %125 = add i64 %124, 0, !dbg !79 | |
| %126 = getelementptr i32, ptr %24, i64 %125, !dbg !79 | |
| %127 = load i32, ptr %126, align 4, !dbg !79 | |
| %128 = insertelement <1 x i32> undef, i32 %117, i32 0, !dbg !79 | |
| %129 = shufflevector <1 x i32> %128, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %130 = mul <1 x i32> %111, %129, !dbg !79 | |
| %131 = add <1 x i32> %130, %110, !dbg !79 | |
| %132 = insertelement <1 x i32> undef, i32 %122, i32 0, !dbg !79 | |
| %133 = shufflevector <1 x i32> %132, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %134 = mul <1 x i32> %112, %133, !dbg !79 | |
| %135 = add <1 x i32> %134, %131, !dbg !79 | |
| %136 = insertelement <1 x i32> undef, i32 %127, i32 0, !dbg !79 | |
| %137 = shufflevector <1 x i32> %136, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
| %138 = mul <1 x i32> %113, %137, !dbg !79 | |
| %139 = add <1 x i32> %138, %135, !dbg !79 | |
| %140 = extractelement <1 x i32> %139, i64 0, !dbg !79 | |
| %141 = insertelement <1 x i32> zeroinitializer, i32 %140, i64 0, !dbg !79 | |
| %142 = extractelement <1 x i32> %141, i64 0, !dbg !79 | |
| store i32 %142, ptr %105, align 4, !dbg !79 | |
| %143 = add i64 %71, 3, !dbg !79 | |
| br label %70, !dbg !79 | |
| 144: ; preds = %70 | |
| %145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79 | |
| %146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79 | |
| %147 = getelementptr i32, ptr %145, i64 %146, !dbg !79 | |
| %148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79 | |
| %149 = mul i64 %148, 0, !dbg !79 | |
| %150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79 | |
| %151 = mul i64 %150, 0, !dbg !79 | |
| %152 = add i64 %149, %151, !dbg !79 | |
| %153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79 | |
| %154 = mul i64 %153, 0, !dbg !79 | |
| %155 = add i64 %152, %154, !dbg !79 | |
| %156 = getelementptr i32, ptr %147, i64 %155, !dbg !79 | |
| %157 = load i32, ptr %156, align 4, !dbg !79 | |
| store i32 %157, ptr %55, align 4, !dbg !79 | |
| %158 = add i64 %44, 1, !dbg !79 | |
| br label %43, !dbg !79 | |
| 159: ; preds = %43 | |
| %160 = add i64 %41, 1, !dbg !79 | |
| br label %40, !dbg !79 | |
| 161: ; preds = %40 | |
| ret i32 0, !dbg !79 | |
| } | |
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | |
| declare void @llvm.assume(i1 noundef) #1 | |
| ; Function Attrs: uwtable | |
| define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 { | |
| entry: | |
| %2 = icmp eq i32 %0, 3 | |
| %3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null | |
| ret ptr %3 | |
| } | |
| attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" } | |
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | |
| attributes #2 = { uwtable "nonlazybind" } | |
| !llvm.module.flags = !{!0} | |
| !llvm.dbg.cu = !{!1} | |
| !0 = !{i32 2, !"Debug Info Version", i32 3} | |
| !1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) | |
| !2 = !DIFile(filename: "-", directory: "") | |
| !3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) | |
| !4 = !DISubroutineType(cc: DW_CC_normal, types: !5) | |
| !5 = !{!6, !7, !38, !67} | |
| !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |
| !7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) | |
| !8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9) | |
| !9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10) | |
| !10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12) | |
| !11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".") | |
| !12 = !{!13, !21, !24, !27, !29} | |
| !13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64) | |
| !14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) | |
| !15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16) | |
| !16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19) | |
| !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18) | |
| !18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) | |
| !19 = !{!20} | |
| !20 = !DISubrange(count: 64) | |
| !21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64) | |
| !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) | |
| !23 = !DIBasicType(name: "void", encoding: DW_ATE_address) | |
| !24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128) | |
| !25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64) | |
| !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) | |
| !27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192) | |
| !28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64) | |
| !29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256) | |
| !30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31) | |
| !31 = !{!32} | |
| !32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33) | |
| !33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36) | |
| !34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35) | |
| !35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) | |
| !36 = !{!37} | |
| !37 = !DISubrange(count: 8) | |
| !38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64) | |
| !39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40) | |
| !40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41) | |
| !41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42) | |
| !42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62} | |
| !43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32) | |
| !44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32) | |
| !45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64) | |
| !46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47) | |
| !47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) | |
| !48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80) | |
| !49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96) | |
| !50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128) | |
| !51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160) | |
| !52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176) | |
| !53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54) | |
| !54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) | |
| !55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184) | |
| !56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192) | |
| !57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256) | |
| !58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64) | |
| !59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60) | |
| !60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19) | |
| !61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64) | |
| !62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320) | |
| !63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64) | |
| !64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65) | |
| !65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19) | |
| !66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34) | |
| !67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64) | |
| !68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69) | |
| !69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70) | |
| !70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71) | |
| !71 = !{!72, !73, !74, !75, !76, !77, !78} | |
| !72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32) | |
| !73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32) | |
| !74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64) | |
| !75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80) | |
| !76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96) | |
| !77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128) | |
| !78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192) | |
| !79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80) | |
| !80 = !DILocation(line: 2, column: 3, scope: !3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment