@banach-space
Last active July 16, 2025 08:09
Scalable vectorization for linalg.pack + linalg.unpack
// Pack + unpack Ops just before vectorization
// NOTE: There are no linalg.pack ops in this IR dump, only linalg.unpack
// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile
%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 8], [0, 0], [0, 0]]>} : tensor<1x1x8x8xf32> -> tensor<8x8xf32>
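// A minimal sketch of such an invocation (an assumption: the usual target/backend
// flags are omitted here and would be needed for a real compile; the IR dump goes
// to stderr):
//   iree-compile -mlir-print-ir-before=iree-codegen-generic-vectorization \
//     input.mlir -o /dev/null 2> before-vectorization.mlir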
// Examples to vectorize
func.func @unpack(%in: tensor<1x1x8x?xf32>, %out: tensor<8x?xf32>) -> tensor<8x?xf32> {
  %vs = vector.vscale
  %c8 = arith.constant 8 : index
  %tile_size = arith.muli %vs, %c8 : index
  %unpack = linalg.unpack %in
      inner_dims_pos = [0, 1]
      inner_tiles = [8, %tile_size]
      into %out : tensor<1x1x8x?xf32> -> tensor<8x?xf32>
  return %unpack : tensor<8x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    // TODO: Vector sizes specification is insufficient!
    transform.structured.vectorize %0 vector_sizes [1, 1] : !transform.any_op
    transform.yield
  }
}
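// A sketch of what a fuller specification might need to look like (an assumption,
// not a verified fix): the sizes would follow the unpacked destination shape, with
// the second dimension scalable. Square brackets mark a scalable size in the
// transform dialect, e.g.:
//   transform.structured.vectorize %0 vector_sizes [8, [8]] : !transform.any_op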
// -----
func.func @pack(%in: tensor<1x?xf32>, %out: tensor<1x1x?x1xf32>) -> tensor<1x1x?x1xf32> {
  %pad = arith.constant 1.23 : f32
  %vs = vector.vscale
  %c8 = arith.constant 8 : index
  %tile_size = arith.muli %vs, %c8 : index
  %pack = linalg.pack %in
      padding_value(%pad : f32)
      outer_dims_perm = [1, 0]
      inner_dims_pos = [1, 0]
      inner_tiles = [%tile_size, 1]
      into %out : tensor<1x?xf32> -> tensor<1x1x?x1xf32>
  return %pack : tensor<1x1x?x1xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    // TODO: Vector sizes specification is insufficient!
    transform.structured.vectorize %0 vector_sizes [1, 1] : !transform.any_op
    transform.yield
  }
}
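// Similarly for the pack case, one possible specification (an assumption, matching
// the rank of the source tensor and marking its second dimension as scalable):
//   transform.structured.vectorize %0 vector_sizes [1, [8]] : !transform.any_op
// Both examples can be exercised with the transform interpreter, e.g.
// mlir-opt --transform-interpreter <file>.mlir (other flags may be needed).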
// Pack + unpack Ops just before vectorization
// NOTE: There is no linalg.pack for the output tensor, which is initialised with 0s
// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile
%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<8x1xf32> -> tensor<1x1x8x1xf32>
%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<4x1xf32> -> tensor<1x1x4x1xf32>
%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 4], [0, 0], [0, 0]]>} : tensor<1x1x8x4xf32> -> tensor<8x4xf32>