banach-space/aarch64_fixed_width.mlir

## aarch64_fixed_width.mlir
// Pack + unpack Ops just before vectorization
// NOTE: There are no linalg.pack Ops

// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile

// IR fragment (operands are defined outside this snippet): unpacks a
// tensor<1x1x8x8xf32> holding one 8x8 inner tile into a plain tensor<8x8xf32>.
// The attached lowering_config attribute carries the IREE tile sizes.
%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 8], [0, 0], [0, 0]]>} : tensor<1x1x8x8xf32> -> tensor<8x8xf32>

## pack_unpack.mlir
// Examples to vectorize

// Unpacks a single packed tile of shape 8 x (8 * vscale) from %in into the
// 2-D destination %out, whose trailing dimension is dynamic.
func.func @unpack(%in: tensor<1x1x8x?xf32>, %out: tensor<8x?xf32>) -> tensor<8x?xf32> {
  // The trailing inner tile size is scalable: vscale * 8.
  %eight = arith.constant 8 : index
  %vscale = vector.vscale
  %scalable_tile = arith.muli %vscale, %eight : index

  %result = linalg.unpack %in inner_dims_pos = [0, 1] inner_tiles = [8, %scalable_tile] into %out
    : tensor<1x1x8x?xf32> -> tensor<8x?xf32>
  return %result : tensor<8x?xf32>
}

// Transform script: match the linalg.unpack ops under the payload root and
// request their vectorization.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(
      %arg0: !transform.any_op {transform.readonly}) {
    %unpack_ops = transform.structured.match ops{["linalg.unpack"]} in %arg0
      : (!transform.any_op) -> !transform.any_op
    // TODO: The vector sizes specification is insufficient!
    transform.structured.vectorize %unpack_ops vector_sizes [1, 1] : !transform.any_op
    transform.yield
  }
}

// -----

// Packs the 2-D tensor %in (dynamic trailing dimension) into %out using a
// scalable inner tile (vscale * 8) on dimension 1 and a unit tile on
// dimension 0; a constant padding value is supplied to the op.
func.func @pack(%in: tensor<1x?xf32>, %out: tensor<1x1x?x1xf32>) -> tensor<1x1x?x1xf32> {
  // Padding value passed to linalg.pack.
  %padding = arith.constant 1.23 : f32

  // Scalable tile size: vscale * 8.
  %eight = arith.constant 8 : index
  %vscale = vector.vscale
  %scalable_tile = arith.muli %vscale, %eight : index

  %packed = linalg.pack %in padding_value(%padding : f32)
    outer_dims_perm = [1, 0] inner_dims_pos = [1, 0]
    inner_tiles = [%scalable_tile, 1]
    into %out : tensor<1x?xf32> -> tensor<1x1x?x1xf32>
  return %packed : tensor<1x1x?x1xf32>
}

// Transform script: match the linalg.pack ops under the payload root and
// request their vectorization.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(
      %arg0: !transform.any_op {transform.readonly}) {
    %pack_ops = transform.structured.match ops{["linalg.pack"]} in %arg0
      : (!transform.any_op) -> !transform.any_op
    // TODO: The vector sizes specification is insufficient!
    transform.structured.vectorize %pack_ops vector_sizes [1, 1] : !transform.any_op
    transform.yield
  }
}

## x86_fixed_width.mlir
// Pack + unpack Ops just before vectorization
// NOTE: There is no linalg.pack for the output tensor, which is initialised with 0s

// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile

// IR fragments (operands are defined outside this snippet).
// NOTE(review): both pack results are named %pack and all three ops share the
// same operand names; presumably they were dumped from separate functions -
// confirm before combining them into one region.

// Packs tensor<8x1xf32> into tensor<1x1x8x1xf32> with inner tiles [8, 1].
%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<8x1xf32> -> tensor<1x1x8x1xf32>
// Packs tensor<4x1xf32> into tensor<1x1x4x1xf32> with inner tiles [4, 1].
%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<4x1xf32> -> tensor<1x1x4x1xf32>
// Unpacks tensor<1x1x8x4xf32> (one 8x4 inner tile) into tensor<8x4xf32>.
%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 4], [0, 0], [0, 0]]>} : tensor<1x1x8x4xf32> -> tensor<8x4xf32>
	// Pack + unpack Ops just before vectorization
	// NOTE: There are no linalg.pack Ops

	// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile

	// IR fragment (operands are defined outside this snippet): unpacks a
	// tensor<1x1x8x8xf32> holding one 8x8 inner tile into a plain tensor<8x8xf32>.
	// The attached lowering_config attribute carries the IREE tile sizes.
	%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 8], [0, 0], [0, 0]]>} : tensor<1x1x8x8xf32> -> tensor<8x8xf32>
	// Examples to vectorize

	// Unpacks a single packed tile of shape 8 x (8 * vscale) from %in into the
	// 2-D destination %out, whose trailing dimension is dynamic.
	func.func @unpack(%in: tensor<1x1x8x?xf32>, %out: tensor<8x?xf32>) -> tensor<8x?xf32> {
	  // The trailing inner tile size is scalable: vscale * 8.
	  %eight = arith.constant 8 : index
	  %vscale = vector.vscale
	  %scalable_tile = arith.muli %vscale, %eight : index

	  %result = linalg.unpack %in inner_dims_pos = [0, 1] inner_tiles = [8, %scalable_tile] into %out
	    : tensor<1x1x8x?xf32> -> tensor<8x?xf32>
	  return %result : tensor<8x?xf32>
	}

	// Transform script: match the linalg.unpack ops under the payload root and
	// request their vectorization.
	module attributes {transform.with_named_sequence} {
	  transform.named_sequence @__transform_main(
	      %arg0: !transform.any_op {transform.readonly}) {
	    %unpack_ops = transform.structured.match ops{["linalg.unpack"]} in %arg0
	      : (!transform.any_op) -> !transform.any_op
	    // TODO: The vector sizes specification is insufficient!
	    transform.structured.vectorize %unpack_ops vector_sizes [1, 1] : !transform.any_op
	    transform.yield
	  }
	}

	// -----

	// Packs the 2-D tensor %in (dynamic trailing dimension) into %out using a
	// scalable inner tile (vscale * 8) on dimension 1 and a unit tile on
	// dimension 0; a constant padding value is supplied to the op.
	func.func @pack(%in: tensor<1x?xf32>, %out: tensor<1x1x?x1xf32>) -> tensor<1x1x?x1xf32> {
	  // Padding value passed to linalg.pack.
	  %padding = arith.constant 1.23 : f32

	  // Scalable tile size: vscale * 8.
	  %eight = arith.constant 8 : index
	  %vscale = vector.vscale
	  %scalable_tile = arith.muli %vscale, %eight : index

	  %packed = linalg.pack %in padding_value(%padding : f32)
	    outer_dims_perm = [1, 0] inner_dims_pos = [1, 0]
	    inner_tiles = [%scalable_tile, 1]
	    into %out : tensor<1x?xf32> -> tensor<1x1x?x1xf32>
	  return %packed : tensor<1x1x?x1xf32>
	}

	// Transform script: match the linalg.pack ops under the payload root and
	// request their vectorization.
	module attributes {transform.with_named_sequence} {
	  transform.named_sequence @__transform_main(
	      %arg0: !transform.any_op {transform.readonly}) {
	    %pack_ops = transform.structured.match ops{["linalg.pack"]} in %arg0
	      : (!transform.any_op) -> !transform.any_op
	    // TODO: The vector sizes specification is insufficient!
	    transform.structured.vectorize %pack_ops vector_sizes [1, 1] : !transform.any_op
	    transform.yield
	  }
	}
	// Pack + unpack Ops just before vectorization
	// NOTE: There is no linalg.pack for the output tensor, which is initialised with 0s

	// Use -mlir-print-ir-before=iree-codegen-generic-vectorization with iree-compile

	// IR fragments (operands are defined outside this snippet).
	// NOTE(review): both pack results are named %pack and all three ops share the
	// same operand names; presumably they were dumped from separate functions -
	// confirm before combining them into one region.

	// Packs tensor<8x1xf32> into tensor<1x1x8x1xf32> with inner tiles [8, 1].
	%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<8x1xf32> -> tensor<1x1x8x1xf32>
	// Packs tensor<4x1xf32> into tensor<1x1x4x1xf32> with inner tiles [4, 1].
	%pack = linalg.pack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [1, 1]]>} : tensor<4x1xf32> -> tensor<1x1x4x1xf32>
	// Unpacks tensor<1x1x8x4xf32> (one 8x4 inner tile) into tensor<8x4xf32>.
	%unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [8, 4], [0, 0], [0, 0]]>} : tensor<1x1x8x4xf32> -> tensor<8x4xf32>