Gist: an OpenCL C vstore2-to-__local-memory test kernel, followed by its SPIR-V disassembly and the NIR produced from that SPIR-V.
// OpenCL CTS-style test kernel: exercises vstore2() into __local memory at a
// base pointer deliberately misaligned by `alignmentOffset` uints, then copies
// the stored element back out to global memory for host-side verification.
//
// sSharedStorage  - scratch __local buffer the vstore2 writes into
// srcValues       - per-work-item uint2 values to store
// offsets         - per-work-item element offset into sSharedStorage
// destBuffer      - global output the host checks
// alignmentOffset - extra uint-granularity offset applied to the local base
__kernel void test_fn(__local uint2 *sSharedStorage, __global uint2 *srcValues, __global uint *offsets, __global uint *destBuffer, uint alignmentOffset )
{
int tid = get_global_id( 0 );
// Pre-clear the two local slots this work-item may touch, so stale data
// cannot masquerade as a correct vstore2 result.
sSharedStorage[ offsets[tid] ] = (uint2)(uint)0;
sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];
barrier( CLK_LOCAL_MEM_FENCE );
// Store one uint2 at element index offsets[tid], measured from a base that
// is offset by alignmentOffset uints (i.e. possibly not uint2-aligned).
vstore2( srcValues[ tid ], offsets[ tid ], ( (__local uint *)sSharedStorage ) + alignmentOffset );
barrier( CLK_LOCAL_MEM_FENCE );
int i;
// Read back from the same (possibly unaligned) location one uint at a time;
// the loop runs sizeof(uint2)/sizeof(uint) == 2 iterations.
__local uint *sp = (__local uint*) (sSharedStorage + offsets[tid]) + alignmentOffset;
__global uint *dp = (__global uint*) (destBuffer + offsets[tid]) + alignmentOffset;
for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ )
dp[i] = sp[i];
}
; SPIR-V
; Version: 1.0
; Generator: Khronos; 17
; Bound: 60
; Schema: 0
; SPIR-V disassembly of the OpenCL kernel above (Physical64 / OpenCL memory
; model). Comments below annotate how each source construct was lowered.
OpCapability Addresses
OpCapability Kernel
OpCapability Int64
%1 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %2 "test_fn" %__spirv_BuiltInGlobalInvocationId
%4 = OpString "kernel_arg_type.test_fn.uint2*,uint2*,uint*,uint2*,uint,"
OpSource OpenCL_C 102000
; --- debug names for parameters and intermediate values ---
OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
OpName %sSharedStorage "sSharedStorage"
OpName %srcValues "srcValues"
OpName %offsets "offsets"
OpName %destBuffer "destBuffer"
OpName %alignmentOffset "alignmentOffset"
OpName %entry "entry"
OpName %call "call"
OpName %idxprom "idxprom"
OpName %arrayidx "arrayidx"
OpName %idxprom1 "idxprom1"
OpName %arrayidx2 "arrayidx2"
OpName %idxprom5 "idxprom5"
OpName %arrayidx6 "arrayidx6"
OpName %add "add"
OpName %idxprom9 "idxprom9"
OpName %arrayidx10 "arrayidx10"
OpName %arrayidx12 "arrayidx12"
OpName %conv15 "conv15"
OpName %idx_ext "idx.ext"
OpName %add_ptr "add.ptr"
OpName %idx_ext18 "idx.ext18"
OpName %add_ptr21 "add.ptr21"
OpName %add_ptr27 "add.ptr27"
OpName %arrayidx31_1 "arrayidx31.1"
OpName %arrayidx33_1 "arrayidx33.1"
; NoCapture applies (via the decoration group) to all three global pointers.
OpDecorate %30 FuncParamAttr NoCapture
%30 = OpDecorationGroup
OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
OpGroupDecorate %30 %srcValues %offsets %destBuffer
; --- types and constants ---
%ulong = OpTypeInt 64 0
%uint = OpTypeInt 32 0
%ulong_32 = OpConstant %ulong 32
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
; 272 = SequentiallyConsistent | WorkgroupMemory barrier semantics.
%uint_272 = OpConstant %uint 272
%ulong_0 = OpConstant %ulong 0
%ulong_1 = OpConstant %ulong 1
%v3ulong = OpTypeVector %ulong 3
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
%void = OpTypeVoid
%v2uint = OpTypeVector %uint 2
%_ptr_Workgroup_v2uint = OpTypePointer Workgroup %v2uint
%_ptr_CrossWorkgroup_v2uint = OpTypePointer CrossWorkgroup %v2uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%46 = OpTypeFunction %void %_ptr_Workgroup_v2uint %_ptr_CrossWorkgroup_v2uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v2uint %uint
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
%48 = OpConstantNull %v2uint
; --- kernel body ---
%2 = OpFunction %void None %46
%sSharedStorage = OpFunctionParameter %_ptr_Workgroup_v2uint
%srcValues = OpFunctionParameter %_ptr_CrossWorkgroup_v2uint
%offsets = OpFunctionParameter %_ptr_CrossWorkgroup_uint
%destBuffer = OpFunctionParameter %_ptr_CrossWorkgroup_v2uint
%alignmentOffset = OpFunctionParameter %uint
%entry = OpLabel
; tid = get_global_id(0); the shl/ashr-by-32 pair sign-extends the
; 32-bit `int tid` value within the 64-bit register.
%49 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId
%call = OpCompositeExtract %ulong %49 0
%50 = OpShiftLeftLogical %ulong %call %ulong_32
%idxprom = OpShiftRightArithmetic %ulong %50 %ulong_32
%arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %offsets %idxprom
; sSharedStorage[offsets[tid]] = (uint2)0;
%51 = OpLoad %uint %arrayidx Aligned 4
%idxprom1 = OpUConvert %ulong %51
%arrayidx2 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom1
OpStore %arrayidx2 %48 Aligned 8
; sSharedStorage[offsets[tid] + 1] = sSharedStorage[offsets[tid]];
%52 = OpLoad %uint %arrayidx Aligned 4
%idxprom5 = OpUConvert %ulong %52
%arrayidx6 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom5
%53 = OpLoad %v2uint %arrayidx6 Aligned 8
%add = OpIAdd %uint %52 %uint_1
%idxprom9 = OpUConvert %ulong %add
%arrayidx10 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom9
OpStore %arrayidx10 %53 Aligned 8
; barrier(CLK_LOCAL_MEM_FENCE);
OpControlBarrier %uint_2 %uint_2 %uint_272
; vstore2(srcValues[tid], offsets[tid],
;         (__local uint *)sSharedStorage + alignmentOffset);
%arrayidx12 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v2uint %srcValues %idxprom
%54 = OpLoad %v2uint %arrayidx12 Aligned 8
%55 = OpLoad %uint %arrayidx Aligned 4
%conv15 = OpUConvert %ulong %55
%idx_ext = OpUConvert %ulong %alignmentOffset
%add_ptr = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %sSharedStorage %ulong_0 %idx_ext
%56 = OpExtInst %void %1 vstoren %54 %conv15 %add_ptr
; barrier(CLK_LOCAL_MEM_FENCE);
OpControlBarrier %uint_2 %uint_2 %uint_272
; Copy loop, fully unrolled to its two iterations: dp[0]=sp[0]; dp[1]=sp[1];
%57 = OpLoad %uint %arrayidx Aligned 4
%idx_ext18 = OpUConvert %ulong %57
%add_ptr21 = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %sSharedStorage %idx_ext18 %idx_ext
%add_ptr27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %destBuffer %idx_ext18 %idx_ext
%58 = OpLoad %uint %add_ptr21 Aligned 4
OpStore %add_ptr27 %58 Aligned 4
%arrayidx31_1 = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %add_ptr21 %ulong_1
%59 = OpLoad %uint %arrayidx31_1 Aligned 4
%arrayidx33_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %add_ptr27 %ulong_1
OpStore %arrayidx33_1 %59 Aligned 4
OpReturn
OpFunctionEnd
/* NIR produced from the SPIR-V module above. Single basic block: all control
 * flow from the source has been flattened/unrolled. Annotations below map the
 * SSA values back to the OpenCL source. */
impl test_fn {
block block_0:
/* preds: */
vec1 32 ssa_30 = load_const (0x00000001 /* 0.000000 */)
vec2 32 ssa_24 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */)
vec1 64 ssa_15 = load_const (0x 20 /* 0.000000 */)
vec1 64 ssa_12 = load_const (0x 20 /* 0.000000 */)
/* Kernel parameters: sSharedStorage (shared), srcValues / offsets /
 * destBuffer (global), alignmentOffset (32-bit scalar). */
vec1 64 ssa_0 = intrinsic load_param () (0) /* param_idx=0 */
vec1 64 ssa_1 = deref_cast (uvec2 *)ssa_0 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_2 = intrinsic load_param () (1) /* param_idx=1 */
vec1 64 ssa_3 = deref_cast (uvec2 *)ssa_2 (global uvec2) /* ptr_stride=8 */
vec1 64 ssa_4 = intrinsic load_param () (2) /* param_idx=2 */
vec1 64 ssa_5 = deref_cast (uint *)ssa_4 (global uint) /* ptr_stride=4 */
vec1 64 ssa_6 = intrinsic load_param () (3) /* param_idx=3 */
vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_6 (global uvec2) /* ptr_stride=8 */
vec1 32 ssa_8 = intrinsic load_param () (4) /* param_idx=4 */
/* tid = get_global_id(0), sign-extended via shl/shr by 32 (ssa_17). */
vec1 32 ssa_9 = deref_var &__spirv_BuiltInGlobalInvocationId (system u64vec3)
vec3 64 ssa_10 = intrinsic load_deref (ssa_9) (0, 0, 0) /* access=0 */ /* align_mul=0 */ /* align_offset=0 */
vec1 64 ssa_11 = mov ssa_10.x
vec1 32 ssa_13 = u2u32 ssa_12
vec1 64 ssa_14 = ishl ssa_11, ssa_13
vec1 32 ssa_16 = u2u32 ssa_15
vec1 64 ssa_17 = ishr ssa_14, ssa_16
/* ssa_19 = &offsets[tid]; reloaded several times below. */
vec1 64 ssa_18 = deref_cast (uint *)ssa_5 (global uint) /* ptr_stride=4 */
vec1 64 ssa_19 = deref_ptr_as_array &(*ssa_18)[ssa_17] (global uint) /* &(*(uint *)ssa_5)[ssa_17] */
vec1 32 ssa_20 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
/* sSharedStorage[offsets[tid]] = (uint2)0; */
vec1 64 ssa_21 = u2u64 ssa_20
vec1 64 ssa_22 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_23 = deref_ptr_as_array &(*ssa_22)[ssa_21] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_21] */
intrinsic store_deref (ssa_23, ssa_24) (3, 0, 8, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=8 */ /* align_offset=0 */
/* sSharedStorage[offsets[tid] + 1] = sSharedStorage[offsets[tid]]; */
vec1 32 ssa_25 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec1 64 ssa_26 = u2u64 ssa_25
vec1 64 ssa_27 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_28 = deref_ptr_as_array &(*ssa_27)[ssa_26] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_26] */
vec2 32 ssa_29 = intrinsic load_deref (ssa_28) (0, 8, 0) /* access=0 */ /* align_mul=8 */ /* align_offset=0 */
vec1 32 ssa_31 = iadd ssa_25, ssa_30
vec1 64 ssa_32 = u2u64 ssa_31
vec1 64 ssa_33 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_34 = deref_ptr_as_array &(*ssa_33)[ssa_32] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_32] */
intrinsic store_deref (ssa_34, ssa_29) (3, 0, 8, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=8 */ /* align_offset=0 */
/* barrier(CLK_LOCAL_MEM_FENCE); */
intrinsic scoped_control_memory_barrier () (2, 3, 256, 2) /* mem_semantics=ACQ|REL */ /* mem_modes=shared */ /* mem_scope=WORKGROUP */ /* exec_scope=WORKGROUP */
/* vstore2(): lowered to two scalar uint stores (ssa_47, ssa_51) at
 * byte-unaligned derefs — note align_mul=0 on both stores. The uint index is
 * offsets[tid]*2 (ssa_46, shifted left by 1) from the alignmentOffset base. */
vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_3 (global uvec2) /* ptr_stride=8 */
vec1 64 ssa_36 = deref_ptr_as_array &(*ssa_35)[ssa_17] (global uvec2) /* &(*(uvec2 *)ssa_3)[ssa_17] */
vec2 32 ssa_37 = intrinsic load_deref (ssa_36) (0, 8, 0) /* access=0 */ /* align_mul=8 */ /* align_offset=0 */
vec1 32 ssa_38 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec1 64 ssa_39 = u2u64 ssa_38
vec1 64 ssa_40 = u2u64 ssa_8
vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_42 = load_const (0x 0 /* 0.000000 */)
vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[0] (shared uvec2) /* &(*(uvec2 *)ssa_1)[0] */
vec1 64 ssa_44 = deref_array &(*ssa_43)[ssa_40] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40] */
vec1 32 ssa_45 = load_const (0x00000001 /* 0.000000 */)
vec1 64 ssa_46 = ishl ssa_39, ssa_45
vec1 64 ssa_47 = deref_ptr_as_array &(*ssa_44)[ssa_46] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40][ssa_46] */
vec1 32 ssa_48 = mov ssa_37.x
intrinsic store_deref (ssa_47, ssa_48) (1, 0, 0, 0) /* wrmask=x */ /* access=0 */ /* align_mul=0 */ /* align_offset=0 */
vec1 64 ssa_49 = load_const (0x 1 /* 0.000000 */)
vec1 64 ssa_50 = iadd ssa_46, ssa_49
vec1 64 ssa_51 = deref_ptr_as_array &(*ssa_44)[ssa_50] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40][ssa_50] */
vec1 32 ssa_52 = mov ssa_37.y
intrinsic store_deref (ssa_51, ssa_52) (1, 0, 0, 0) /* wrmask=x */ /* access=0 */ /* align_mul=0 */ /* align_offset=0 */
/* barrier(CLK_LOCAL_MEM_FENCE); */
intrinsic scoped_control_memory_barrier () (2, 3, 256, 2) /* mem_semantics=ACQ|REL */ /* mem_modes=shared */ /* mem_scope=WORKGROUP */ /* exec_scope=WORKGROUP */
/* Unrolled copy loop: dp[i] = sp[i] for i in {0, 1}. */
vec1 32 ssa_53 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec1 64 ssa_54 = u2u64 ssa_53
vec1 64 ssa_55 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */
vec1 64 ssa_56 = deref_ptr_as_array &(*ssa_55)[ssa_54] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_54] */
vec1 64 ssa_57 = deref_array &(*ssa_56)[ssa_40] (shared uint) /* &(*(uvec2 *)ssa_1)[ssa_54][ssa_40] */
vec1 64 ssa_58 = deref_cast (uvec2 *)ssa_7 (global uvec2) /* ptr_stride=8 */
vec1 64 ssa_59 = deref_ptr_as_array &(*ssa_58)[ssa_54] (global uvec2) /* &(*(uvec2 *)ssa_7)[ssa_54] */
vec1 64 ssa_60 = deref_array &(*ssa_59)[ssa_40] (global uint) /* &(*(uvec2 *)ssa_7)[ssa_54][ssa_40] */
/* First iteration (dp[0] = sp[0]) expressed as a masked vector
 * read-modify-write: load both lanes, bcsel the lane alignmentOffset
 * addresses, store the whole uvec2 back. */
vec2 32 ssa_61 = intrinsic load_deref (ssa_56) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec1 64 ssa_62 = load_const (0x 1 /* 0.000000 */)
vec1 1 ssa_63 = ilt ssa_40, ssa_62
vec1 32 ssa_64 = mov ssa_61.y
vec1 32 ssa_65 = mov ssa_61.x
vec1 32 ssa_66 = bcsel ssa_63, ssa_65, ssa_64
vec2 32 ssa_67 = intrinsic load_deref (ssa_59) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec2 64 ssa_68 = load_const (0x 0 /* 0.000000 */, 0x 1 /* 0.000000 */)
vec2 1 ssa_69 = ieq ssa_40.xx, ssa_68
vec2 32 ssa_70 = bcsel ssa_69, ssa_66.xx, ssa_67
intrinsic store_deref (ssa_59, ssa_70) (3, 0, 4, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
/* Second iteration (dp[1] = sp[1]) as plain scalar uint load/store. */
vec1 64 ssa_71 = deref_cast (uint *)ssa_57 (shared uint) /* ptr_stride=4 */
vec1 64 ssa_72 = load_const (0x 1 /* 0.000000 */)
vec1 64 ssa_73 = deref_ptr_as_array &(*ssa_71)[1] (shared uint) /* &(*(uint *)ssa_57)[1] */
vec1 32 ssa_74 = intrinsic load_deref (ssa_73) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
vec1 64 ssa_75 = deref_cast (uint *)ssa_60 (global uint) /* ptr_stride=4 */
vec1 64 ssa_76 = load_const (0x 1 /* 0.000000 */)
vec1 64 ssa_77 = deref_ptr_as_array &(*ssa_75)[1] (global uint) /* &(*(uint *)ssa_60)[1] */
intrinsic store_deref (ssa_77, ssa_74) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
return
/* succs: block_1 */
block block_1:
}
(end of gist)