Created
July 23, 2020 15:32
-
-
Save bbrezillon/6b8b4f54e2b29f9eb76c1775859f136b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __kernel void test_fn(__local uint2 *sSharedStorage, __global uint2 *srcValues, __global uint *offsets, __global uint2 *destBuffer, uint alignmentOffset ) | |
| { | |
| int tid = get_global_id( 0 ); | |
| sSharedStorage[ offsets[tid] ] = (uint2)(uint)0; | |
| sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ]; | |
| barrier( CLK_LOCAL_MEM_FENCE ); | |
| vstore2( srcValues[ tid ], offsets[ tid ], ( (__local uint *)sSharedStorage ) + alignmentOffset ); | |
| barrier( CLK_LOCAL_MEM_FENCE ); | |
| int i; | |
| __local uint *sp = (__local uint*) (sSharedStorage + offsets[tid]) + alignmentOffset; | |
| __global uint *dp = (__global uint*) (destBuffer + offsets[tid]) + alignmentOffset; | |
| for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) | |
| dp[i] = sp[i]; | |
| } | |
| ; SPIR-V | |
| ; Version: 1.0 | |
| ; Generator: Khronos; 17 | |
| ; Bound: 60 | |
| ; Schema: 0 | |
| OpCapability Addresses | |
| OpCapability Kernel | |
| OpCapability Int64 | |
| %1 = OpExtInstImport "OpenCL.std" | |
| OpMemoryModel Physical64 OpenCL | |
| OpEntryPoint Kernel %2 "test_fn" %__spirv_BuiltInGlobalInvocationId | |
| %4 = OpString "kernel_arg_type.test_fn.uint2*,uint2*,uint*,uint2*,uint," | |
| OpSource OpenCL_C 102000 | |
| OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId" | |
| OpName %sSharedStorage "sSharedStorage" | |
| OpName %srcValues "srcValues" | |
| OpName %offsets "offsets" | |
| OpName %destBuffer "destBuffer" | |
| OpName %alignmentOffset "alignmentOffset" | |
| OpName %entry "entry" | |
| OpName %call "call" | |
| OpName %idxprom "idxprom" | |
| OpName %arrayidx "arrayidx" | |
| OpName %idxprom1 "idxprom1" | |
| OpName %arrayidx2 "arrayidx2" | |
| OpName %idxprom5 "idxprom5" | |
| OpName %arrayidx6 "arrayidx6" | |
| OpName %add "add" | |
| OpName %idxprom9 "idxprom9" | |
| OpName %arrayidx10 "arrayidx10" | |
| OpName %arrayidx12 "arrayidx12" | |
| OpName %conv15 "conv15" | |
| OpName %idx_ext "idx.ext" | |
| OpName %add_ptr "add.ptr" | |
| OpName %idx_ext18 "idx.ext18" | |
| OpName %add_ptr21 "add.ptr21" | |
| OpName %add_ptr27 "add.ptr27" | |
| OpName %arrayidx31_1 "arrayidx31.1" | |
| OpName %arrayidx33_1 "arrayidx33.1" | |
| OpDecorate %30 FuncParamAttr NoCapture | |
| %30 = OpDecorationGroup | |
| OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId | |
| OpDecorate %__spirv_BuiltInGlobalInvocationId Constant | |
| OpGroupDecorate %30 %srcValues %offsets %destBuffer | |
| %ulong = OpTypeInt 64 0 | |
| %uint = OpTypeInt 32 0 | |
| %ulong_32 = OpConstant %ulong 32 | |
| %uint_1 = OpConstant %uint 1 | |
| %uint_2 = OpConstant %uint 2 | |
| %uint_272 = OpConstant %uint 272 | |
| %ulong_0 = OpConstant %ulong 0 | |
| %ulong_1 = OpConstant %ulong 1 | |
| %v3ulong = OpTypeVector %ulong 3 | |
| %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong | |
| %void = OpTypeVoid | |
| %v2uint = OpTypeVector %uint 2 | |
| %_ptr_Workgroup_v2uint = OpTypePointer Workgroup %v2uint | |
| %_ptr_CrossWorkgroup_v2uint = OpTypePointer CrossWorkgroup %v2uint | |
| %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint | |
| %46 = OpTypeFunction %void %_ptr_Workgroup_v2uint %_ptr_CrossWorkgroup_v2uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v2uint %uint | |
| %_ptr_Workgroup_uint = OpTypePointer Workgroup %uint | |
| %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input | |
| %48 = OpConstantNull %v2uint | |
| %2 = OpFunction %void None %46 | |
| %sSharedStorage = OpFunctionParameter %_ptr_Workgroup_v2uint | |
| %srcValues = OpFunctionParameter %_ptr_CrossWorkgroup_v2uint | |
| %offsets = OpFunctionParameter %_ptr_CrossWorkgroup_uint | |
| %destBuffer = OpFunctionParameter %_ptr_CrossWorkgroup_v2uint | |
| %alignmentOffset = OpFunctionParameter %uint | |
| %entry = OpLabel | |
| %49 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId | |
| %call = OpCompositeExtract %ulong %49 0 | |
| %50 = OpShiftLeftLogical %ulong %call %ulong_32 | |
| %idxprom = OpShiftRightArithmetic %ulong %50 %ulong_32 | |
| %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %offsets %idxprom | |
| %51 = OpLoad %uint %arrayidx Aligned 4 | |
| %idxprom1 = OpUConvert %ulong %51 | |
| %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom1 | |
| OpStore %arrayidx2 %48 Aligned 8 | |
| %52 = OpLoad %uint %arrayidx Aligned 4 | |
| %idxprom5 = OpUConvert %ulong %52 | |
| %arrayidx6 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom5 | |
| %53 = OpLoad %v2uint %arrayidx6 Aligned 8 | |
| %add = OpIAdd %uint %52 %uint_1 | |
| %idxprom9 = OpUConvert %ulong %add | |
| %arrayidx10 = OpInBoundsPtrAccessChain %_ptr_Workgroup_v2uint %sSharedStorage %idxprom9 | |
| OpStore %arrayidx10 %53 Aligned 8 | |
| OpControlBarrier %uint_2 %uint_2 %uint_272 | |
| %arrayidx12 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v2uint %srcValues %idxprom | |
| %54 = OpLoad %v2uint %arrayidx12 Aligned 8 | |
| %55 = OpLoad %uint %arrayidx Aligned 4 | |
| %conv15 = OpUConvert %ulong %55 | |
| %idx_ext = OpUConvert %ulong %alignmentOffset | |
| %add_ptr = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %sSharedStorage %ulong_0 %idx_ext | |
| %56 = OpExtInst %void %1 vstoren %54 %conv15 %add_ptr | |
| OpControlBarrier %uint_2 %uint_2 %uint_272 | |
| %57 = OpLoad %uint %arrayidx Aligned 4 | |
| %idx_ext18 = OpUConvert %ulong %57 | |
| %add_ptr21 = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %sSharedStorage %idx_ext18 %idx_ext | |
| %add_ptr27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %destBuffer %idx_ext18 %idx_ext | |
| %58 = OpLoad %uint %add_ptr21 Aligned 4 | |
| OpStore %add_ptr27 %58 Aligned 4 | |
| %arrayidx31_1 = OpInBoundsPtrAccessChain %_ptr_Workgroup_uint %add_ptr21 %ulong_1 | |
| %59 = OpLoad %uint %arrayidx31_1 Aligned 4 | |
| %arrayidx33_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %add_ptr27 %ulong_1 | |
| OpStore %arrayidx33_1 %59 Aligned 4 | |
| OpReturn | |
| OpFunctionEnd | |
| impl test_fn { | |
| block block_0: | |
| /* preds: */ | |
| vec1 32 ssa_30 = load_const (0x00000001 /* 0.000000 */) | |
| vec2 32 ssa_24 = load_const (0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */) | |
| vec1 64 ssa_15 = load_const (0x 20 /* 0.000000 */) | |
| vec1 64 ssa_12 = load_const (0x 20 /* 0.000000 */) | |
| vec1 64 ssa_0 = intrinsic load_param () (0) /* param_idx=0 */ | |
| vec1 64 ssa_1 = deref_cast (uvec2 *)ssa_0 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_2 = intrinsic load_param () (1) /* param_idx=1 */ | |
| vec1 64 ssa_3 = deref_cast (uvec2 *)ssa_2 (global uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_4 = intrinsic load_param () (2) /* param_idx=2 */ | |
| vec1 64 ssa_5 = deref_cast (uint *)ssa_4 (global uint) /* ptr_stride=4 */ | |
| vec1 64 ssa_6 = intrinsic load_param () (3) /* param_idx=3 */ | |
| vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_6 (global uvec2) /* ptr_stride=8 */ | |
| vec1 32 ssa_8 = intrinsic load_param () (4) /* param_idx=4 */ | |
| vec1 32 ssa_9 = deref_var &__spirv_BuiltInGlobalInvocationId (system u64vec3) | |
| vec3 64 ssa_10 = intrinsic load_deref (ssa_9) (0, 0, 0) /* access=0 */ /* align_mul=0 */ /* align_offset=0 */ | |
| vec1 64 ssa_11 = mov ssa_10.x | |
| vec1 32 ssa_13 = u2u32 ssa_12 | |
| vec1 64 ssa_14 = ishl ssa_11, ssa_13 | |
| vec1 32 ssa_16 = u2u32 ssa_15 | |
| vec1 64 ssa_17 = ishr ssa_14, ssa_16 | |
| vec1 64 ssa_18 = deref_cast (uint *)ssa_5 (global uint) /* ptr_stride=4 */ | |
| vec1 64 ssa_19 = deref_ptr_as_array &(*ssa_18)[ssa_17] (global uint) /* &(*(uint *)ssa_5)[ssa_17] */ | |
| vec1 32 ssa_20 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_21 = u2u64 ssa_20 | |
| vec1 64 ssa_22 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_23 = deref_ptr_as_array &(*ssa_22)[ssa_21] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_21] */ | |
| intrinsic store_deref (ssa_23, ssa_24) (3, 0, 8, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=8 */ /* align_offset=0 */ | |
| vec1 32 ssa_25 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_26 = u2u64 ssa_25 | |
| vec1 64 ssa_27 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_28 = deref_ptr_as_array &(*ssa_27)[ssa_26] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_26] */ | |
| vec2 32 ssa_29 = intrinsic load_deref (ssa_28) (0, 8, 0) /* access=0 */ /* align_mul=8 */ /* align_offset=0 */ | |
| vec1 32 ssa_31 = iadd ssa_25, ssa_30 | |
| vec1 64 ssa_32 = u2u64 ssa_31 | |
| vec1 64 ssa_33 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_34 = deref_ptr_as_array &(*ssa_33)[ssa_32] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_32] */ | |
| intrinsic store_deref (ssa_34, ssa_29) (3, 0, 8, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=8 */ /* align_offset=0 */ | |
| intrinsic scoped_control_memory_barrier () (2, 3, 256, 2) /* mem_semantics=ACQ|REL */ /* mem_modes=shared */ /* mem_scope=WORKGROUP */ /* exec_scope=W | |
| ORKGROUP */ | |
| vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_3 (global uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_36 = deref_ptr_as_array &(*ssa_35)[ssa_17] (global uvec2) /* &(*(uvec2 *)ssa_3)[ssa_17] */ | |
| vec2 32 ssa_37 = intrinsic load_deref (ssa_36) (0, 8, 0) /* access=0 */ /* align_mul=8 */ /* align_offset=0 */ | |
| vec1 32 ssa_38 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_39 = u2u64 ssa_38 | |
| vec1 64 ssa_40 = u2u64 ssa_8 | |
| vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_42 = load_const (0x 0 /* 0.000000 */) | |
| vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[0] (shared uvec2) /* &(*(uvec2 *)ssa_1)[0] */ | |
| vec1 64 ssa_44 = deref_array &(*ssa_43)[ssa_40] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40] */ | |
| vec1 32 ssa_45 = load_const (0x00000001 /* 0.000000 */) | |
| vec1 64 ssa_46 = ishl ssa_39, ssa_45 | |
| vec1 64 ssa_47 = deref_ptr_as_array &(*ssa_44)[ssa_46] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40][ssa_46] */ | |
| vec1 32 ssa_48 = mov ssa_37.x | |
| intrinsic store_deref (ssa_47, ssa_48) (1, 0, 0, 0) /* wrmask=x */ /* access=0 */ /* align_mul=0 */ /* align_offset=0 */ | |
| vec1 64 ssa_49 = load_const (0x 1 /* 0.000000 */) | |
| vec1 64 ssa_50 = iadd ssa_46, ssa_49 | |
| vec1 64 ssa_51 = deref_ptr_as_array &(*ssa_44)[ssa_50] (shared uint) /* &(*(uvec2 *)ssa_1)[0][ssa_40][ssa_50] */ | |
| vec1 32 ssa_52 = mov ssa_37.y | |
| intrinsic store_deref (ssa_51, ssa_52) (1, 0, 0, 0) /* wrmask=x */ /* access=0 */ /* align_mul=0 */ /* align_offset=0 */ | |
| intrinsic scoped_control_memory_barrier () (2, 3, 256, 2) /* mem_semantics=ACQ|REL */ /* mem_modes=shared */ /* mem_scope=WORKGROUP */ /* exec_scope=W | |
| ORKGROUP */ | |
| vec1 32 ssa_53 = intrinsic load_deref (ssa_19) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_54 = u2u64 ssa_53 | |
| vec1 64 ssa_55 = deref_cast (uvec2 *)ssa_1 (shared uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_56 = deref_ptr_as_array &(*ssa_55)[ssa_54] (shared uvec2) /* &(*(uvec2 *)ssa_1)[ssa_54] */ | |
| vec1 64 ssa_57 = deref_array &(*ssa_56)[ssa_40] (shared uint) /* &(*(uvec2 *)ssa_1)[ssa_54][ssa_40] */ | |
| vec1 64 ssa_58 = deref_cast (uvec2 *)ssa_7 (global uvec2) /* ptr_stride=8 */ | |
| vec1 64 ssa_59 = deref_ptr_as_array &(*ssa_58)[ssa_54] (global uvec2) /* &(*(uvec2 *)ssa_7)[ssa_54] */ | |
| vec1 64 ssa_60 = deref_array &(*ssa_59)[ssa_40] (global uint) /* &(*(uvec2 *)ssa_7)[ssa_54][ssa_40] */ | |
| vec2 32 ssa_61 = intrinsic load_deref (ssa_56) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_62 = load_const (0x 1 /* 0.000000 */) | |
| vec1 1 ssa_63 = ilt ssa_40, ssa_62 | |
| vec1 32 ssa_64 = mov ssa_61.y | |
| vec1 32 ssa_65 = mov ssa_61.x | |
| vec1 32 ssa_66 = bcsel ssa_63, ssa_65, ssa_64 | |
| vec2 32 ssa_67 = intrinsic load_deref (ssa_59) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec2 64 ssa_68 = load_const (0x 0 /* 0.000000 */, 0x 1 /* 0.000000 */) | |
| vec2 1 ssa_69 = ieq ssa_40.xx, ssa_68 | |
| vec2 32 ssa_70 = bcsel ssa_69, ssa_66.xx, ssa_67 | |
| intrinsic store_deref (ssa_59, ssa_70) (3, 0, 4, 0) /* wrmask=xy */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_71 = deref_cast (uint *)ssa_57 (shared uint) /* ptr_stride=4 */ | |
| vec1 64 ssa_72 = load_const (0x 1 /* 0.000000 */) | |
| vec1 64 ssa_73 = deref_ptr_as_array &(*ssa_71)[1] (shared uint) /* &(*(uint *)ssa_57)[1] */ | |
| vec1 32 ssa_74 = intrinsic load_deref (ssa_73) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| vec1 64 ssa_75 = deref_cast (uint *)ssa_60 (global uint) /* ptr_stride=4 */ | |
| vec1 64 ssa_76 = load_const (0x 1 /* 0.000000 */) | |
| vec1 64 ssa_77 = deref_ptr_as_array &(*ssa_75)[1] (global uint) /* &(*(uint *)ssa_60)[1] */ | |
| intrinsic store_deref (ssa_77, ssa_74) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */ | |
| return | |
| /* succs: block_1 */ | |
| block block_1: | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment