Skip to content

Instantly share code, notes, and snippets.

@carstenbauer
Created December 9, 2022 10:16
Show Gist options
  • Select an option

  • Save carstenbauer/f6211009332c839af3d89dc7f0d01343 to your computer and use it in GitHub Desktop.

Select an option

Save carstenbauer/f6211009332c839af3d89dc7f0d01343 to your computer and use it in GitHub Desktop.
SIMD.jl
julia> function vector_dot(B, C)
           # Baseline dot product: a plain sequential loop that accumulates
           # B[k] * C[k] in index order, with no explicit SIMD annotation.
           # `eachindex(B, C)` yields indices valid for both arrays (and
           # throws if their indices differ), which is what makes skipping
           # the bounds checks with `@inbounds` safe.
           acc = zero(eltype(B))
           @inbounds for k in eachindex(B, C)
               acc = acc + B[k] * C[k]
           end
           return acc
       end
vector_dot (generic function with 1 method)
julia> function vector_dot_macro(B, C)
           # Dot product with the loop annotated by `@simd`, which permits the
           # compiler to reorder the accumulation so the loop can be
           # vectorized. For floating-point inputs the result may therefore
           # differ from a strictly sequential sum in the last bits.
           # `eachindex(B, C)` yields indices valid for both arrays, which is
           # what makes `@inbounds` safe here.
           acc = zero(eltype(B))
           @inbounds @simd for k in eachindex(B, C)
               acc = acc + B[k] * C[k]
           end
           return acc
       end
vector_dot_macro (generic function with 1 method)
julia> using SIMD
julia> function vector_dot_SIMD(B::Vector{T}, C::Vector{T}) where T
           # Dot product of `B` and `C` using explicit SIMD.jl vectors.
           #
           # Both inputs are consumed in chunks of N elements: each chunk is
           # loaded into a `Vec{N,T}`, the two chunks are multiplied lane-wise,
           # and the lanes of the product are summed into the scalar
           # accumulator `a`, which is returned.
           #
           # Requirements (checked with @assert, AssertionError on failure):
           #   * `B` and `C` have the same length;
           #   * that length is a multiple of N (there is no scalar remainder
           #     loop for a trailing partial chunk).
           N = 8 # SIMD width
           # These checks guard the `@inbounds` vector loads below: every load
           # reads the N elements starting at index i from both arrays.
           @assert length(B) == length(C)
           @assert length(B) % N == 0
           a = zero(T)
           @inbounds for i in 1:N:length(B)
               Bvec = vload(Vec{N,T}, B, i)
               Cvec = vload(Vec{N,T}, C, i)
               # A dot product accumulates lane-wise *products*; adding the
               # two vectors here would instead yield sum(B) + sum(C).
               a += sum(Bvec * Cvec)
           end
           return a
       end
vector_dot_SIMD (generic function with 1 method)
julia> using BenchmarkTools
# Benchmark inputs: two vectors of 51200 random numbers each. 51200 is a
# multiple of 8, which vector_dot_SIMD requires of its input length.
julia> B = rand(51200); C = rand(51200);
# `$B` / `$C` interpolate the global vectors into the benchmarked expression
# (BenchmarkTools convention for benchmarking with global variables).
# The line printed after each call is the time and allocation count reported
# by @btime for that function.
julia> @btime vector_dot($B,$C);
43.630 μs (0 allocations: 0 bytes)
julia> @btime vector_dot_macro($B,$C);
8.930 μs (0 allocations: 0 bytes)
julia> @btime vector_dot_SIMD($B,$C);
8.907 μs (0 allocations: 0 bytes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment