Skip to content

Instantly share code, notes, and snippets.

@recp
Last active August 5, 2018 11:56
Show Gist options
  • Select an option

  • Save recp/d5800146aebea706c72671ea388cfde5 to your computer and use it in GitHub Desktop.

Select an option

Save recp/d5800146aebea706c72671ea388cfde5 to your computer and use it in GitHub Desktop.
cglm-mat4-mul-sse2.txt
# ---------------------------------------------------------------------------
# glm_mat4_mul_sse2 -- listing 1: lane broadcasts built from movaps + shufps.
#
# Notation: A_i = the 16 bytes at [rdi + 16*i], B_j = the 16 bytes at
# [rsi + 16*j], each treated as four packed single-precision floats
# (the arithmetic below is mulps/addps).
#
# For every j in 0..3 the routine stores
#     [rdx + 16*j] = (A0*B_j[0] + A1*B_j[1]) + (A3*B_j[3] + A2*B_j[2])
# where B_j[k] is lane k of B_j replicated into all four lanes.
#
# In:    rdi, rsi = source pointers, rdx = destination pointer.
#        NOTE(review): this matches the first three integer arguments of the
#        System V AMD64 ABI, presumably (m1, m2, dest) 4x4 float matrices --
#        confirm against the C prototype.
# Out:   64 bytes written at [rdx]; nothing is returned in a register.
# Clobb: xmm0-xmm7. No general-purpose register is written, no stack is used.
# Align: every memory access is movaps, which faults unless the address is
#        16-byte aligned, so all three pointers must be 16-byte aligned.
# Alias: all of A is loaded before the first store, and output j is stored
#        only after B_j has been loaded, so rdx may equal rdi or rsi.
# ---------------------------------------------------------------------------
glm_mat4_mul_sse2:
# Load B_0 and keep A0..A3 resident in registers for the whole routine:
#   xmm3 = A0, xmm2 = A1, xmm4 = A2, xmm1 = A3.
movaps xmm0, XMMWORD PTR [rsi]
movaps xmm3, XMMWORD PTR [rdi]
movaps xmm2, XMMWORD PTR [rdi+16]
movaps xmm4, XMMWORD PTR [rdi+32]
movaps xmm1, XMMWORD PTR [rdi+48]
# --- output 0 ---
# shufps overwrites its destination, so B_0 is copied three times first.
# With identical source and destination contents, the immediates
# 255/170/85/0 (0xFF/0xAA/0x55/0x00) replicate lane 3/2/1/0 into all lanes.
movaps xmm5, xmm0
movaps xmm7, xmm0
movaps xmm6, xmm0
shufps xmm5, xmm0, 255
shufps xmm7, xmm0, 170
shufps xmm6, xmm0, 85
mulps xmm7, xmm4
mulps xmm6, xmm2
shufps xmm0, xmm0, 0
mulps xmm5, xmm1
mulps xmm0, xmm3
# Pairwise reduction: (A3*b3 + A2*b2) and (A0*b0 + A1*b1), then the total.
addps xmm5, xmm7
addps xmm0, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx], xmm0
# --- output 1: same sequence applied to B_1 ---
movaps xmm0, XMMWORD PTR [rsi+16]
movaps xmm5, xmm0
movaps xmm7, xmm0
movaps xmm6, xmm0
shufps xmm5, xmm0, 255
shufps xmm7, xmm0, 170
shufps xmm6, xmm0, 85
mulps xmm7, xmm4
mulps xmm6, xmm2
shufps xmm0, xmm0, 0
mulps xmm5, xmm1
mulps xmm0, xmm3
addps xmm5, xmm7
addps xmm0, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx+16], xmm0
# --- output 2: same sequence applied to B_2 ---
movaps xmm0, XMMWORD PTR [rsi+32]
movaps xmm5, xmm0
movaps xmm7, xmm0
movaps xmm6, xmm0
shufps xmm5, xmm0, 255
shufps xmm7, xmm0, 170
shufps xmm6, xmm0, 85
mulps xmm7, xmm4
mulps xmm6, xmm2
shufps xmm0, xmm0, 0
mulps xmm5, xmm1
mulps xmm0, xmm3
addps xmm5, xmm7
addps xmm0, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx+32], xmm0
# --- output 3 ---
# A1..A3 are not needed afterwards, so the products are accumulated directly
# into the registers holding them (xmm1, xmm4, xmm2) and the result ends up
# in xmm1 instead of xmm0. Here xmm7/xmm6/xmm5 hold lanes 3/2/1 of B_3.
movaps xmm0, XMMWORD PTR [rsi+48]
movaps xmm7, xmm0
movaps xmm6, xmm0
movaps xmm5, xmm0
shufps xmm7, xmm0, 255
shufps xmm6, xmm0, 170
shufps xmm5, xmm0, 85
mulps xmm1, xmm7
shufps xmm0, xmm0, 0
mulps xmm4, xmm6
mulps xmm2, xmm5
mulps xmm0, xmm3
addps xmm1, xmm4
addps xmm0, xmm2
addps xmm1, xmm0
movaps XMMWORD PTR [rdx+48], xmm1
ret
# ---------------------------------------------------------------------------
# glm_mat4_mul_sse2 -- listing 2: lane broadcasts done with pshufd.
#
# NOTE(review): this label duplicates the one on the shufps-based listing
# earlier in the file; the two appear to be alternative builds of the same
# function shown for comparison, not code meant to be assembled together.
#
# Notation: A_i = the 16 bytes at [rdi + 16*i], B_j = the 16 bytes at
# [rsi + 16*j], each treated as four packed single-precision floats
# (the arithmetic below is mulps/addps).
#
# For every j in 0..3 the routine stores
#     [rdx + 16*j] = A0*B_j[0] + A1*B_j[1] + A2*B_j[2] + A3*B_j[3]
# where B_j[k] is lane k of B_j replicated into all four lanes. The sum is
# always formed as (A0 term + A1 term) added to (A2 term + A3 term).
#
# Difference from the shufps form: pshufd writes a separate destination
# register, so no register copy is needed before each broadcast (three fewer
# instructions per output). pshufd/movdqa are integer-domain SSE2
# instructions applied here to float data.
# NOTE(review): on some microarchitectures mixing integer-domain shuffles
# with float arithmetic may add bypass latency -- measure before preferring
# either form.
#
# In:    rdi, rsi = source pointers, rdx = destination pointer.
#        NOTE(review): this matches the first three integer arguments of the
#        System V AMD64 ABI, presumably (m1, m2, dest) 4x4 float matrices --
#        confirm against the C prototype.
# Out:   64 bytes written at [rdx]; nothing is returned in a register.
# Clobb: xmm0-xmm7. No general-purpose register is written, no stack is used.
# Align: every memory access is movaps/movdqa, which fault unless the address
#        is 16-byte aligned, so all three pointers must be 16-byte aligned.
# Alias: all of A is loaded before the first store, and output j is stored
#        only after B_j has been loaded, so rdx may equal rdi or rsi.
# ---------------------------------------------------------------------------
glm_mat4_mul_sse2:
# Load B_0 and keep A0..A3 resident in registers for the whole routine:
#   xmm3 = A0, xmm2 = A1, xmm4 = A2, xmm1 = A3.
movdqa xmm0, XMMWORD PTR [rsi]
movaps xmm3, XMMWORD PTR [rdi]
movaps xmm2, XMMWORD PTR [rdi+16]
movaps xmm4, XMMWORD PTR [rdi+32]
movaps xmm1, XMMWORD PTR [rdi+48]
# --- output 0 ---
# Immediates 255/170/85/0 (0xFF/0xAA/0x55/0x00) replicate lane 3/2/1/0.
# Register roles here: xmm6 = lane 3, xmm5 = lane 2, xmm7 = lane 1.
pshufd xmm6, xmm0, 255
pshufd xmm5, xmm0, 170
pshufd xmm7, xmm0, 85
mulps xmm7, xmm2
pshufd xmm0, xmm0, 0
mulps xmm6, xmm1
mulps xmm0, xmm3
mulps xmm5, xmm4
# Pairwise reduction: (A0*b0 + A1*b1) and (A2*b2 + A3*b3), then the total.
addps xmm0, xmm7
addps xmm5, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx], xmm0
# --- output 1 ---
# Register roles differ from output 0: xmm5 = lane 3, xmm7 = lane 2,
# xmm6 = lane 1 of B_1.
movdqa xmm0, XMMWORD PTR [rsi+16]
pshufd xmm5, xmm0, 255
pshufd xmm7, xmm0, 170
pshufd xmm6, xmm0, 85
mulps xmm7, xmm4
pshufd xmm0, xmm0, 0
mulps xmm6, xmm2
mulps xmm5, xmm1
mulps xmm0, xmm3
addps xmm5, xmm7
addps xmm0, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx+16], xmm0
# --- output 2: same sequence as output 1, applied to B_2 ---
movdqa xmm0, XMMWORD PTR [rsi+32]
pshufd xmm5, xmm0, 255
pshufd xmm7, xmm0, 170
pshufd xmm6, xmm0, 85
mulps xmm7, xmm4
pshufd xmm0, xmm0, 0
mulps xmm6, xmm2
mulps xmm5, xmm1
mulps xmm0, xmm3
addps xmm5, xmm7
addps xmm0, xmm6
addps xmm0, xmm5
movaps XMMWORD PTR [rdx+32], xmm0
# --- output 3 ---
# A0..A3 are not needed afterwards, so every product is accumulated directly
# into the register holding the A vector (xmm1, xmm4, xmm2, xmm3) and the
# result ends up in xmm1. Here xmm7/xmm6/xmm5 hold lanes 3/2/1 of B_3.
movdqa xmm0, XMMWORD PTR [rsi+48]
pshufd xmm7, xmm0, 255
pshufd xmm6, xmm0, 170
pshufd xmm5, xmm0, 85
mulps xmm1, xmm7
pshufd xmm0, xmm0, 0
mulps xmm4, xmm6
mulps xmm2, xmm5
mulps xmm3, xmm0
addps xmm1, xmm4
addps xmm2, xmm3
addps xmm1, xmm2
movaps XMMWORD PTR [rdx+48], xmm1
ret
Notes:
To enable this (the integer-domain variant shown in the second listing, which uses movdqa/pshufd), the CGLM_USE_INT_DOMAIN macro must be defined before including the header, e.g. #define CGLM_USE_INT_DOMAIN
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment