diff options
author | rodri <rgl@antares-labs.eu> | 2023-12-01 21:58:15 +0000 |
---|---|---|
committer | rodri <rgl@antares-labs.eu> | 2023-12-01 21:58:15 +0000 |
commit | cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5 (patch) | |
tree | 7a943ff0655e8df9571a097c6beafa2a024f08a4 /dppd.s | |
parent | a0b600a89c2e6e636579fe727235d036c08c7a9d (diff) | |
download | amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.gz amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.bz2 amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.zip |
implement memory aligned versions of some functions.
Diffstat (limited to 'dppd.s')
-rw-r--r-- | dppd.s | 33 |
1 files changed, 33 insertions, 0 deletions
@@ -41,6 +41,29 @@ TEXT dotvec2_avx(SB), 1, $0
 	VZEROUPPER
 	RET
 
+TEXT dotvec2_sse_a(SB), 1, $0	/* aligned-load dot product of two 2-double vectors: lane 0 of X0 = a[0]*b[0] + a[1]*b[1]. NOTE(review): a is presumably the first argument arriving in BP and X0 the return register -- confirm against the C prototype */
+	MOVQ b+8(FP), DX	/* DX = pointer argument b */
+	MOVAPD 0(DX), X1	/* X1 = the two doubles at b; MOVAPD faults unless the address is 16-byte aligned */
+	MOVAPD 0(BP), X0	/* X0 = the two doubles at the address held in BP (same alignment requirement) */
+	MULPD X1, X0	/* X0 = lane-wise products */
+	HADDPD X0, X0	/* both lanes of X0 = sum of the two products */
+	RET
+
+TEXT dotvec2_sse4_a(SB), 1, $0	/* same computation as a single DPPD; NOTE(review): a presumably in BP, result presumably returned in X0 -- confirm */
+	MOVQ b+8(FP), DX	/* DX = pointer argument b */
+	MOVAPD 0(DX), X1	/* X1 = the two doubles at b (address must be 16-byte aligned) */
+	MOVAPD 0(BP), X0	/* X0 = the two doubles at the address held in BP (must be 16-byte aligned) */
+	DPPD $0x31, X1, X0	/* mask 0x31: multiply both lanes (high nibble 3), write the sum to lane 0 only (low nibble 1); lane 1 becomes 0 */
+	RET
+
+TEXT dotvec2_avx_a(SB), 1, $0	/* VEX-encoded variant; NOTE(review): the VMOVAPD_128mr and VDPPD macros are defined outside this hunk -- the per-line notes below go by their names and arguments, confirm against the macro definitions */
+	MOVQ b+8(FP), DX	/* DX = pointer argument b */
+	VMOVAPD_128mr(0, rDX, rX0)	/* presumably a 128-bit aligned load of 0(DX) into X0 -- confirm */
+	VMOVAPD_128mr(0, rBP, rX1)	/* presumably a 128-bit aligned load of 0(BP) into X1 -- confirm */
+	VDPPD(rX1, rX0, rX0) /* VDPPD $0x31, X1, X0, X0 */
+	VZEROUPPER	/* zero the upper halves of the YMM registers before returning */
+	RET
+
 TEXT dotvec3_sse4(SB), 1, $0
 	MOVUPD a+0(FP), X0
 	MOVUPD b+32(FP), X1
@@ -63,6 +86,16 @@ TEXT dotvec3_avx(SB), 1, $0
 	VZEROUPPER
 	RET
 
+TEXT dotvec3_sse4_a(SB), 1, $0	/* aligned-load dot product over three doubles: lane 0 of X0 = a[0]*b[0] + a[1]*b[1] + a[2]*b[2]. NOTE(review): a presumably in BP, result presumably returned in X0 -- confirm */
+	MOVQ b+8(FP), DX	/* DX = pointer argument b */
+	MOVAPD 0(DX), X0	/* X0 = first two doubles at b (address must be 16-byte aligned) */
+	MOVAPD 0(BP), X1	/* X1 = first two doubles at the address held in BP (must be 16-byte aligned) */
+	DPPD $0x31, X1, X0	/* lane 0 of X0 = sum of the first two products; lane 1 = 0 */
+	MOVSD 16(DX), X1	/* X1 = third double of b (byte offset 16) */
+	MULSD 16(BP), X1	/* X1 *= the double at byte offset 16 from BP */
+	ADDSD X1, X0	/* add the third product to the running sum in lane 0 */
+	RET
+
 TEXT Pt2b(SB), 1, $0
 	MOVQ BP, DI
 	MOVSD x+8(FP), X0 |