aboutsummaryrefslogtreecommitdiff
path: root/dppd.s
diff options
context:
space:
mode:
authorrodri <rgl@antares-labs.eu>2023-12-01 21:58:15 +0000
committerrodri <rgl@antares-labs.eu>2023-12-01 21:58:15 +0000
commitcdfd06439c4c4354e9fdc4f4124149a8d7abdfe5 (patch)
tree7a943ff0655e8df9571a097c6beafa2a024f08a4 /dppd.s
parenta0b600a89c2e6e636579fe727235d036c08c7a9d (diff)
downloadamd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.gz
amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.bz2
amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.zip
implement memory aligned versions of some functions.
Diffstat (limited to 'dppd.s')
-rw-r--r--dppd.s33
1 files changed, 33 insertions, 0 deletions
diff --git a/dppd.s b/dppd.s
index d480ddb..4c07876 100644
--- a/dppd.s
+++ b/dppd.s
@@ -41,6 +41,29 @@ TEXT dotvec2_avx(SB), 1, $0
VZEROUPPER
RET
+TEXT dotvec2_sse_a(SB), 1, $0 /* aligned-load variant: lane-wise product of two pairs of doubles, summed into X0; $0 = no local frame */
+ MOVQ b+8(FP), DX /* DX = pointer argument b, read from the argument frame */
+ MOVAPD 0(DX), X1 /* X1 = the 16 bytes at b; MOVAPD faults unless the address is 16-byte aligned */
+ MOVAPD 0(BP), X0 /* X0 = the 16 bytes at the address held in BP (presumably the first argument, passed in BP -- confirm against the C prototype) */
+ MULPD X1, X0 /* X0 = { X0[0]*X1[0], X0[1]*X1[1] } */
+ HADDPD X0, X0 /* horizontal add: both lanes of X0 = X0[0] + X0[1] */
+ RET /* the sum is left in X0 */
+
+TEXT dotvec2_sse4_a(SB), 1, $0 /* aligned-load variant: 2-lane double dot product using the SSE4.1 DPPD instruction; result in X0 */
+ MOVQ b+8(FP), DX /* DX = pointer argument b, read from the argument frame */
+ MOVAPD 0(DX), X1 /* X1 = the 16 bytes at b; MOVAPD faults unless the address is 16-byte aligned */
+ MOVAPD 0(BP), X0 /* X0 = the 16 bytes at the address held in BP (presumably the first argument, passed in BP -- confirm against the C prototype) */
+ DPPD $0x31, X1, X0 /* mask 0x31: high nibble 3 multiplies both lanes, low nibble 1 stores the sum in lane 0 of X0 and zeroes lane 1 */
+ RET /* the dot product is left in the low lane of X0 */
+
+TEXT dotvec2_avx_a(SB), 1, $0 /* aligned-load variant of the 2-lane double dot product, built from AVX encoding macros defined outside this hunk */
+ MOVQ b+8(FP), DX /* DX = pointer argument b, read from the argument frame */
+ VMOVAPD_128mr(0, rDX, rX0) /* NOTE(review): presumably an aligned 128-bit load of 0(DX) into X0 -- confirm against the macro definition */
+ VMOVAPD_128mr(0, rBP, rX1) /* NOTE(review): presumably an aligned 128-bit load of 0(BP) into X1; BP is presumably the first argument -- confirm */
+ VDPPD(rX1, rX0, rX0) /* VDPPD $0x31, X1, X0, X0 -- mask 0x31: multiply both lanes, store the sum in lane 0 of X0 */
+ VZEROUPPER /* zeroes bits 128 and up of every YMM register; presumably here to avoid AVX-to-SSE transition stalls in the caller */
+ RET /* the dot product is left in the low lane of X0 */
+
TEXT dotvec3_sse4(SB), 1, $0
MOVUPD a+0(FP), X0
MOVUPD b+32(FP), X1
@@ -63,6 +86,16 @@ TEXT dotvec3_avx(SB), 1, $0
VZEROUPPER
RET
+TEXT dotvec3_sse4_a(SB), 1, $0 /* aligned-load variant: three-term double dot product, DPPD for the first two terms plus a scalar multiply-add for the third; result in X0 */
+ MOVQ b+8(FP), DX /* DX = pointer argument b, read from the argument frame */
+ MOVAPD 0(DX), X0 /* X0 = first two doubles at b; MOVAPD faults unless the address is 16-byte aligned */
+ MOVAPD 0(BP), X1 /* X1 = first two doubles at the address held in BP (presumably the first argument, passed in BP -- confirm against the C prototype) */
+ DPPD $0x31, X1, X0 /* X0[0] = X0[0]*X1[0] + X0[1]*X1[1]; lane 1 of X0 is zeroed by the mask's low nibble */
+ MOVSD 16(DX), X1 /* X1[0] = third double at b (byte offset 16); X1 is reused as scratch from here on */
+ MULSD 16(BP), X1 /* X1[0] *= the double at byte offset 16 from BP */
+ ADDSD X1, X0 /* X0[0] += X1[0], folding the third term into the sum */
+ RET /* the dot product is left in the low lane of X0 */
+
TEXT Pt2b(SB), 1, $0
MOVQ BP, DI
MOVSD x+8(FP), X0