diff options
author | rodri <rgl@antares-labs.eu> | 2023-11-24 12:08:15 +0000 |
---|---|---|
committer | rodri <rgl@antares-labs.eu> | 2023-11-24 12:08:15 +0000 |
commit | 9404d16a4263a87559af64bfb18c91ccebaa601d (patch) | |
tree | 0a17daf278b3656042a35e1227b665bc87dbf992 /dppd.s | |
parent | d8ab83e060bf9bb6b1d51915d63578a6dc8cacaf (diff) | |
download | amd64-simd-9404d16a4263a87559af64bfb18c91ccebaa601d.tar.gz amd64-simd-9404d16a4263a87559af64bfb18c91ccebaa601d.tar.bz2 amd64-simd-9404d16a4263a87559af64bfb18c91ccebaa601d.zip |
fix the 9 asm notes. add VFMA231[SP]D instructions.
Diffstat (limited to 'dppd.s')
-rw-r--r-- | dppd.s | 25 |
1 files changed, 16 insertions, 9 deletions
@@ -7,8 +7,8 @@ TEXT dppd(SB), 1, $0
 	MOVQ SP, AX
 	MOVLPD(8, rAX, rX0) /* MOVLPD a+0(FP), X0 */
 	MOVHPD(16, rAX, rX0) /* MOVHPD a+8(FP), X0 */
-	MOVLPD(32, rAX, rX1) /* MOVLPD b+0(FP), X1 */
-	MOVHPD(40, rAX, rX1) /* MOVHPD b+8(FP), X1*/
+	MOVLPD(32, rAX, rX1) /* MOVLPD b+24(FP), X1 */
+	MOVHPD(40, rAX, rX1) /* MOVHPD b+32(FP), X1*/
 	DPPD(rX1, rX0) /* DPPD $0x31, X1, X0 */
 	RET
 
@@ -16,12 +16,12 @@ TEXT dppd3(SB), 1, $0
 	MOVQ SP, AX
 	MOVLPD(8, rAX, rX0) /* MOVLPD a+0(FP), X0 */
 	MOVHPD(16, rAX, rX0) /* MOVHPD a+8(FP), X0 */
-	MOVLPD(40, rAX, rX1) /* MOVLPD b+0(FP), X1 */
-	MOVHPD(48, rAX, rX1) /* MOVHPD b+8(FP), X1 */
+	MOVLPD(40, rAX, rX1) /* MOVLPD b+32(FP), X1 */
+	MOVHPD(48, rAX, rX1) /* MOVHPD b+40(FP), X1 */
 	DPPD(rX1, rX0) /* DPPD $0x31, X1, X0 */
 	MOVSD one(SB), X1
 	MOVHPD(24, rAX, rX0) /* MOVHPD a+16(FP), X0 */
-	MOVHPD(56, rAX, rX1) /* MOVHPD b+16(FP), X1 */
+	MOVHPD(56, rAX, rX1) /* MOVHPD b+48(FP), X1 */
 	DPPD(rX1, rX0) /* DPPD $0x31, X1, X0 */
 	RET
 
@@ -38,18 +38,18 @@ TEXT Pt2b(SB), 1, $0
 TEXT hsubpd(SB), 1, $0
 	MOVQ SP, AX
 	MOVLPD(8, rAX, rX0) /* MOVLPD a+0(FP), X0 */
-	MOVHPD(16, rAX, rX0) /* MOVHPD b+0(FP), X0 */
+	MOVHPD(16, rAX, rX0) /* MOVHPD b+8(FP), X0 */
 	HSUBPD(rX0, rX0) /* HSUBPD X0, X0 */
 	RET
 
 TEXT xvec3(SB), 1, $0
 	MOVQ SP, AX
 	ADDQ $8, AX
-	MOVLPD(40, rAX, rX0) /* MOVLPD b+0(FP), X0 */
+	MOVLPD(40, rAX, rX0) /* MOVLPD b+32(FP), X0 */
 	MOVHPD(8, rAX, rX0) /* MOVHPD a+0(FP), X0 */
 	MOVLPD(16, rAX, rX1) /* MOVLPD a+8(FP), X1 */
-	MOVHPD(48, rAX, rX1) /* MOVHPD b+8(FP), X1 */
-	MOVLPD(56, rAX, rX2) /* MOVLPD b+16(FP), X2 */
+	MOVHPD(48, rAX, rX1) /* MOVHPD b+40(FP), X1 */
+	MOVLPD(56, rAX, rX2) /* MOVLPD b+48(FP), X2 */
 	MOVHPD(24, rAX, rX2) /* MOVHPD a+16(FP), X2 */
 	MOVAPD X1, X3
 	MULPD X2, X3
@@ -69,3 +69,10 @@ TEXT xvec3(SB), 1, $0
 	XORPD X0, X0
 	MOVSD X0, 24(DI)
 	RET
+
+TEXT fma(SB), 1, $0
+	MOVSD a+0(FP), X0
+	MOVSD b+8(FP), X1
+	MOVSD c+16(FP), X2
+	VFMADD231SD(rX1, rX2, rX0)
+	RET