From cc3307440e698d58843a5273519f4988c01937f1 Mon Sep 17 00:00:00 2001
From: rodri
Date: Fri, 24 Nov 2023 22:13:49 +0000
Subject: add more avx instructions and place VZEROUPPERs.

---
 dppd.s | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'dppd.s')

diff --git a/dppd.s b/dppd.s
index e738dde..de938b8 100644
--- a/dppd.s
+++ b/dppd.s
@@ -19,6 +19,7 @@ TEXT dppda(SB), 1, $0
 	VMOVUPD_128mr(8, rAX, rX0)	/* VMOVUPD a+0(FP), X0 */
 	VMOVUPD_128mr(32, rAX, rX1)	/* VMOVUPD b+24(FP), X1 */
 	VDPPD(rX1, rX0, rX0)	/* VDPPD $0x31, X1, X0, X0 */
+	VZEROUPPER
 	RET
 
 TEXT dppd3(SB), 1, $0
@@ -42,6 +43,7 @@ TEXT dppd3a(SB), 1, $0
 	MOVSD a+16(FP), X1
 	MOVSD b+48(FP), X2
 	VFMADD231SD(rX1, rX2, rX0)
+	VZEROUPPER
 	RET
 
 TEXT Pt2b(SB), 1, $0
@@ -89,9 +91,15 @@ TEXT xvec3(SB), 1, $0
 	MOVSD X0, 24(DI)
 	RET
 
+TEXT xvec3a(SB), 1, $0
+	MOVQ SP, AX
+	ADDQ $8, AX
+
+
 TEXT fma(SB), 1, $0
 	MOVSD a+0(FP), X0
 	MOVSD b+8(FP), X1
 	MOVSD c+16(FP), X2
 	VFMADD231SD(rX1, rX2, rX0)
+	VZEROUPPER
 	RET
-- 
cgit v1.2.3