From a0b600a89c2e6e636579fe727235d036c08c7a9d Mon Sep 17 00:00:00 2001 From: rodri Date: Wed, 29 Nov 2023 21:19:16 +0000 Subject: remove instructions recently added to 9front. implemented some tests. --- avx.h | 2 +- bench/main.c | 17 ++++++++++++----- dppd.s | 32 ++++++++++++++++++++++++++------ main.c | 34 ++++++++++++++++++++++++++++++++++ sse.h | 23 ----------------------- 5 files changed, 73 insertions(+), 35 deletions(-) diff --git a/avx.h b/avx.h index 5ae12ec..76a7e4b 100644 --- a/avx.h +++ b/avx.h @@ -55,7 +55,7 @@ #define VMOVDQA_256rm(s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_256,VEX_p_66); \ VOP(0x7F, 0x3, (s), (d)) -/* VMODQU */ +/* VMOVDQU */ #define VMOVDQU_128mr(off, s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_F3); \ VOPi(0x6F, 0x1, (d), (s), (off)) #define VMOVDQU_128rm(s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_F3); \ diff --git a/bench/main.c b/bench/main.c index 060a3d3..6f4886f 100644 --- a/bench/main.c +++ b/bench/main.c @@ -5,6 +5,7 @@ #include "../bench9/b.h" double min(double, double); +double dotvec2_sse(Point2, Point2); double dotvec2_sse4(Point2, Point2); double dotvec2_avx(Point2, Point2); double dotvec3_sse4(Point3, Point3); @@ -64,14 +65,15 @@ static void bdotvec2(int fd) { Bgr g; - B *b0, *b1, *b2; + B *b0, *b1, *b2, *b3; Point2 a, b; int i; benchinitgr(&g, "2d dot product"); b0 = benchadd(&g, "dotvec2"); - b1 = benchadd(&g, "dotvec2_sse4"); - b2 = benchadd(&g, "dotvec2_avx"); + b1 = benchadd(&g, "dotvec2_sse"); + b2 = benchadd(&g, "dotvec2_sse4"); + b3 = benchadd(&g, "dotvec2_avx"); while(b0->n > 0 || b1->n > 0){ a = Vec2(truerand()*frand(), truerand()*frand()); @@ -84,13 +86,18 @@ bdotvec2(int fd) benchin(b1); for(i = 0; i < 1e6; i++) - dotvec2_sse4(a, b); + dotvec2_sse(a, b); benchout(b1); benchin(b2); for(i = 0; i < 1e6; i++) - dotvec2_avx(a, b); + dotvec2_sse4(a, b); benchout(b2); + + benchin(b3); + for(i = 0; i < 1e6; i++) + dotvec2_avx(a, b); + benchout(b3); } benchprintgr(&g, fd); diff --git a/dppd.s b/dppd.s index 55ee3d9..d480ddb 100644 --- a/dppd.s +++ b/dppd.s @@ -5,10 +5,30 @@ DATA one(SB)/8,$1.0 GLOBL one(SB), $8 +TEXT round(SB), 1, $0 + MOVSD a+0(FP), X0 + ROUNDSD $0x4, X0, X0 + RET + +TEXT addsub_sse(SB), 1, $0 + MOVQ b+8(FP), DX + MOVUPD 0(BP), X1 + MOVUPD 0(DX), X0 + ADDSUBPD X1, X0 + MOVUPD X0, 0(DX) + RET + +TEXT dotvec2_sse(SB), 1, $0 + MOVUPD a+0(FP), X0 + MOVUPD b+24(FP), X1 + MULPD X1, X0 + HADDPD X0, X0 + RET + TEXT dotvec2_sse4(SB), 1, $0 MOVUPD a+0(FP), X0 MOVUPD b+24(FP), X1 - DPPD(rX1, rX0) /* DPPD $0x31, X1, X0 */ + DPPD $0x31, X1, X0 RET TEXT dotvec2_avx(SB), 1, $0 @@ -24,7 +44,7 @@ TEXT dotvec2_avx(SB), 1, $0 TEXT dotvec3_sse4(SB), 1, $0 MOVUPD a+0(FP), X0 MOVUPD b+32(FP), X1 - DPPD(rX1, rX0) /* DPPD $0x31, X1, X0 */ + DPPD $0x31, X1, X0 MOVSD a+16(FP), X1 MULSD b+48(FP), X1 ADDSD X1, X0 @@ -56,7 +76,7 @@ TEXT Pt2b(SB), 1, $0 TEXT hsubpd(SB), 1, $0 MOVLPD a+0(FP), X0 MOVHPD b+8(FP), X0 - HSUBPD(rX0, rX0) /* HSUBPD X0, X0 */ + HSUBPD X0, X0 RET TEXT crossvec3_sse(SB), 1, $0 @@ -68,15 +88,15 @@ TEXT crossvec3_sse(SB), 1, $0 MOVHPD a+24(FP), X2 /* X2 := [a.z][b.z] */ MOVAPD X1, X3 MULPD X2, X3 - HSUBPD(rX3, rX3) /* x */ + HSUBPD X3, X3 /* x */ MOVAPD X2, X4 SHUFPD $0x1, X4, X4 MULPD X0, X4 - HSUBPD(rX4, rX4) /* y */ + HSUBPD X4, X4 /* y */ MOVAPD X0, X5 MULPD X1, X5 SHUFPD $0x1, X5, X5 - HSUBPD(rX5, rX5) /* z */ + HSUBPD X5, X5 /* z */ MOVQ BP, DI MOVSD X3, 0(DI) MOVSD X4, 8(DI) diff --git a/main.c b/main.c index 51a2f89..1c22cd8 100644 --- a/main.c +++ b/main.c @@ -3,6 +3,7 @@ #include double min(double, double); +double dotvec2_sse(Point2, Point2); double dotvec2_sse4(Point2, Point2); double dotvec2_avx(Point2, Point2); double dotvec3_sse4(Point3, Point3); @@ -14,6 +15,15 @@ double fma(double, double, double); Point2 addpt2_sse(Point2, Point2); Point2 addpt2_avx(Point2, Point2); Point3 addpt3_avx(Point3, Point3); +void addsub_sse(double*,double*); +double round(double); + +void +addsub(double *a, double *b) +{ + b[0] = b[0]-a[0]; + b[1] = b[1]+a[1]; +} double fmin(double a, double b) @@ -31,6 +41,7 @@ void main(int argc, char *argv[]) { double a, b, r; + double va[2], vb[2]; Point2 p0, p1, pr; Point3 p0t, p1t, prt; @@ -56,6 +67,9 @@ main(int argc, char *argv[]) r = dotvec2(p0, p1); print("dotvec2(%v, %v) = %g\n", p0, p1, r); r = 0; + r = dotvec2_sse(p0, p1); + print("dotvec2_sse(%v, %v) = %g\n", p0, p1, r); + r = 0; r = dotvec2_sse4(p0, p1); print("dotvec2_sse4(%v, %v) = %g\n", p0, p1, r); r = 0; @@ -127,5 +141,25 @@ main(int argc, char *argv[]) prt = addpt3_avx(p0t, p1t); print("addpt3_avx(%V, %V) = %V\n", p0t, p1t, prt); + print("\n"); + + va[0] = va[1] = a; + vb[0] = vb[1] = b; + print("addsub([%g %g], [%g %g]) = ", va[0], va[1], vb[0], vb[1]); + addsub(va, vb); + print("[%g %g]\n", vb[0], vb[1]); + + va[0] = va[1] = a; + vb[0] = vb[1] = b; + print("addsub_sse([%g %g], [%g %g]) = ", va[0], va[1], vb[0], vb[1]); + addsub_sse(va, vb); + print("[%g %g]\n", vb[0], vb[1]); + + print("\n"); + + r = 0; + r = round(a); + print("round(%g) = %g\n", a, r); + exits(nil); } diff --git a/sse.h b/sse.h index 0a711d5..3d7a52f 100644 --- a/sse.h +++ b/sse.h @@ -18,26 +18,3 @@ /* MODQU */ #define MOVDQU_mr(off, s, d) F3OPi(0x6F, 0x1, (d), (s), (off)) #define MOVDQU_rm(off, s, d) F3OPi(0x7F, 0x1, (s), (d), (off)) - -/* MOVLPD */ -//opcode = 660F12 -//modrm = 01 000 000 [AX → X0] / 01 001 000 [AX → X1] -//disp8 = 8 / 32 -//#define MOVLPD(off, s, d) OPi(0x12, 0x1, (d), (s), (off)) - -/* MOVHPD */ -//opcode = 660F16 -//modrm = 01 000 000 [AX → X0] / 01 001 000 [AX → X1] -//disp8 = 16 / 40 -//#define MOVHPD(off, s, d) OPi(0x16, 0x1, (d), (s), (off)) - -/* HSUBPD */ -//opcode = 660F7D = 01100110 00001111 01111101 -//modrm = 11 000 000 [X0 → X0] -#define HSUBPD(s, d) OP(0x7D, 0x3, (d), (s)) - -/* DPPD */ -//opcode = 660F3A41 = 01100110 00001111 00111010 01000001 -//modrm = 11 000 001 [X1 → X0] -//imm8 = 0011 0001 -#define DPPD(s, d) OP4i(0x413A, 0x3, (d), (s), 0x31) -- cgit v1.2.3