From 0c51b567258d7b826e65976c7a72b081e30c2ccd Mon Sep 17 00:00:00 2001 From: rodri Date: Sat, 25 Nov 2023 12:05:33 +0000 Subject: add 3d point sum. --- avx.h | 4 ++++ bench/main.c | 34 ++++++++++++++++++++++++++++++++++ dppd.s | 10 ++++++++++ main.c | 10 ++++++++++ 4 files changed, 58 insertions(+) diff --git a/avx.h b/avx.h index 3c7129c..eea76a1 100644 --- a/avx.h +++ b/avx.h @@ -122,3 +122,7 @@ /* VFMADD231PD (256 bit) */ #define VFMADD231PD_256(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_256,VEX_p_66); \ VOP(0xB8, 0x3, (d), (s1)) + +/* VINSERTF128 (256 bit), Intel encoding VEX.256.66.0F3A 18 /r ib; i is handed to VOPi as its last argument (presumably the imm8) */ +#define VINSERTF128(i, s0, s1, d) VEX3(0,0,0,VEX_m_0F3A,0,(s0),VEX_L_256,VEX_p_66); \ + VOPi(0x18, 0x3, (d), (s1), (i)) diff --git a/bench/main.c b/bench/main.c index 811c471..1567e8c 100644 --- a/bench/main.c +++ b/bench/main.c @@ -14,6 +14,7 @@ Point3 crossvec3_sse(Point3, Point3); double hsubpd(double, double); double fma(double, double, double); Point2 addpt2_avx(Point2, Point2); +Point3 addpt3_avx(Point3, Point3); double fmin(double a, double b) @@ -258,6 +259,37 @@ baddpt2(int fd) benchfreegr(&g); } +static void +baddpt3(int fd) /* benchmark group comparing addpt3 with addpt3_avx: each round times 1e6 calls of each on the same pair of operands; fd is passed on to benchprintgr */ +{ + Bgr g; + B *b0, *b1; + Point3 a, b; + int i; + + benchinitgr(&g, "3d point sum"); + b0 = benchadd(&g, "addpt3"); + b1 = benchadd(&g, "addpt3_avx"); + + while(b0->n > 0 || b1->n > 0){ + a = Pt3(truerand()*frand(), truerand()*frand(), truerand()*frand(), truerand()*frand()); + b = Pt3(truerand()*frand(), truerand()*frand(), truerand()*frand(), truerand()*frand()); + + benchin(b0); + for(i = 0; i < 1e6; i++) + addpt3(a, b); + benchout(b0); + + benchin(b1); + for(i = 0; i < 1e6; i++) + addpt3_avx(a, b); + benchout(b1); + } + + benchprintgr(&g, fd); + benchfreegr(&g); +} + void threadmain(int argc, char **argv) { @@ -280,6 +312,8 @@ threadmain(int argc, char **argv) bfma(1); bseparator(1); baddpt2(1); + bseparator(1); + baddpt3(1); threadexitsall(nil); } diff --git a/dppd.s b/dppd.s index b746117..db805ff 100644 --- a/dppd.s +++ b/dppd.s @@ -99,6 +99,7 @@ TEXT fma(SB), 1, $0 
VFMADD231SD(rX1, rX2, rX0) RET +/* TODO: write only 24 bytes (the 256-bit VMOVDQU_256rm below moves 32) */ TEXT addpt2_avx(SB), 1, $0 MOVQ SP, AX ADDQ $8, AX @@ -107,3 +108,12 @@ TEXT addpt2_avx(SB), 1, $0 VADDPD_256rr(rX1, rX0, rX0) VMOVDQU_256rm(rX0, rAX) RET + +TEXT addpt3_avx(SB), 1, $0 /* AX = SP+8; the 256-bit values at 8(AX) and 40(AX) are added as packed doubles and the sum is moved to (AX); macro operand roles are inferred from their names, TODO confirm */ + MOVQ SP, AX + ADDQ $8, AX + VMOVDQU_256mr(8, rAX, rX0) + VMOVDQU_256mr(40, rAX, rX1) + VADDPD_256rr(rX1, rX0, rX0) + VMOVDQU_256rm(rX0, rAX) + RET diff --git a/main.c b/main.c index 274daf4..02fc11c 100644 --- a/main.c +++ b/main.c @@ -12,6 +12,7 @@ Point3 crossvec3_sse(Point3, Point3); double hsubpd(double, double); double fma(double, double, double); Point2 addpt2_avx(Point2, Point2); +Point3 addpt3_avx(Point3, Point3); double fmin(double a, double b) @@ -96,5 +97,14 @@ main(int argc, char *argv[]) pr = addpt2_avx(p0, p1); print("addpt2_avx(%v, %v) = %v\n", p0, p1, pr); + print("\n"); + + p0t = Pt3(a, 1, 1, b); + p1t = Pt3(b, 3, 1, a); + prt = addpt3(p0t, p1t); + print("addpt3(%V, %V) = %V\n", p0t, p1t, prt); + prt = addpt3_avx(p0t, p1t); + print("addpt3_avx(%V, %V) = %V\n", p0t, p1t, prt); + exits(nil); } -- cgit v1.2.3