diff options
author | rodri <rgl@antares-labs.eu> | 2023-11-25 10:34:41 +0000 |
---|---|---|
committer | rodri <rgl@antares-labs.eu> | 2023-11-25 10:34:41 +0000 |
commit | 675aa84403f98776a7d463e1cc5f9bd41cdbab92 (patch) | |
tree | 7c0f2fbb1814c5a9f8975307da8e79b0c0165d89 /bench | |
parent | cc3307440e698d58843a5273519f4988c01937f1 (diff) | |
download | amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.tar.gz amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.tar.bz2 amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.zip |
Cleaned things up and improved the organization a bit.
Diffstat (limited to 'bench')
-rw-r--r-- | bench/main.c | 179 |
-rw-r--r-- | bench/mkfile | 23 |
2 files changed, 188 insertions, 14 deletions
diff --git a/bench/main.c b/bench/main.c index 9d00719..811c471 100644 --- a/bench/main.c +++ b/bench/main.c @@ -2,13 +2,61 @@ #include <libc.h> #include <thread.h> #include <geometry.h> -#include "b.h" +#include "../bench9/b.h" -double dppd(Point2, Point2); -double dppda(Point2, Point2); -double dppd3(Point3, Point3); -double dppd3a(Point3, Point3); -Point3 xvec3(Point3, Point3); +double min(double, double); +double dotvec2_sse4(Point2, Point2); +double dotvec2_avx(Point2, Point2); +double dotvec3_sse4(Point3, Point3); +double dotvec3_avx(Point3, Point3); +Point2 Pt2b(double, double, double); +Point3 crossvec3_sse(Point3, Point3); +double hsubpd(double, double); +double fma(double, double, double); +Point2 addpt2_avx(Point2, Point2); + +double +fmin(double a, double b) +{ + return a<b? a: b; +} + +double +madd(double a, double b, double c) +{ + return a + b*c; +} + +static void +bmin(int fd) +{ + Bgr g; + B *b0, *b1; + double a, b; + int i; + + benchinitgr(&g, "min"); + b0 = benchadd(&g, "fmin"); + b1 = benchadd(&g, "fmin_sse"); + + while(b0->n > 0 || b1->n > 0){ + a = truerand()*frand(); + b = truerand()*frand(); + + benchin(b0); + for(i = 0; i < 1e6; i++) + fmin(a, b); + benchout(b0); + + benchin(b1); + for(i = 0; i < 1e6; i++) + min(a, b); + benchout(b1); + } + + benchprintgr(&g, fd); + benchfreegr(&g); +} static void bdotvec2(int fd) @@ -20,7 +68,7 @@ bdotvec2(int fd) benchinitgr(&g, "2d dot product"); b0 = benchadd(&g, "dotvec2"); - b1 = benchadd(&g, "dotvec2_simd"); + b1 = benchadd(&g, "dotvec2_sse4"); b2 = benchadd(&g, "dotvec2_avx"); while(b0->n > 0 || b1->n > 0){ @@ -34,12 +82,12 @@ bdotvec2(int fd) benchin(b1); for(i = 0; i < 1e6; i++) - dppd(a, b); + dotvec2_sse4(a, b); benchout(b1); benchin(b2); for(i = 0; i < 1e6; i++) - dppda(a, b); + dotvec2_avx(a, b); benchout(b2); } @@ -57,7 +105,7 @@ bdotvec3(int fd) benchinitgr(&g, "3d dot product"); b0 = benchadd(&g, "dotvec3"); - b1 = benchadd(&g, "dotvec3_simd"); + b1 = benchadd(&g, "dotvec3_sse4"); b2 = 
benchadd(&g, "dotvec3_avx"); while(b0->n > 0 || b1->n > 0){ @@ -71,12 +119,12 @@ bdotvec3(int fd) benchin(b1); for(i = 0; i < 1e6; i++) - dppd3(a, b); + dotvec3_sse4(a, b); benchout(b1); benchin(b2); for(i = 0; i < 1e6; i++) - dppd3a(a, b); + dotvec3_avx(a, b); benchout(b2); } @@ -94,7 +142,7 @@ bcrossvec3(int fd) benchinitgr(&g, "3d cross product"); b0 = benchadd(&g, "crossvec3"); - b1 = benchadd(&g, "crossvec3_simd"); + b1 = benchadd(&g, "crossvec3_sse"); while(b0->n > 0 || b1->n > 0){ a = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand()); @@ -107,7 +155,102 @@ bcrossvec3(int fd) benchin(b1); for(i = 0; i < 1e6; i++) - xvec3(a, b); + crossvec3_sse(a, b); + benchout(b1); + } + + benchprintgr(&g, fd); + benchfreegr(&g); +} + +static void +bPt2(int fd) +{ + Bgr g; + B *b0, *b1; + double x, y, w; + int i; + + benchinitgr(&g, "Pt2"); + b0 = benchadd(&g, "Pt2"); + b1 = benchadd(&g, "Pt2b"); + + while(b0->n > 0 || b1->n > 0){ + x = truerand()*frand(); + y = truerand()*frand(); + w = truerand()*frand(); + + benchin(b0); + for(i = 0; i < 1e6; i++) + Pt2(x, y, w); + benchout(b0); + + benchin(b1); + for(i = 0; i < 1e6; i++) + Pt2b(x, y, w); + benchout(b1); + } + + benchprintgr(&g, fd); + benchfreegr(&g); +} + +static void +bfma(int fd) +{ + Bgr g; + B *b0, *b1; + double a, b, c; + int i; + + benchinitgr(&g, "multiply + add"); + b0 = benchadd(&g, "madd"); + b1 = benchadd(&g, "fma_avx"); + + while(b0->n > 0 || b1->n > 0){ + a = truerand()*frand(); + b = truerand()*frand(); + c = truerand()*frand(); + + benchin(b0); + for(i = 0; i < 1e6; i++) + madd(a, b, c); + benchout(b0); + + benchin(b1); + for(i = 0; i < 1e6; i++) + fma(a, b, c); + benchout(b1); + } + + benchprintgr(&g, fd); + benchfreegr(&g); +} + +static void +baddpt2(int fd) +{ + Bgr g; + B *b0, *b1; + Point2 a, b; + int i; + + benchinitgr(&g, "2d point sum"); + b0 = benchadd(&g, "addpt2"); + b1 = benchadd(&g, "addpt2_avx"); + + while(b0->n > 0 || b1->n > 0){ + a = Pt2(truerand()*frand(), 
truerand()*frand(), truerand()*frand()); + b = Pt2(truerand()*frand(), truerand()*frand(), truerand()*frand()); + + benchin(b0); + for(i = 0; i < 1e6; i++) + addpt2(a, b); + benchout(b0); + + benchin(b1); + for(i = 0; i < 1e6; i++) + addpt2_avx(a, b); benchout(b1); } @@ -124,11 +267,19 @@ threadmain(int argc, char **argv) if(benchwire(0) != 0) fprint(2, "failed to wire: %r\n"); + bmin(1); + bseparator(1); bdotvec2(1); bseparator(1); bdotvec3(1); bseparator(1); bcrossvec3(1); + bseparator(1); + bPt2(1); + bseparator(1); + bfma(1); + bseparator(1); + baddpt2(1); threadexitsall(nil); } diff --git a/bench/mkfile b/bench/mkfile new file mode 100644 index 0000000..e649008 --- /dev/null +++ b/bench/mkfile @@ -0,0 +1,23 @@ +</$objtype/mkfile + +TARG=bench9 +BIN=/$objtype/bin +arch=`{echo __^$objtype^__} +CFLAGS=$CFLAGS -D$arch -p + +HFILES=\ + ../bench9/b.h\ + ../regs.h\ + ../sse.h\ + ../avx.h\ + +OFILES=\ + ../bench9/b.$O\ + ../bench9/b_$objtype.$O\ + ../min.$O\ + ../dppd.$O\ + main.$O\ + +default:V: all + +</sys/src/cmd/mkone |