aboutsummaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
author rodri <rgl@antares-labs.eu> 2023-11-25 10:34:41 +0000
committer rodri <rgl@antares-labs.eu> 2023-11-25 10:34:41 +0000
commit 675aa84403f98776a7d463e1cc5f9bd41cdbab92 (patch)
tree 7c0f2fbb1814c5a9f8975307da8e79b0c0165d89 /bench
parent cc3307440e698d58843a5273519f4988c01937f1 (diff)
download amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.tar.gz
amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.tar.bz2
amd64-simd-675aa84403f98776a7d463e1cc5f9bd41cdbab92.zip
cleaned things up and improved the organization a bit.
Diffstat (limited to 'bench')
-rw-r--r-- bench/main.c 179
-rw-r--r-- bench/mkfile 23
2 files changed, 188 insertions, 14 deletions
diff --git a/bench/main.c b/bench/main.c
index 9d00719..811c471 100644
--- a/bench/main.c
+++ b/bench/main.c
@@ -2,13 +2,61 @@
#include <libc.h>
#include <thread.h>
#include <geometry.h>
-#include "b.h"
+#include "../bench9/b.h"
-double dppd(Point2, Point2);
-double dppda(Point2, Point2);
-double dppd3(Point3, Point3);
-double dppd3a(Point3, Point3);
-Point3 xvec3(Point3, Point3);
+double min(double, double); /* no definition in this file; timed by bmin() under the label "fmin_sse" */
+double dotvec2_sse4(Point2, Point2); /* timed by bdotvec2() */
+double dotvec2_avx(Point2, Point2); /* timed by bdotvec2() */
+double dotvec3_sse4(Point3, Point3); /* timed by bdotvec3() */
+double dotvec3_avx(Point3, Point3); /* timed by bdotvec3() */
+Point2 Pt2b(double, double, double); /* timed by bPt2() against Pt2() */
+Point3 crossvec3_sse(Point3, Point3); /* timed by bcrossvec3() */
+double hsubpd(double, double); /* NOTE(review): no caller in the hunks shown here - confirm it is still needed */
+double fma(double, double, double); /* timed by bfma() under the label "fma_avx" */
+Point2 addpt2_avx(Point2, Point2); /* timed by baddpt2() against addpt2() */
+/* fmin: plain C minimum of two doubles; the "fmin" baseline that bmin() times against min(). */
+double
+fmin(double a, double b)
+{
+ return a<b? a: b; /* b is returned whenever a<b is false, which includes a == b */
+}
+/* madd: plain C multiply-add; the "madd" baseline that bfma() times against fma(). */
+double
+madd(double a, double b, double c)
+{
+ return a + b*c; /* a is the addend; b and c are the factors */
+}
+/* bmin: benchmark group "min" - times fmin() against min() and prints the group to fd. */
+static void
+bmin(int fd)
+{
+ Bgr g; /* Bgr and B are not declared in this file; presumably they come from ../bench9/b.h - confirm */
+ B *b0, *b1;
+ double a, b;
+ int i;
+
+ benchinitgr(&g, "min");
+ b0 = benchadd(&g, "fmin"); /* entry for the C baseline */
+ b1 = benchadd(&g, "fmin_sse"); /* entry for min(); the label differs from the function name */
+
+ while(b0->n > 0 || b1->n > 0){ /* keeps going until neither entry has n > 0; presumably benchout() updates n - confirm in b.h */
+ a = truerand()*frand(); /* new operands for every round */
+ b = truerand()*frand();
+
+ benchin(b0);
+ for(i = 0; i < 1e6; i++) /* 1e6 is a double constant, so the int counter is compared as a double */
+ fmin(a, b); /* return value is discarded */
+ benchout(b0);
+
+ benchin(b1);
+ for(i = 0; i < 1e6; i++)
+ min(a, b); /* same operands as the baseline loop above */
+ benchout(b1);
+ }
+
+ benchprintgr(&g, fd);
+ benchfreegr(&g);
+}
static void
bdotvec2(int fd)
@@ -20,7 +68,7 @@ bdotvec2(int fd)
benchinitgr(&g, "2d dot product");
b0 = benchadd(&g, "dotvec2");
- b1 = benchadd(&g, "dotvec2_simd");
+ b1 = benchadd(&g, "dotvec2_sse4");
b2 = benchadd(&g, "dotvec2_avx");
while(b0->n > 0 || b1->n > 0){
@@ -34,12 +82,12 @@ bdotvec2(int fd)
benchin(b1);
for(i = 0; i < 1e6; i++)
- dppd(a, b);
+ dotvec2_sse4(a, b);
benchout(b1);
benchin(b2);
for(i = 0; i < 1e6; i++)
- dppda(a, b);
+ dotvec2_avx(a, b);
benchout(b2);
}
@@ -57,7 +105,7 @@ bdotvec3(int fd)
benchinitgr(&g, "3d dot product");
b0 = benchadd(&g, "dotvec3");
- b1 = benchadd(&g, "dotvec3_simd");
+ b1 = benchadd(&g, "dotvec3_sse4");
b2 = benchadd(&g, "dotvec3_avx");
while(b0->n > 0 || b1->n > 0){
@@ -71,12 +119,12 @@ bdotvec3(int fd)
benchin(b1);
for(i = 0; i < 1e6; i++)
- dppd3(a, b);
+ dotvec3_sse4(a, b);
benchout(b1);
benchin(b2);
for(i = 0; i < 1e6; i++)
- dppd3a(a, b);
+ dotvec3_avx(a, b);
benchout(b2);
}
@@ -94,7 +142,7 @@ bcrossvec3(int fd)
benchinitgr(&g, "3d cross product");
b0 = benchadd(&g, "crossvec3");
- b1 = benchadd(&g, "crossvec3_simd");
+ b1 = benchadd(&g, "crossvec3_sse");
while(b0->n > 0 || b1->n > 0){
a = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand());
@@ -107,7 +155,102 @@ bcrossvec3(int fd)
benchin(b1);
for(i = 0; i < 1e6; i++)
- xvec3(a, b);
+ crossvec3_sse(a, b);
+ benchout(b1);
+ }
+
+ benchprintgr(&g, fd);
+ benchfreegr(&g);
+}
+/* bPt2: benchmark group "Pt2" - times Pt2() against Pt2b() and prints the group to fd. */
+static void
+bPt2(int fd)
+{
+ Bgr g;
+ B *b0, *b1;
+ double x, y, w;
+ int i;
+
+ benchinitgr(&g, "Pt2");
+ b0 = benchadd(&g, "Pt2"); /* Pt2() is not declared in this file; presumably from <geometry.h> - confirm */
+ b1 = benchadd(&g, "Pt2b"); /* Pt2b() is only prototyped in this file */
+
+ while(b0->n > 0 || b1->n > 0){ /* keeps going until neither entry has n > 0 */
+ x = truerand()*frand(); /* new arguments for every round */
+ y = truerand()*frand();
+ w = truerand()*frand();
+
+ benchin(b0);
+ for(i = 0; i < 1e6; i++) /* 1e6 is a double constant, so the int counter is compared as a double */
+ Pt2(x, y, w); /* returned point is discarded */
+ benchout(b0);
+
+ benchin(b1);
+ for(i = 0; i < 1e6; i++)
+ Pt2b(x, y, w); /* same arguments as the loop above */
+ benchout(b1);
+ }
+
+ benchprintgr(&g, fd);
+ benchfreegr(&g);
+}
+/* bfma: benchmark group "multiply + add" - times madd() against fma() and prints the group to fd. */
+static void
+bfma(int fd)
+{
+ Bgr g;
+ B *b0, *b1;
+ double a, b, c;
+ int i;
+
+ benchinitgr(&g, "multiply + add");
+ b0 = benchadd(&g, "madd"); /* entry for the C baseline defined in this file */
+ b1 = benchadd(&g, "fma_avx"); /* entry for fma(); the label differs from the function name */
+
+ while(b0->n > 0 || b1->n > 0){ /* keeps going until neither entry has n > 0 */
+ a = truerand()*frand(); /* new operands for every round */
+ b = truerand()*frand();
+ c = truerand()*frand();
+
+ benchin(b0);
+ for(i = 0; i < 1e6; i++) /* 1e6 is a double constant, so the int counter is compared as a double */
+ madd(a, b, c); /* computes a + b*c; result is discarded */
+ benchout(b0);
+
+ benchin(b1);
+ for(i = 0; i < 1e6; i++)
+ fma(a, b, c); /* NOTE(review): assumes fma() uses the same operand order as madd() - confirm */
+ benchout(b1);
+ }
+
+ benchprintgr(&g, fd);
+ benchfreegr(&g);
+}
+
+static void
+baddpt2(int fd)
+{
+ Bgr g;
+ B *b0, *b1;
+ Point2 a, b;
+ int i;
+
+ benchinitgr(&g, "2d point sum");
+ b0 = benchadd(&g, "addpt2");
+ b1 = benchadd(&g, "addpt2_avx");
+
+ while(b0->n > 0 || b1->n > 0){
+ a = Pt2(truerand()*frand(), truerand()*frand(), truerand()*frand());
+ b = Pt2(truerand()*frand(), truerand()*frand(), truerand()*frand());
+
+ benchin(b0);
+ for(i = 0; i < 1e6; i++)
+ addpt2(a, b);
+ benchout(b0);
+
+ benchin(b1);
+ for(i = 0; i < 1e6; i++)
+ addpt2_avx(a, b);
benchout(b1);
}
@@ -124,11 +267,19 @@ threadmain(int argc, char **argv)
if(benchwire(0) != 0)
fprint(2, "failed to wire: %r\n");
+ bmin(1);
+ bseparator(1);
bdotvec2(1);
bseparator(1);
bdotvec3(1);
bseparator(1);
bcrossvec3(1);
+ bseparator(1);
+ bPt2(1);
+ bseparator(1);
+ bfma(1);
+ bseparator(1);
+ baddpt2(1);
threadexitsall(nil);
}
diff --git a/bench/mkfile b/bench/mkfile
new file mode 100644
index 0000000..e649008
--- /dev/null
+++ b/bench/mkfile
@@ -0,0 +1,23 @@
+</$objtype/mkfile
+# Builds main.c together with objects from the parent and ../bench9 directories. NOTE(review): TARG is bench9 although this mkfile lives in bench/ - confirm the intended binary name.
+TARG=bench9
+BIN=/$objtype/bin
+arch=`{echo __^$objtype^__}
+CFLAGS=$CFLAGS -D$arch -p
+# -D$arch defines the macro __$objtype__ for the C sources; -p presumably selects the ANSI preprocessor - confirm against the compiler manual.
+HFILES=\
+ ../bench9/b.h\
+ ../regs.h\
+ ../sse.h\
+ ../avx.h\
+
+OFILES=\
+ ../bench9/b.$O\
+ ../bench9/b_$objtype.$O\
+ ../min.$O\
+ ../dppd.$O\
+ main.$O\
+
+default:V: all
+# NOTE(review): main.c calls min, dotvec*, crossvec3_sse, Pt2b, fma and addpt2_avx - confirm the OFILES above (min.$O, dppd.$O) define all of them.
+</sys/src/cmd/mkone