aboutsummaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
author: rodri <rgl@antares-labs.eu> 2023-12-01 21:58:15 +0000
committer: rodri <rgl@antares-labs.eu> 2023-12-01 21:58:15 +0000
commit: cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5 (patch)
tree: 7a943ff0655e8df9571a097c6beafa2a024f08a4 /bench
parent: a0b600a89c2e6e636579fe727235d036c08c7a9d (diff)
downloadamd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.gz
amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.tar.bz2
amd64-simd-cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5.zip
implement memory aligned versions of some functions.
Diffstat (limited to 'bench')
-rw-r--r--bench/main.c57
1 files changed, 55 insertions, 2 deletions
diff --git a/bench/main.c b/bench/main.c
index 6f4886f..514fb7c 100644
--- a/bench/main.c
+++ b/bench/main.c
@@ -8,8 +8,12 @@ double min(double, double);
double dotvec2_sse(Point2, Point2);
double dotvec2_sse4(Point2, Point2);
double dotvec2_avx(Point2, Point2);
+double dotvec2_sse_a(Point2*, Point2*);
+double dotvec2_sse4_a(Point2*, Point2*);
+double dotvec2_avx_a(Point2*, Point2*);
double dotvec3_sse4(Point3, Point3);
double dotvec3_avx(Point3, Point3);
+double dotvec3_sse4_a(Point3*, Point3*);
Point2 Pt2b(double, double, double);
Point3 crossvec3_sse(Point3, Point3);
double hsubpd(double, double);
@@ -18,6 +22,21 @@ Point2 addpt2_sse(Point2, Point2);
Point2 addpt2_avx(Point2, Point2);
Point3 addpt3_avx(Point3, Point3);
+/* amalloc returns n bytes aligned to a (a power of two > 1); malloc's base pointer is not kept, so don't free the result. */
+void *
+amalloc(ulong n, ulong a)
+{
+ void *p;
+
+ /* the mask rounding below is only correct for powers of two; an even-only check lets e.g. 6 through */
+ assert(a > 1 && (a&(a-1)) == 0);
+ a--;	/* a is now the alignment mask */
+ p = malloc(n+a);	/* a spare bytes so the address can be rounded up */
+ if(p == nil)
+  sysfatal("malloc: %r");
+ return (void*)(((uintptr)p + a)&~a);
+}
+
double
fmin(double a, double b)
{
@@ -65,8 +84,9 @@ static void
bdotvec2(int fd)
{
Bgr g;
- B *b0, *b1, *b2, *b3;
+ B *b0, *b1, *b2, *b3, *b4, *b5, *b6;
Point2 a, b;
+ Point2 *aa, *bb;
int i;
benchinitgr(&g, "2d dot product");
@@ -74,10 +94,17 @@ bdotvec2(int fd)
b1 = benchadd(&g, "dotvec2_sse");
b2 = benchadd(&g, "dotvec2_sse4");
b3 = benchadd(&g, "dotvec2_avx");
+ b4 = benchadd(&g, "dotvec2_sse_a");
+ b5 = benchadd(&g, "dotvec2_sse4_a");
+ b6 = benchadd(&g, "dotvec2_avx_a");
while(b0->n > 0 || b1->n > 0){
a = Vec2(truerand()*frand(), truerand()*frand());
b = Vec2(truerand()*frand(), truerand()*frand());
+ aa = amalloc(sizeof(Point2), 16);
+ bb = amalloc(sizeof(Point2), 16);
+ *aa = a;
+ *bb = b;
benchin(b0);
for(i = 0; i < 1e6; i++)
@@ -98,6 +125,21 @@ bdotvec2(int fd)
for(i = 0; i < 1e6; i++)
dotvec2_avx(a, b);
benchout(b3);
+
+ benchin(b4);
+ for(i = 0; i < 1e6; i++)
+ dotvec2_sse_a(aa, bb);
+ benchout(b4);
+
+ benchin(b5);
+ for(i = 0; i < 1e6; i++)
+ dotvec2_sse4_a(aa, bb);
+ benchout(b5);
+
+ benchin(b6);
+ for(i = 0; i < 1e6; i++)
+ dotvec2_avx_a(aa, bb);
+ benchout(b6);
}
benchprintgr(&g, fd);
@@ -108,18 +150,24 @@ static void
bdotvec3(int fd)
{
Bgr g;
- B *b0, *b1, *b2;
+ B *b0, *b1, *b2, *b3;
Point3 a, b;
+ Point3 *aa, *bb;
int i;
benchinitgr(&g, "3d dot product");
b0 = benchadd(&g, "dotvec3");
b1 = benchadd(&g, "dotvec3_sse4");
b2 = benchadd(&g, "dotvec3_avx");
+ b3 = benchadd(&g, "dotvec3_sse4_a");
while(b0->n > 0 || b1->n > 0){
a = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand());
b = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand());
+ aa = amalloc(sizeof(Point3), 16);
+ bb = amalloc(sizeof(Point3), 16);
+ *aa = a;
+ *bb = b;
benchin(b0);
for(i = 0; i < 1e6; i++)
@@ -135,6 +183,11 @@ bdotvec3(int fd)
for(i = 0; i < 1e6; i++)
dotvec3_avx(a, b);
benchout(b2);
+
+ benchin(b3);
+ for(i = 0; i < 1e6; i++)
+ dotvec3_sse4_a(aa, bb);
+ benchout(b3);
}
benchprintgr(&g, fd);