From cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5 Mon Sep 17 00:00:00 2001
From: rodri
Date: Fri, 1 Dec 2023 21:58:15 +0000
Subject: implement memory aligned versions of some functions.

---
 bench/main.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

(limited to 'bench')

diff --git a/bench/main.c b/bench/main.c
index 6f4886f..514fb7c 100644
--- a/bench/main.c
+++ b/bench/main.c
@@ -8,8 +8,12 @@ double min(double, double);
 double dotvec2_sse(Point2, Point2);
 double dotvec2_sse4(Point2, Point2);
 double dotvec2_avx(Point2, Point2);
+double dotvec2_sse_a(Point2*, Point2*);
+double dotvec2_sse4_a(Point2*, Point2*);
+double dotvec2_avx_a(Point2*, Point2*);
 double dotvec3_sse4(Point3, Point3);
 double dotvec3_avx(Point3, Point3);
+double dotvec3_sse4_a(Point3*, Point3*);
 Point2 Pt2b(double, double, double);
 Point3 crossvec3_sse(Point3, Point3);
 double hsubpd(double, double);
@@ -18,6 +22,21 @@ Point2 addpt2_sse(Point2, Point2);
 Point2 addpt2_avx(Point2, Point2);
 Point3 addpt3_avx(Point3, Point3);
 
+void *
+amalloc(ulong n, ulong a)
+{
+	void *p;
+
+	assert(a > 1 && (a&1) == 0);
+
+	a--;
+	p = malloc(n+a);
+	if(p == nil)
+		sysfatal("malloc: %r");
+	p = (void*)(((uintptr)p + a)&~a);
+	return p;
+}
+
 double
 fmin(double a, double b)
 {
@@ -65,8 +84,9 @@ static void
 bdotvec2(int fd)
 {
 	Bgr g;
-	B *b0, *b1, *b2, *b3;
+	B *b0, *b1, *b2, *b3, *b4, *b5, *b6;
 	Point2 a, b;
+	Point2 *aa, *bb;
 	int i;
 
 	benchinitgr(&g, "2d dot product");
@@ -74,10 +94,17 @@ bdotvec2(int fd)
 	b1 = benchadd(&g, "dotvec2_sse");
 	b2 = benchadd(&g, "dotvec2_sse4");
 	b3 = benchadd(&g, "dotvec2_avx");
+	b4 = benchadd(&g, "dotvec2_sse_a");
+	b5 = benchadd(&g, "dotvec2_sse4_a");
+	b6 = benchadd(&g, "dotvec2_avx_a");
 
 	while(b0->n > 0 || b1->n > 0){
 		a = Vec2(truerand()*frand(), truerand()*frand());
 		b = Vec2(truerand()*frand(), truerand()*frand());
+		aa = amalloc(sizeof(Point2), 16);
+		bb = amalloc(sizeof(Point2), 16);
+		*aa = a;
+		*bb = b;
 
 		benchin(b0);
 		for(i = 0; i < 1e6; i++)
@@ -98,6 +125,21 @@ bdotvec2(int fd)
 		for(i = 0; i < 1e6; i++)
 			dotvec2_avx(a, b);
 		benchout(b3);
+
+		benchin(b4);
+		for(i = 0; i < 1e6; i++)
+			dotvec2_sse_a(aa, bb);
+		benchout(b4);
+
+		benchin(b5);
+		for(i = 0; i < 1e6; i++)
+			dotvec2_sse4_a(aa, bb);
+		benchout(b5);
+
+		benchin(b6);
+		for(i = 0; i < 1e6; i++)
+			dotvec2_avx_a(aa, bb);
+		benchout(b6);
 	}
 
 	benchprintgr(&g, fd);
@@ -108,18 +150,24 @@ static void
 bdotvec3(int fd)
 {
 	Bgr g;
-	B *b0, *b1, *b2;
+	B *b0, *b1, *b2, *b3;
 	Point3 a, b;
+	Point3 *aa, *bb;
 	int i;
 
 	benchinitgr(&g, "3d dot product");
 	b0 = benchadd(&g, "dotvec3");
 	b1 = benchadd(&g, "dotvec3_sse4");
 	b2 = benchadd(&g, "dotvec3_avx");
+	b3 = benchadd(&g, "dotvec3_sse4_a");
 
 	while(b0->n > 0 || b1->n > 0){
 		a = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand());
 		b = Vec3(truerand()*frand(), truerand()*frand(), truerand()*frand());
+		aa = amalloc(sizeof(Point3), 16);
+		bb = amalloc(sizeof(Point3), 16);
+		*aa = a;
+		*bb = b;
 
 		benchin(b0);
 		for(i = 0; i < 1e6; i++)
@@ -135,6 +183,11 @@ bdotvec3(int fd)
 		for(i = 0; i < 1e6; i++)
 			dotvec3_avx(a, b);
 		benchout(b2);
+
+		benchin(b3);
+		for(i = 0; i < 1e6; i++)
+			dotvec3_sse4_a(aa, bb);
+		benchout(b3);
 	}
 
 	benchprintgr(&g, fd);
-- 
cgit v1.2.3