From fb2c8083f028676d0c46e0d9a89de78b3f129552 Mon Sep 17 00:00:00 2001 From: rodri Date: Sat, 7 Sep 2024 18:39:36 +0000 Subject: remove unnecessary copying. profile individual stage procs. --- camera.c | 40 +++++++++++------- clip.c | 3 ++ graphics.h | 20 ++++----- internal.h | 7 ++++ render.c | 138 ++++++++++++++++++++++++++++++++++--------------------------- vertex.c | 4 -- 6 files changed, 124 insertions(+), 88 deletions(-) diff --git a/camera.c b/camera.c index 0562769..d5d660d 100644 --- a/camera.c +++ b/camera.c @@ -86,17 +86,6 @@ updatestats(Camera *c, uvlong v) c->stats.nframes++; } -static void -updatetimes(Camera *c, Renderjob *j) -{ - c->times.R[c->times.cur] = j->times.R; - c->times.E[c->times.cur] = j->times.E; - c->times.Tn[c->times.cur] = j->times.Tn; - c->times.Rn[c->times.cur] = j->times.Rn; - c->times.last = c->times.cur; - c->times.cur = ++c->times.cur % nelem(c->times.R); -} - static void verifycfg(Camera *c) { @@ -208,6 +197,26 @@ aimcamera(Camera *c, Point3 focus) c->by = crossvec3(c->bz, c->bx); } +static void +printtimings(Renderjob *job) +{ + int i; + + if(!job->rctl->doprof) + return; + + fprint(2, "R %llud %llud\nE %llud %llud\n", + job->times.R.t0, job->times.R.t1, + job->times.E.t0, job->times.E.t1); + for(i = 0; i < job->rctl->nprocs/2; i++) + fprint(2, "T%d %llud %llud\n", i, + job->times.Tn[i].t0, job->times.Tn[i].t1); + for(i = 0; i < job->rctl->nprocs/2; i++) + fprint(2, "r%d %llud %llud\n", i, + job->times.Rn[i].t0, job->times.Rn[i].t1); + fprint(2, "\n"); +} + void shootcamera(Camera *c, Shadertab *s) { @@ -224,6 +233,7 @@ shootcamera(Camera *c, Shadertab *s) job = emalloc(sizeof *job); memset(job, 0, sizeof *job); + job->rctl = c->rctl; job->fb = fbctl->getbb(fbctl); job->camera = emalloc(sizeof *c); *job->camera = *c; @@ -233,7 +243,7 @@ shootcamera(Camera *c, Shadertab *s) fbctl->reset(fbctl, c->clearcolor); t0 = nanosec(); - sendp(c->rctl->c, job); + sendp(c->rctl->jobq, job); recvp(job->donec); delscene(job->scene); /* destroy the snapshot */ /* @@ -251,7 +261,7 @@ shootcamera(Camera *c, Shadertab *s) reloadcamera(job->camera); job->scene = dupscene(skyboxscene); job->shaders = &skyboxshader; - sendp(c->rctl->c, job); + sendp(c->rctl->jobq, job); recvp(job->donec); delscene(job->scene); } @@ -259,7 +269,9 @@ shootcamera(Camera *c, Shadertab *s) fbctl->swap(fbctl); updatestats(c, t1-t0); - updatetimes(c, job); + printtimings(job); +// free(job->times.Tn); +// free(job->times.Rn); chanfree(job->donec); free(job->camera); diff --git a/clip.c b/clip.c index aaa8754..7548b36 100644 --- a/clip.c +++ b/clip.c @@ -108,6 +108,7 @@ clipprimitive(Primitive *p, Primitive *cp) d1 = (j&1) == 0? sd1[j]: -sd1[j]; perc = d0/(d0 - d1); + memset(&v, 0, sizeof v); lerpvertex(&v, v0, v1, perc); addvert(Vout, v); @@ -192,6 +193,8 @@ adjustverts(Point *p0, Point *p1, Vertex *v0, Vertex *v1) Point Δp; double len, perc; + memset(v, 0, sizeof v); + dp = subpt3(v1->p, v0->p); len = hypot(dp.x, dp.y); diff --git a/graphics.h b/graphics.h index a361491..ef2e419 100644 --- a/graphics.h +++ b/graphics.h @@ -56,8 +56,8 @@ typedef struct Scene Scene; typedef struct Shaderparams Shaderparams; typedef struct SUparams SUparams; typedef struct Shadertab Shadertab; -typedef struct Renderer Renderer; typedef struct Rendertime Rendertime; +typedef struct Renderer Renderer; typedef struct Renderjob Renderjob; typedef struct Fragment Fragment; typedef struct Astk Astk; @@ -231,20 +231,23 @@ struct Shadertab Color (*fshader)(Shaderparams*); /* fragment shader */ }; -struct Renderer +struct Rendertime { - Channel *c; + uvlong t0, t1; }; -struct Rendertime +struct Renderer { - uvlong t0, t1; + Channel *jobq; + ulong nprocs; + int doprof; /* enable profiling */ }; struct Renderjob { Ref; uvlong id; + Renderer *rctl; Framebuf *fb; Camera *camera; Scene *scene; @@ -252,7 +255,8 @@ struct Renderjob Channel *donec; struct { - Rendertime R, E, Tn, Rn; /* renderer, entityproc, tilers, rasterizers */ + /* renderer, entityproc, tilers, rasterizers */ + Rendertime R, E, Tn[20], Rn[20]; } times; Renderjob *next; @@ -356,10 +360,6 @@ struct Camera uvlong min, avg, max, acc, n, v; uvlong nframes; } stats; - struct { - Rendertime R[10], E[10], Tn[10], Rn[10]; - int last, cur; - } times; }; /* camera */ diff --git a/internal.h b/internal.h index c204d9a..3173f75 100644 --- a/internal.h +++ b/internal.h @@ -1,4 +1,5 @@ typedef struct Polygon Polygon; +typedef struct Entityparam Entityparam; typedef struct Tilerparam Tilerparam; typedef struct Rasterparam Rasterparam; typedef struct Rastertask Rastertask; @@ -10,6 +11,12 @@ struct Polygon ulong cap; }; +struct Entityparam +{ + Renderer *rctl; + Channel *paramsc; +}; + struct Tilerparam { int id; diff --git a/render.c b/render.c index 1c14ade..4574d62 100644 --- a/render.c +++ b/render.c @@ -191,7 +191,7 @@ rasterize(Rastertask *task) { SUparams *params; Raster *cr, *zr; - Primitive prim; + Primitive *prim; Vertex v; Shaderparams fsp; Triangle2 t; @@ -204,8 +204,9 @@ rasterize(Rastertask *task) int steep = 0, Δe, e, Δy; params = task->params; - prim = task->p; + prim = &task->p; memset(&fsp, 0, sizeof fsp); + memset(&v, 0, sizeof v); fsp.su = params; fsp.v = &v; fsp.getuniform = sparams_getuniform; @@ -216,18 +217,18 @@ rasterize(Rastertask *task) cr = params->fb->rasters; zr = cr->next; - switch(prim.type){ + switch(prim->type){ case PPoint: - p = Pt(prim.v[0].p.x, prim.v[0].p.y); + p = Pt(prim->v[0].p.x, prim->v[0].p.y); - z = fclamp(prim.v[0].p.z, 0, 1); + z = fclamp(prim->v[0].p.z, 0, 1); if(params->camera->enabledepth){ if(z <= getdepth(zr, p)) break; putdepth(zr, p, z); } - *fsp.v = dupvertex(&prim.v[0]); + fsp.v = &prim->v[0]; fsp.p = p; c = params->fshader(&fsp); if(params->camera->enableAbuff) @@ -237,10 +238,10 @@ rasterize(Rastertask *task) delvattrs(fsp.v); break; case PLine: - p0 = Pt(prim.v[0].p.x, prim.v[0].p.y); - p1 = Pt(prim.v[1].p.x, prim.v[1].p.y); + p0 = Pt(prim->v[0].p.x, prim->v[0].p.y); + p1 = Pt(prim->v[1].p.x, prim->v[1].p.y); /* clip it against our wr */ - if(rectclipline(task->wr, &p0, &p1, &prim.v[0], &prim.v[1]) < 0) + if(rectclipline(task->wr, &p0, &p1, &prim->v[0], &prim->v[1]) < 0) break; /* transpose the points */ @@ -253,7 +254,7 @@ rasterize(Rastertask *task) /* make them left-to-right */ if(p0.x > p1.x){ SWAP(Point, &p0, &p1); - SWAP(Vertex, &prim.v[0], &prim.v[1]); + SWAP(Vertex, &prim->v[0], &prim->v[1]); } dp = subpt(p1, p0); @@ -268,7 +269,7 @@ rasterize(Rastertask *task) if(steep) SWAP(int, &p.x, &p.y); - z = flerp(prim.v[0].p.z, prim.v[1].p.z, perc); + z = flerp(prim->v[0].p.z, prim->v[1].p.z, perc); /* TODO get rid of the bounds check and make sure the clipping doesn't overflow */ if(params->camera->enabledepth){ if(!ptinrect(p, params->fb->r) || z <= getdepth(zr, p)) @@ -277,12 +278,12 @@ rasterize(Rastertask *task) } /* interpolate z⁻¹ and get actual z */ - pcz = flerp(prim.v[0].p.w, prim.v[1].p.w, perc); + pcz = flerp(prim->v[0].p.w, prim->v[1].p.w, perc); pcz = 1.0/(pcz < 1e-5? 1e-5: pcz); /* perspective-correct attribute interpolation */ - perc *= prim.v[0].p.w * pcz; - lerpvertex(fsp.v, &prim.v[0], &prim.v[1], perc); + perc *= prim->v[0].p.w * pcz; + lerpvertex(fsp.v, &prim->v[0], &prim->v[1], perc); fsp.p = p; c = params->fshader(&fsp); @@ -290,7 +291,6 @@ rasterize(Rastertask *task) pushtoAbuf(params->fb, p, c, z); else pixel(cr, p, c, params->camera->enableblend); - delvattrs(fsp.v); discard: if(steep) SWAP(int, &p.x, &p.y); @@ -300,20 +300,18 @@ discard: e -= 2*dp.x; } } + delvattrs(fsp.v); break; case PTriangle: - t.p0 = Pt2(prim.v[0].p.x, prim.v[0].p.y, 1); - t.p1 = Pt2(prim.v[1].p.x, prim.v[1].p.y, 1); - t.p2 = Pt2(prim.v[2].p.x, prim.v[2].p.y, 1); + t.p0 = Pt2(prim->v[0].p.x, prim->v[0].p.y, 1); + t.p1 = Pt2(prim->v[1].p.x, prim->v[1].p.y, 1); + t.p2 = Pt2(prim->v[2].p.x, prim->v[2].p.y, 1); /* find the triangle's bbox and clip it against our wr */ bbox.min.x = min(min(t.p0.x, t.p1.x), t.p2.x); bbox.min.y = min(min(t.p0.y, t.p1.y), t.p2.y); bbox.max.x = max(max(t.p0.x, t.p1.x), t.p2.x)+1; bbox.max.y = max(max(t.p0.y, t.p1.y), t.p2.y)+1; - bbox.min.x = max(bbox.min.x, task->wr.min.x); - bbox.min.y = max(bbox.min.y, task->wr.min.y); - bbox.max.x = min(bbox.max.x, task->wr.max.x); - bbox.max.y = min(bbox.max.y, task->wr.max.y); + rectclip(&bbox, task->wr); for(p.y = bbox.min.y; p.y < bbox.max.y; p.y++) for(p.x = bbox.min.x; p.x < bbox.max.x; p.x++){ @@ -321,7 +319,7 @@ discard: if(bc.x < 0 || bc.y < 0 || bc.z < 0) continue; - z = fberp(prim.v[0].p.z, prim.v[1].p.z, prim.v[2].p.z, bc); + z = fberp(prim->v[0].p.z, prim->v[1].p.z, prim->v[2].p.z, bc); if(params->camera->enabledepth){ if(z <= getdepth(zr, p)) continue; @@ -329,14 +327,14 @@ discard: } /* interpolate z⁻¹ and get actual z */ - pcz = fberp(prim.v[0].p.w, prim.v[1].p.w, prim.v[2].p.w, bc); + pcz = fberp(prim->v[0].p.w, prim->v[1].p.w, prim->v[2].p.w, bc); pcz = 1.0/(pcz < 1e-5? 1e-5: pcz); /* perspective-correct attribute interpolation */ - bc = modulapt3(bc, Vec3(prim.v[0].p.w*pcz, - prim.v[1].p.w*pcz, - prim.v[2].p.w*pcz)); - berpvertex(fsp.v, &prim.v[0], &prim.v[1], &prim.v[2], bc); + bc = modulapt3(bc, Vec3(prim->v[0].p.w*pcz, + prim->v[1].p.w*pcz, + prim->v[2].p.w*pcz)); + berpvertex(fsp.v, &prim->v[0], &prim->v[1], &prim->v[2], bc); fsp.p = p; c = params->fshader(&fsp); @@ -344,8 +342,8 @@ discard: pushtoAbuf(params->fb, p, c, z); else pixel(cr, p, c, params->camera->enableblend); - delvattrs(fsp.v); } + delvattrs(fsp.v); break; default: sysfatal("alien primitive detected"); } @@ -357,6 +355,7 @@ rasterizer(void *arg) Rasterparam *rp; Rastertask *task; SUparams *params; + Renderjob *job; uvlong t0; int i; @@ -368,22 +367,23 @@ rasterizer(void *arg) t0 = nanosec(); params = task->params; + job = params->job; + if(job->times.Rn[rp->id].t0 == 0) + job->times.Rn[rp->id].t0 = t0; + /* end of job */ if(params->entity == nil){ - if(decref(params->job) < 1){ - if(params->job->camera->enableAbuff) - squashAbuf(params->job->fb, params->job->camera->enableblend); - params->job->times.Rn.t1 = nanosec(); - nbsend(params->job->donec, nil); + if(decref(job) < 1){ + if(job->camera->enableAbuff) + squashAbuf(job->fb, job->camera->enableblend); + nbsend(job->donec, nil); free(params); } + job->times.Rn[rp->id].t1 = nanosec(); free(task); continue; } - if(params->job->times.Rn.t0 == 0) - params->job->times.Rn.t0 = t0; - rasterize(task); for(i = 0; i < task->p.type+1; i++) @@ -423,11 +423,12 @@ tiler(void *arg) while((params = recvp(tp->paramsc)) != nil){ t0 = nanosec(); - if(params->job->times.Tn.t0 == 0) - params->job->times.Tn.t0 = t0; + if(params->job->times.Tn[tp->id].t0 == 0) + params->job->times.Tn[tp->id].t0 = t0; /* end of job */ if(params->entity == nil){ + params->job->times.Tn[tp->id].t1 = nanosec(); if(decref(params->job) < 1){ params->job->ref = nproc; for(i = 0; i < nproc; i++){ @@ -436,7 +437,6 @@ tiler(void *arg) task->params = params; sendp(taskchans[i], task); } - params->job->times.Tn.t1 = nanosec(); } continue; } @@ -476,7 +476,7 @@ tiler(void *arg) bbox.max.y = p->v[0].p.y+1; for(i = 0; i < nproc; i++) - if(rectXrect(bbox,wr[i])){ + if(rectXrect(bbox, wr[i])){ newparams = emalloc(sizeof *newparams); *newparams = *params; task = emalloc(sizeof *task); @@ -485,6 +485,7 @@ tiler(void *arg) task->p = *p; task->p.v[0] = dupvertex(&p->v[0]); sendp(taskchans[i], task); + break; } delvattrs(&p->v[0]); break; @@ -518,7 +519,7 @@ tiler(void *arg) bbox.max.y = max(p->v[0].p.y, p->v[1].p.y)+1; for(i = 0; i < nproc; i++) - if(rectXrect(bbox,wr[i])){ + if(rectXrect(bbox, wr[i])){ newparams = emalloc(sizeof *newparams); *newparams = *params; task = emalloc(sizeof *task); @@ -570,7 +571,7 @@ tiler(void *arg) bbox.max.y = max(max(p->v[0].p.y, p->v[1].p.y), p->v[2].p.y)+1; for(i = 0; i < nproc; i++) - if(rectXrect(bbox,wr[i])){ + if(rectXrect(bbox, wr[i])){ newparams = emalloc(sizeof *newparams); *newparams = *params; task = emalloc(sizeof *task); @@ -598,25 +599,24 @@ skiptri: static void entityproc(void *arg) { + Entityparam *ep; Channel *paramsin, **paramsout, **taskchans; Tilerparam *tp; Rasterparam *rp; SUparams *params, *newparams; Primitive *eb, *ee; - char *nprocs; ulong stride, nprims, nproc, nworkers; int i; uvlong t0; threadsetname("entityproc"); - paramsin = arg; - nprocs = getenv("NPROC"); - if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2) - nproc = 1; - else + ep = arg; + paramsin = ep->paramsc; + + nproc = ep->rctl->nprocs; + if(nproc > 2) nproc /= 2; - free(nprocs); paramsout = emalloc(nproc*sizeof(*paramsout)); taskchans = emalloc(nproc*sizeof(*taskchans)); @@ -641,6 +641,14 @@ entityproc(void *arg) if(params->job->times.E.t0 == 0) params->job->times.E.t0 = t0; + /* prof: initialize timing slots for the next stages */ +// if(params->job->times.Tn == nil){ +// params->job->times.Tn = emalloc(nproc*sizeof(Rendertime)); +// params->job->times.Rn = emalloc(nproc*sizeof(Rendertime)); +// memset(params->job->times.Tn, 0, nproc*sizeof(Rendertime)); +// memset(params->job->times.Rn, 0, nproc*sizeof(Rendertime)); +// } + /* end of job */ if(params->entity == nil){ params->job->ref = nproc; @@ -676,23 +684,25 @@ entityproc(void *arg) static void renderer(void *arg) { - Channel *jobc; + Renderer *rctl; Renderjob *job; Scene *sc; Entity *ent; SUparams *params; - Channel *paramsc; + Entityparam *ep; uvlong time, lastid; threadsetname("renderer"); - jobc = arg; + rctl = arg; lastid = 0; - paramsc = chancreate(sizeof(SUparams*), 8); - proccreate(entityproc, paramsc, mainstacksize); + ep = emalloc(sizeof *ep); + ep->rctl = rctl; + ep->paramsc = chancreate(sizeof(SUparams*), 8); + proccreate(entityproc, ep, mainstacksize); - while((job = recvp(jobc)) != nil){ + while((job = recvp(rctl->jobq)) != nil){ time = nanosec(); job->times.R.t0 = time; job->id = lastid++; @@ -718,13 +728,13 @@ renderer(void *arg) params->uni_time = time; params->vshader = job->shaders->vshader; params->fshader = job->shaders->fshader; - sendp(paramsc, params); + sendp(ep->paramsc, params); } /* mark end of job */ params = emalloc(sizeof *params); memset(params, 0, sizeof *params); params->job = job; - sendp(paramsc, params); + sendp(ep->paramsc, params); job->times.R.t1 = nanosec(); } @@ -734,9 +744,17 @@ Renderer * initgraphics(void) { Renderer *r; + char *nprocs; + ulong nproc; + + nprocs = getenv("NPROC"); + if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2) + nproc = 1; + free(nprocs); r = emalloc(sizeof *r); - r->c = chancreate(sizeof(Renderjob*), 8); - proccreate(renderer, r->c, mainstacksize); + r->jobq = chancreate(sizeof(Renderjob*), 8); + r->nprocs = nproc; + proccreate(renderer, r, mainstacksize); return r; } diff --git a/vertex.c b/vertex.c index 5701c23..98f2ec5 100644 --- a/vertex.c +++ b/vertex.c @@ -59,8 +59,6 @@ lerpvertex(Vertex *v, Vertex *v0, Vertex *v1, double t) v->c = lerp3(v0->c, v1->c, t); v->uv = lerp2(v0->uv, v1->uv, t); v->mtl = v0->mtl != nil? v0->mtl: v1->mtl; - v->attrs = nil; - v->nattrs = 0; for(i = 0; i < v0->nattrs; i++){ va.id = v0->attrs[i].id; va.type = v0->attrs[i].type; @@ -86,8 +84,6 @@ berpvertex(Vertex *v, Vertex *v0, Vertex *v1, Vertex *v2, Point3 bc) v->c = berp3(v0->c, v1->c, v2->c, bc); v->uv = berp2(v0->uv, v1->uv, v2->uv, bc); v->mtl = v0->mtl != nil? v0->mtl: v1->mtl != nil? v1->mtl: v2->mtl; - v->attrs = nil; - v->nattrs = 0; for(i = 0; i < v0->nattrs; i++){ va.id = v0->attrs[i].id; va.type = v0->attrs[i].type; -- cgit v1.2.3