From de71feccbfbc3f5e5f99c79e65f2a019e5ddcaa6 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 24 Feb 2021 19:34:05 -0500 Subject: [PATCH] nv50: pass in third axis via user param This is probably not the most efficient way to go for all geometries, but the assumption is that kernels tend to be x/y-heavy rather than z-heavy. Iterates over each z slice and passes in the current value via user param. (And bump all user params by a dword.) Signed-off-by: Ilia Mirkin Reviewed-by: Pierre Moreau Part-of: --- src/gallium/drivers/nouveau/nv50/nv50_compute.c | 16 +++++++++++----- src/gallium/drivers/nouveau/nv50/nv50_program.c | 2 +- .../drivers/nouveau/nv50/nv50_query_hw_sm.c | 8 ++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 017ebe7ef07..57f92ea9db1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -479,7 +479,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) unsigned size = align(nv50->compprog->parm_size, 0x4); BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); - PUSH_DATA (push, (size / 4) << 8); + PUSH_DATA (push, (1 + (size / 4)) << 8); if (size) { struct nouveau_mm_allocation *mm; @@ -498,7 +498,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) nouveau_pushbuf_space(push, 0, 0, 1); - BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4); + BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4); nouveau_pushbuf_data(push, bo, offset, size); nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); @@ -545,9 +545,15 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) BEGIN_NV04(push, NV50_CP(GRIDID), 1); PUSH_DATA (push, 1); - /* kernel launching */ - BEGIN_NV04(push, NV50_CP(LAUNCH), 1); - PUSH_DATA (push, 0); + for (int i = 0; i < info->grid[2]; i++) { + BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1); + PUSH_DATA (push, info->grid[2] | i << 16); + + /* kernel launching */ + BEGIN_NV04(push, NV50_CP(LAUNCH), 1); + PUSH_DATA (push, 0); + } + BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 8c14a6531ec..b496d4f2549 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -375,7 +375,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->gp.has_viewport = 0; if (prog->type == PIPE_SHADER_COMPUTE) - info->prop.cp.inputOffset = 0x10; + info->prop.cp.inputOffset = 0x14; info_out.driverPriv = prog; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index 506a0c7c9cd..de3d8fd950e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -58,8 +58,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] = * mov $r2 $pm2 * mov $r3 $pm3 * mov $r4 $physid - * ld $r5 b32 s[0x10] - * ld $r6 b32 s[0x14] + * ld $r5 b32 s[0x14] + * ld $r6 b32 s[0x18] * and b32 $r4 $r4 0x000f0000 * shr u32 $r4 $r4 0x10 * mul $r4 u24 $r4 0x14 @@ -81,8 +81,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] = 0x6001878000000009ULL, 0x6001c7800000000dULL, 0x6000078000000011ULL, - 0x4400c78010000815ULL, - 0x4400c78010000a19ULL, + 0x4400c78010000a15ULL, + 0x4400c78010000c19ULL, 0x0000f003d0000811ULL, 0xe410078030100811ULL, 0x0000000340540811ULL,