diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 5c1b27f9beb..b3ceaf79971 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -628,4 +628,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) /* bind a compute shader clobbers fragment shader state */ nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; + + nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] * + info->grid[0] * info->grid[1] * info->grid[2]; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index afd04d99ba6..af8a290db71 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -225,6 +225,8 @@ struct nv50_context { uint16_t images_valid; struct util_dynarray global_residents; + + uint64_t compute_invocations; }; static inline struct nv50_context * diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index d91f46cf92b..50000ffff4c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -174,14 +174,15 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) nv50_hw_query_get(push, q, 0x30, 0x06805002); break; case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ - nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ - nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ - nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ - nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ - nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ - nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ + nv50_hw_query_get(push, q, 0x90, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0xa0, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0xb0, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xc0, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xd0, 0x04806002); /* GP, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xe0, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0xf0, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x100, 0x0980a002); /* ROP, PIXELS */ + ((uint64_t *)hq->data)[2 * 0x11] = nv50->compute_invocations; break; case PIPE_QUERY_TIME_ELAPSED: nv50_hw_query_get(push, q, 0x10, 0x00005002); @@ -237,6 +238,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + ((uint64_t *)hq->data)[2 * 0x8] = nv50->compute_invocations; break; case PIPE_QUERY_TIMESTAMP: hq->sequence++; @@ -316,7 +318,8 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, break; case PIPE_QUERY_PIPELINE_STATISTICS: for (i = 0; i < 8; ++i) - res64[i] = data64[i * 2] - data64[16 + i * 2]; + res64[i] = data64[i * 2] - data64[18 + i * 2]; + result->pipeline_statistics.cs_invocations = data64[i * 2] - data64[18 + i * 2]; break; case PIPE_QUERY_TIMESTAMP: res64[0] = data64[1]; @@ -351,6 +354,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) { struct nv50_hw_query *hq; struct nv50_query *q; + unsigned space = NV50_HW_QUERY_ALLOC_SPACE; hq = nv50_hw_sm_create_query(nv50, type); if (hq) { @@ -380,15 +384,25 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: + hq->is64bit = true; + space = 32; + break; case PIPE_QUERY_SO_STATISTICS: + hq->is64bit = true; + space = 64; + break; case PIPE_QUERY_PIPELINE_STATISTICS: hq->is64bit = true; + space = 9 * 2 * 16; /* 9 values, start/end, 16-bytes each */ break; case PIPE_QUERY_TIME_ELAPSED: case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_GPU_FINISHED: + space = 32; + break; case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + space = 16; break; default: debug_printf("invalid query type: %u\n", type); @@ -396,7 +410,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) return NULL; } - if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + if (!nv50_hw_query_allocate(nv50, q, space)) { FREE(hq); return NULL; }