From a8df5cfa3a66a647b5fe78ae962ec060003dc799 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 7 May 2023 03:16:31 +0200 Subject: [PATCH] gallium: change PIPE_COMPUTE_CAP_SUBGROUP_SIZE to a bitfield of sizes This will be required for `cl_intel_required_subgroup_size`, but it already helps implementing OpenCL subgroups as this allows us to check with every subgroup size when implementing `CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT`. Signed-off-by: Karol Herbst Reviewed-by: Alyssa Rosenzweig Part-of: --- docs/gallium/screen.rst | 5 +++-- src/gallium/drivers/asahi/agx_pipe.c | 2 +- src/gallium/drivers/crocus/crocus_screen.c | 2 +- src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- src/gallium/drivers/iris/iris_screen.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/panfrost/pan_screen.c | 2 +- src/gallium/drivers/r600/r600_pipe_common.c | 2 +- src/gallium/drivers/radeonsi/si_get.c | 9 +++++++-- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/v3d/v3d_screen.c | 2 +- src/gallium/drivers/zink/zink_screen.c | 2 +- src/gallium/frontends/clover/core/device.cpp | 7 +++++-- src/gallium/frontends/rusticl/core/device.rs | 2 +- src/gallium/include/pipe/p_defines.h | 2 +- 17 files changed, 30 insertions(+), 21 deletions(-) diff --git a/docs/gallium/screen.rst b/docs/gallium/screen.rst index 7e106531917..728e0e95b4c 100644 --- a/docs/gallium/screen.rst +++ b/docs/gallium/screen.rst @@ -804,8 +804,9 @@ pipe_screen::get_compute_param. are supported. * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported non-zero means yes, zero means no. Value type: ``uint32_t`` -* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in - threads. Also known as wavefront size, warp size or SIMD width. +* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZES``: Ored power of two sizes of a basic execution + unit in threads. Also known as wavefront size, warp size or SIMD width. + E.g. `64 | 32`. * ``PIPE_COMPUTE_CAP_ADDRESS_BITS``: The default compute device address space size specified as an unsigned integer value in bits. * ``PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK``: Maximum variable number diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 587d07208fc..7b7054e40d8 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -1855,7 +1855,7 @@ agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t[]){1}); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t[]){32}); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c index a40914dd721..2082f3281fd 100644 --- a/src/gallium/drivers/crocus/crocus_screen.c +++ b/src/gallium/drivers/crocus/crocus_screen.c @@ -589,7 +589,7 @@ crocus_get_compute_param(struct pipe_screen *pscreen, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t []) { 1 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { BRW_SUBGROUP_SIZE }); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 78bc3d4b027..d6ae872d617 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -843,7 +843,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t[]){1}); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t[]){32}); // TODO case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index c418c4dba94..b3d16a23419 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -617,8 +617,8 @@ iris_get_compute_param(struct pipe_screen *pscreen, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t []) { 1 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - RET((uint32_t []) { BRW_SUBGROUP_SIZE }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: + RET((uint32_t []) { 32 | 16 | 8 }); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: RET((uint32_t []) { devinfo->max_cs_workgroup_threads }); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index f4d356be1dd..3b7f579a9b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -531,7 +531,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen, return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: return 0; - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: if (ret) { uint32_t *subgroup_size = ret; *subgroup_size = lp_native_vector_width / 32; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 38ef520bb2d..7a1e12a9cd3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -449,7 +449,7 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen, RET((uint64_t []) { 16 << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ RET((uint64_t []) { 4096 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { 32 }); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: RET((uint32_t []) { 0 }); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7bd9928f637..a9dd435bc97 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -556,7 +556,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, RET((uint64_t []) { 512 << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ RET((uint64_t []) { 4096 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { 32 }); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: RET((uint32_t []) { 0 }); diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 01d32f21f1a..18e5737caf9 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -751,7 +751,7 @@ panfrost_get_compute_param(struct pipe_screen *pscreen, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t[]){1}); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t[]){pan_subgroup_size(dev->arch)}); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 01f9d7fbec2..eddb76a49da 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1035,7 +1035,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: break; /* unused */ - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: if (ret) { uint32_t *subgroup_size = ret; *subgroup_size = r600_wavefront_size(rscreen->family); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 643fd8d06a7..b0f16c0e0e9 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1117,10 +1117,15 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir } return sizeof(uint32_t); } - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: if (ret) { uint32_t *subgroup_size = ret; - *subgroup_size = si_determine_wave_size(sscreen, NULL); + if (sscreen->debug_flags & DBG(W32_CS)) + *subgroup_size = 32; + else if (sscreen->debug_flags & DBG(W64_CS)) + *subgroup_size = 64; + else + *subgroup_size = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 77bd2e0498a..42f53b314c2 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -556,7 +556,7 @@ softpipe_get_compute_param(struct pipe_screen *_screen, case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: case PIPE_COMPUTE_CAP_ADDRESS_BITS: case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 57111eb5f66..98ca9bb69e6 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -545,7 +545,7 @@ v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t []) { 1 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { 16 }); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index a67dcd9c5d1..7f38a370157 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -437,7 +437,7 @@ zink_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: RET((uint32_t []) { 1 }); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + case PIPE_COMPUTE_CAP_SUBGROUP_SIZES: RET((uint32_t []) { screen->info.props11.subgroupSize }); case PIPE_COMPUTE_CAP_MAX_SUBGROUPS: diff --git a/src/gallium/frontends/clover/core/device.cpp b/src/gallium/frontends/clover/core/device.cpp index f1880827e54..59dcaabef9f 100644 --- a/src/gallium/frontends/clover/core/device.cpp +++ b/src/gallium/frontends/clover/core/device.cpp @@ -432,8 +432,11 @@ device::max_block_size() const { cl_uint device::subgroup_size() const { - return get_compute_param(pipe, ir_format(), - PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; + cl_uint subgroup_sizes = + get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_SUBGROUP_SIZES)[0]; + if (!subgroup_sizes) + return 0; + return 1 << (util_last_bit(subgroup_sizes) - 1); } cl_uint diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index 530512e3e0b..97a5a59f96e 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -849,7 +849,7 @@ impl Device { pub fn subgroup_sizes(&self) -> Vec { let subgroup_size = ComputeParam::::compute_param( self.screen.as_ref(), - pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZE, + pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES, ); SetBitIndices::from_msb(subgroup_size) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 6b5cfc10f79..44979ac382e 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -1141,7 +1141,7 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, PIPE_COMPUTE_CAP_MAX_SUBGROUPS, PIPE_COMPUTE_CAP_IMAGES_SUPPORTED, - PIPE_COMPUTE_CAP_SUBGROUP_SIZE, + PIPE_COMPUTE_CAP_SUBGROUP_SIZES, PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK, };