v3dv: implement vkCmdDispatchBase
This was added with VK_KHR_device_group and allows users to specify a base offset that will be automatically added to gl_WorkGroupID. Unfortunately, V3D doesn't support this natively, so we need to manually add the base to the workgroup ID generated by the hardware. For this, we inject add instructions that source from a QUNIFORM that will retrieve the actual dispatch base from the compute job when it is dispatched. Since a compute shader can be dispatched with CmdDispatch and/or CmdDispatchBase, we always need to add these additional add instructions and use a base of (0,0,0) for regular dispatches. Since we don't support any version of OpenGL with this dispatch base functionality, we can avoid the extra instructions there. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11037>
This commit is contained in:

committed by
Marge Bot

parent
816be7d46f
commit
f07c797e93
@@ -3003,17 +3003,31 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_AND(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 0xffff)));
|
||||
ntq_store_dest(c, &instr->dest, 1,
|
||||
vir_SHR(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 16)));
|
||||
ntq_store_dest(c, &instr->dest, 2,
|
||||
vir_AND(c, c->cs_payload[1],
|
||||
vir_uniform_ui(c, 0xffff)));
|
||||
case nir_intrinsic_load_work_group_id: {
|
||||
struct qreg x = vir_AND(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 0xffff));
|
||||
|
||||
struct qreg y = vir_SHR(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 16));
|
||||
|
||||
struct qreg z = vir_AND(c, c->cs_payload[1],
|
||||
vir_uniform_ui(c, 0xffff));
|
||||
|
||||
/* We only support dispatch base in Vulkan */
|
||||
if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
|
||||
x = vir_ADD(c, x,
|
||||
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 0));
|
||||
y = vir_ADD(c, y,
|
||||
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 1));
|
||||
z = vir_ADD(c, z,
|
||||
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 2));
|
||||
}
|
||||
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, x));
|
||||
ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, y));
|
||||
ntq_store_dest(c, &instr->dest, 2, vir_MOV(c, z));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_subgroup_id:
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
|
||||
|
@@ -299,6 +299,11 @@ enum quniform_contents {
|
||||
*/
|
||||
QUNIFORM_NUM_WORK_GROUPS,
|
||||
|
||||
/* Base workgroup offset passed to vkCmdDispatchBase in the dimension
|
||||
* selected by the data value.
|
||||
*/
|
||||
QUNIFORM_WORK_GROUP_BASE,
|
||||
|
||||
/**
|
||||
* Returns the offset of the scratch buffer for register spilling.
|
||||
*/
|
||||
|
@@ -5270,6 +5270,9 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job(
|
||||
|
||||
static struct v3dv_job *
|
||||
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
uint32_t base_offset_x,
|
||||
uint32_t base_offset_y,
|
||||
uint32_t base_offset_z,
|
||||
uint32_t group_count_x,
|
||||
uint32_t group_count_y,
|
||||
uint32_t group_count_z,
|
||||
@@ -5298,6 +5301,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
job->csd.wg_count[1] = group_count_y;
|
||||
job->csd.wg_count[2] = group_count_z;
|
||||
|
||||
job->csd.wg_base[0] = base_offset_x;
|
||||
job->csd.wg_base[1] = base_offset_y;
|
||||
job->csd.wg_base[2] = base_offset_z;
|
||||
|
||||
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
@@ -5367,6 +5374,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
|
||||
static void
|
||||
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
uint32_t base_offset_x,
|
||||
uint32_t base_offset_y,
|
||||
uint32_t base_offset_z,
|
||||
uint32_t group_count_x,
|
||||
uint32_t group_count_y,
|
||||
uint32_t group_count_z)
|
||||
@@ -5376,6 +5386,9 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
|
||||
struct v3dv_job *job =
|
||||
cmd_buffer_create_csd_job(cmd_buffer,
|
||||
base_offset_x,
|
||||
base_offset_y,
|
||||
base_offset_z,
|
||||
group_count_x,
|
||||
group_count_y,
|
||||
group_count_z,
|
||||
@@ -5394,9 +5407,27 @@ v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
|
||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
cmd_buffer_emit_pre_dispatch(cmd_buffer);
|
||||
cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
|
||||
cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
|
||||
groupCountX, groupCountY, groupCountZ);
|
||||
}
|
||||
|
||||
void v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
|
||||
uint32_t baseGroupX,
|
||||
uint32_t baseGroupY,
|
||||
uint32_t baseGroupZ,
|
||||
uint32_t groupCountX,
|
||||
uint32_t groupCountY,
|
||||
uint32_t groupCountZ)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
cmd_buffer_emit_pre_dispatch(cmd_buffer);
|
||||
cmd_buffer_dispatch(cmd_buffer,
|
||||
baseGroupX, baseGroupY, baseGroupZ,
|
||||
groupCountX, groupCountY, groupCountZ);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_buffer *buffer,
|
||||
@@ -5421,6 +5452,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
*/
|
||||
struct v3dv_job *csd_job =
|
||||
cmd_buffer_create_csd_job(cmd_buffer,
|
||||
0, 0, 0,
|
||||
1, 1, 1,
|
||||
&job->cpu.csd_indirect.wg_uniform_offsets[0],
|
||||
&job->cpu.csd_indirect.wg_size);
|
||||
|
@@ -1004,6 +1004,7 @@ struct v3dv_job {
|
||||
struct {
|
||||
struct v3dv_bo *shared_memory;
|
||||
uint32_t wg_count[3];
|
||||
uint32_t wg_base[3];
|
||||
struct drm_v3d_submit_csd submit;
|
||||
} csd;
|
||||
};
|
||||
|
@@ -486,6 +486,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_WORK_GROUP_BASE:
|
||||
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
||||
cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_SHARED_OFFSET:
|
||||
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
||||
assert(job->csd.shared_memory);
|
||||
|
Reference in New Issue
Block a user