v3dv: implement vkCmdDispatchBase

This was added with VK_KHR_device_group and allows users to specify
a base offset that will be automatically added to gl_WorkGroupID.

Unfortunately, V3D doesn't support this natively, so we need to manually
add the base to the workgroup id generated by the hardware. To do this,
we inject add instructions that source from a new QUNIFORM, which
retrieves the actual dispatch base from the compute job when it is
dispatched.

Since the same compute shader can be dispatched with CmdDispatch and/or
CmdDispatchBase, we always need to emit these extra add instructions,
and we use a base of (0,0,0) for regular dispatches.
Since we don't support any version of OpenGL with this dispatch
base functionality, we can avoid the extra instructions there.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11037>
This commit is contained in:
Iago Toral Quiroga
2021-05-27 09:06:00 +02:00
committed by Marge Bot
parent 816be7d46f
commit f07c797e93
5 changed files with 68 additions and 11 deletions

View File

@@ -3003,17 +3003,31 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
break;
case nir_intrinsic_load_work_group_id:
ntq_store_dest(c, &instr->dest, 0,
vir_AND(c, c->cs_payload[0],
vir_uniform_ui(c, 0xffff)));
ntq_store_dest(c, &instr->dest, 1,
vir_SHR(c, c->cs_payload[0],
vir_uniform_ui(c, 16)));
ntq_store_dest(c, &instr->dest, 2,
vir_AND(c, c->cs_payload[1],
vir_uniform_ui(c, 0xffff)));
case nir_intrinsic_load_work_group_id: {
struct qreg x = vir_AND(c, c->cs_payload[0],
vir_uniform_ui(c, 0xffff));
struct qreg y = vir_SHR(c, c->cs_payload[0],
vir_uniform_ui(c, 16));
struct qreg z = vir_AND(c, c->cs_payload[1],
vir_uniform_ui(c, 0xffff));
/* We only support dispatch base in Vulkan */
if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
x = vir_ADD(c, x,
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 0));
y = vir_ADD(c, y,
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 1));
z = vir_ADD(c, z,
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 2));
}
ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, x));
ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, y));
ntq_store_dest(c, &instr->dest, 2, vir_MOV(c, z));
break;
}
case nir_intrinsic_load_subgroup_id:
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));

View File

@@ -299,6 +299,11 @@ enum quniform_contents {
*/
QUNIFORM_NUM_WORK_GROUPS,
/* Base workgroup offset passed to vkCmdDispatchBase in the dimension
* selected by the data value.
*/
QUNIFORM_WORK_GROUP_BASE,
/**
* Returns the offset of the scratch buffer for register spilling.
*/

View File

@@ -5270,6 +5270,9 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job(
static struct v3dv_job *
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t base_offset_x,
uint32_t base_offset_y,
uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z,
@@ -5298,6 +5301,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
job->csd.wg_count[1] = group_count_y;
job->csd.wg_count[2] = group_count_z;
job->csd.wg_base[0] = base_offset_x;
job->csd.wg_base[1] = base_offset_y;
job->csd.wg_base[2] = base_offset_z;
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
@@ -5367,6 +5374,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
static void
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t base_offset_x,
uint32_t base_offset_y,
uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z)
@@ -5376,6 +5386,9 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_job *job =
cmd_buffer_create_csd_job(cmd_buffer,
base_offset_x,
base_offset_y,
base_offset_z,
group_count_x,
group_count_y,
group_count_z,
@@ -5394,9 +5407,27 @@ v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_emit_pre_dispatch(cmd_buffer);
cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
groupCountX, groupCountY, groupCountZ);
}
/**
 * Entry point for vkCmdDispatchBase.
 *
 * Resolves the command buffer from its handle, runs the pre-dispatch step
 * and then issues a dispatch of groupCountX x groupCountY x groupCountZ
 * workgroups, forwarding (baseGroupX, baseGroupY, baseGroupZ) as the base
 * workgroup offset.
 */
void
v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
                     uint32_t baseGroupX,
                     uint32_t baseGroupY,
                     uint32_t baseGroupZ,
                     uint32_t groupCountX,
                     uint32_t groupCountY,
                     uint32_t groupCountZ)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd, commandBuffer);

   /* Same sequence as v3dv_CmdDispatch, except that the base offset comes
    * from the caller instead of being (0, 0, 0).
    */
   cmd_buffer_emit_pre_dispatch(cmd);
   cmd_buffer_dispatch(cmd, baseGroupX, baseGroupY, baseGroupZ,
                       groupCountX, groupCountY, groupCountZ);
}
static void
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
@@ -5421,6 +5452,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
*/
struct v3dv_job *csd_job =
cmd_buffer_create_csd_job(cmd_buffer,
0, 0, 0,
1, 1, 1,
&job->cpu.csd_indirect.wg_uniform_offsets[0],
&job->cpu.csd_indirect.wg_size);

View File

@@ -1004,6 +1004,7 @@ struct v3dv_job {
struct {
struct v3dv_bo *shared_memory;
uint32_t wg_count[3];
uint32_t wg_base[3];
struct drm_v3d_submit_csd submit;
} csd;
};

View File

@@ -486,6 +486,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
break;
case QUNIFORM_WORK_GROUP_BASE:
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
break;
case QUNIFORM_SHARED_OFFSET:
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
assert(job->csd.shared_memory);