v3dv: implement vkCmdDispatchBase
This was added with VK_KHR_device_group and allows users to specify a base offset that will be automatically added to gl_WorkGroupID.

Unfortunately, V3D doesn't support this natively, so we need to manually add the base to the workgroup id generated by the hardware. For this, we inject add instructions that source from a QUNIFORM that will retrieve the actual dispatch base from the compute job when it is dispatched.

Since a compute shader can be dispatched with CmdDispatch and/or CmdDispatchBase, we always need to emit these additional add instructions and use a base of (0,0,0) for regular dispatches.

Since we don't support any version of OpenGL with this dispatch base functionality, we can avoid the extra instructions there.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11037>
This commit is contained in:

committed by
Marge Bot

parent
816be7d46f
commit
f07c797e93
@@ -3003,17 +3003,31 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||||||
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
|
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_load_work_group_id:
|
case nir_intrinsic_load_work_group_id: {
|
||||||
ntq_store_dest(c, &instr->dest, 0,
|
struct qreg x = vir_AND(c, c->cs_payload[0],
|
||||||
vir_AND(c, c->cs_payload[0],
|
vir_uniform_ui(c, 0xffff));
|
||||||
vir_uniform_ui(c, 0xffff)));
|
|
||||||
ntq_store_dest(c, &instr->dest, 1,
|
struct qreg y = vir_SHR(c, c->cs_payload[0],
|
||||||
vir_SHR(c, c->cs_payload[0],
|
vir_uniform_ui(c, 16));
|
||||||
vir_uniform_ui(c, 16)));
|
|
||||||
ntq_store_dest(c, &instr->dest, 2,
|
struct qreg z = vir_AND(c, c->cs_payload[1],
|
||||||
vir_AND(c, c->cs_payload[1],
|
vir_uniform_ui(c, 0xffff));
|
||||||
vir_uniform_ui(c, 0xffff)));
|
|
||||||
|
/* We only support dispatch base in Vulkan */
|
||||||
|
if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
|
||||||
|
x = vir_ADD(c, x,
|
||||||
|
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 0));
|
||||||
|
y = vir_ADD(c, y,
|
||||||
|
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 1));
|
||||||
|
z = vir_ADD(c, z,
|
||||||
|
vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, x));
|
||||||
|
ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, y));
|
||||||
|
ntq_store_dest(c, &instr->dest, 2, vir_MOV(c, z));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_subgroup_id:
|
case nir_intrinsic_load_subgroup_id:
|
||||||
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
|
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
|
||||||
|
@@ -299,6 +299,11 @@ enum quniform_contents {
|
|||||||
*/
|
*/
|
||||||
QUNIFORM_NUM_WORK_GROUPS,
|
QUNIFORM_NUM_WORK_GROUPS,
|
||||||
|
|
||||||
|
/* Base workgroup offset passed to vkCmdDispatchBase in the dimension
|
||||||
|
* selected by the data value.
|
||||||
|
*/
|
||||||
|
QUNIFORM_WORK_GROUP_BASE,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the offset of the scratch buffer for register spilling.
|
* Returns the offset of the scratch buffer for register spilling.
|
||||||
*/
|
*/
|
||||||
|
@@ -5270,6 +5270,9 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job(
|
|||||||
|
|
||||||
static struct v3dv_job *
|
static struct v3dv_job *
|
||||||
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
||||||
|
uint32_t base_offset_x,
|
||||||
|
uint32_t base_offset_y,
|
||||||
|
uint32_t base_offset_z,
|
||||||
uint32_t group_count_x,
|
uint32_t group_count_x,
|
||||||
uint32_t group_count_y,
|
uint32_t group_count_y,
|
||||||
uint32_t group_count_z,
|
uint32_t group_count_z,
|
||||||
@@ -5298,6 +5301,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
|||||||
job->csd.wg_count[1] = group_count_y;
|
job->csd.wg_count[1] = group_count_y;
|
||||||
job->csd.wg_count[2] = group_count_z;
|
job->csd.wg_count[2] = group_count_z;
|
||||||
|
|
||||||
|
job->csd.wg_base[0] = base_offset_x;
|
||||||
|
job->csd.wg_base[1] = base_offset_y;
|
||||||
|
job->csd.wg_base[2] = base_offset_z;
|
||||||
|
|
||||||
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||||
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||||
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||||
@@ -5367,6 +5374,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
|
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
|
||||||
|
uint32_t base_offset_x,
|
||||||
|
uint32_t base_offset_y,
|
||||||
|
uint32_t base_offset_z,
|
||||||
uint32_t group_count_x,
|
uint32_t group_count_x,
|
||||||
uint32_t group_count_y,
|
uint32_t group_count_y,
|
||||||
uint32_t group_count_z)
|
uint32_t group_count_z)
|
||||||
@@ -5376,6 +5386,9 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
struct v3dv_job *job =
|
struct v3dv_job *job =
|
||||||
cmd_buffer_create_csd_job(cmd_buffer,
|
cmd_buffer_create_csd_job(cmd_buffer,
|
||||||
|
base_offset_x,
|
||||||
|
base_offset_y,
|
||||||
|
base_offset_z,
|
||||||
group_count_x,
|
group_count_x,
|
||||||
group_count_y,
|
group_count_y,
|
||||||
group_count_z,
|
group_count_z,
|
||||||
@@ -5394,9 +5407,27 @@ v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
|
||||||
cmd_buffer_emit_pre_dispatch(cmd_buffer);
|
cmd_buffer_emit_pre_dispatch(cmd_buffer);
|
||||||
cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
|
cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
|
||||||
|
groupCountX, groupCountY, groupCountZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
|
||||||
|
uint32_t baseGroupX,
|
||||||
|
uint32_t baseGroupY,
|
||||||
|
uint32_t baseGroupZ,
|
||||||
|
uint32_t groupCountX,
|
||||||
|
uint32_t groupCountY,
|
||||||
|
uint32_t groupCountZ)
|
||||||
|
{
|
||||||
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
|
||||||
|
cmd_buffer_emit_pre_dispatch(cmd_buffer);
|
||||||
|
cmd_buffer_dispatch(cmd_buffer,
|
||||||
|
baseGroupX, baseGroupY, baseGroupZ,
|
||||||
|
groupCountX, groupCountY, groupCountZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
|
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
|
||||||
struct v3dv_buffer *buffer,
|
struct v3dv_buffer *buffer,
|
||||||
@@ -5421,6 +5452,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
|
|||||||
*/
|
*/
|
||||||
struct v3dv_job *csd_job =
|
struct v3dv_job *csd_job =
|
||||||
cmd_buffer_create_csd_job(cmd_buffer,
|
cmd_buffer_create_csd_job(cmd_buffer,
|
||||||
|
0, 0, 0,
|
||||||
1, 1, 1,
|
1, 1, 1,
|
||||||
&job->cpu.csd_indirect.wg_uniform_offsets[0],
|
&job->cpu.csd_indirect.wg_uniform_offsets[0],
|
||||||
&job->cpu.csd_indirect.wg_size);
|
&job->cpu.csd_indirect.wg_size);
|
||||||
|
@@ -1004,6 +1004,7 @@ struct v3dv_job {
|
|||||||
struct {
|
struct {
|
||||||
struct v3dv_bo *shared_memory;
|
struct v3dv_bo *shared_memory;
|
||||||
uint32_t wg_count[3];
|
uint32_t wg_count[3];
|
||||||
|
uint32_t wg_base[3];
|
||||||
struct drm_v3d_submit_csd submit;
|
struct drm_v3d_submit_csd submit;
|
||||||
} csd;
|
} csd;
|
||||||
};
|
};
|
||||||
|
@@ -486,6 +486,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
|
|||||||
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
|
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case QUNIFORM_WORK_GROUP_BASE:
|
||||||
|
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
||||||
|
cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
|
||||||
|
break;
|
||||||
|
|
||||||
case QUNIFORM_SHARED_OFFSET:
|
case QUNIFORM_SHARED_OFFSET:
|
||||||
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
||||||
assert(job->csd.shared_memory);
|
assert(job->csd.shared_memory);
|
||||||
|
Reference in New Issue
Block a user