v3d: Add support for CS workgroup/invocation id intrinsics.
We get a payload for the ivec3 workgroup ID and an int local invocation index, and we use the core lowering to turn them into the global invocation ID and the local invocation ID ivec3s.
This commit is contained in:
@@ -1899,6 +1899,32 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
*/
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
for (int i = 0; i < 3; i++) {
|
||||
ntq_store_dest(c, &instr->dest, i,
|
||||
vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
|
||||
i));
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_SHR(c, c->cs_payload[1],
|
||||
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_AND(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 0xffff)));
|
||||
ntq_store_dest(c, &instr->dest, 1,
|
||||
vir_SHR(c, c->cs_payload[0],
|
||||
vir_uniform_ui(c, 16)));
|
||||
ntq_store_dest(c, &instr->dest, 2,
|
||||
vir_AND(c, c->cs_payload[1],
|
||||
vir_uniform_ui(c, 0xffff)));
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Unknown intrinsic: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
@@ -2255,7 +2281,8 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
|
||||
static void
|
||||
nir_to_vir(struct v3d_compile *c)
|
||||
{
|
||||
if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
switch (c->s->info.stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
|
||||
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
@@ -2270,6 +2297,30 @@ nir_to_vir(struct v3d_compile *c)
|
||||
} else if (c->fs_key->is_lines) {
|
||||
c->line_x = emit_fragment_varying(c, NULL, 0, 0);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
if (c->s->info.system_values_read &
|
||||
((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
|
||||
(1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
|
||||
c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
}
|
||||
if (c->s->info.system_values_read &
|
||||
((1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
|
||||
c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
}
|
||||
|
||||
/* Set up the division between gl_LocalInvocationIndex and
|
||||
* wg_in_mem in the payload reg.
|
||||
*/
|
||||
int wg_size = (c->s->info.cs.local_size[0] *
|
||||
c->s->info.cs.local_size[1] *
|
||||
c->s->info.cs.local_size[2]);
|
||||
c->local_invocation_index_bits =
|
||||
ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
|
||||
assert(c->local_invocation_index_bits <= 8);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (c->s->info.stage == MESA_SHADER_FRAGMENT)
|
||||
@@ -2298,6 +2349,7 @@ const nir_shader_compiler_options v3d_nir_options = {
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
.lower_cs_local_id_from_index = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
|
@@ -259,6 +259,11 @@ enum quniform_contents {
|
||||
|
||||
QUNIFORM_ALPHA_REF,
|
||||
|
||||
/* Number of workgroups passed to glDispatchCompute in the dimension
|
||||
* selected by the data value.
|
||||
*/
|
||||
QUNIFORM_NUM_WORK_GROUPS,
|
||||
|
||||
/**
|
||||
* Returns the offset of the scratch buffer for register spilling.
|
||||
*/
|
||||
@@ -540,6 +545,9 @@ struct v3d_compile {
|
||||
/* Fragment shader payload regs. */
|
||||
struct qreg payload_w, payload_w_centroid, payload_z;
|
||||
|
||||
struct qreg cs_payload[2];
|
||||
int local_invocation_index_bits;
|
||||
|
||||
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
|
||||
uint32_t num_vpm_writes;
|
||||
|
||||
|
@@ -639,6 +639,7 @@ v3d_lower_nir(struct v3d_compile *c)
|
||||
}
|
||||
|
||||
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
|
||||
NIR_PASS_V(c->s, nir_lower_system_values);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -108,6 +108,10 @@ vir_dump_uniform(enum quniform_contents contents,
|
||||
fprintf(stderr, "ssbo_size[%d]", data);
|
||||
break;
|
||||
|
||||
case QUNIFORM_NUM_WORK_GROUPS:
|
||||
fprintf(stderr, "num_wg.%c", data < 3 ? "xyz"[data] : '?');
|
||||
break;
|
||||
|
||||
default:
|
||||
if (quniform_contents_is_texture_p0(contents)) {
|
||||
fprintf(stderr, "tex[%d].p0: 0x%08x",
|
||||
|
@@ -482,6 +482,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
/* Payload setup instructions: Force allocate
|
||||
* the dst to the given register (so the MOV
|
||||
* will disappear).
|
||||
|
Reference in New Issue
Block a user