v3d: Add support for CS workgroup/invocation id intrinsics.

We get a payload for the ivec3 workgroup ID and an int local invocation
index, and we use the core lowering to turn them into the global invocation
ID and the local invocation ID ivec3s.
This commit is contained in:
Eric Anholt
2018-12-07 15:47:18 -08:00
parent 6281f26f06
commit 01d913cf90
5 changed files with 67 additions and 1 deletions

View File

@@ -1899,6 +1899,32 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
*/
break;
case nir_intrinsic_load_num_work_groups:
for (int i = 0; i < 3; i++) {
ntq_store_dest(c, &instr->dest, i,
vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
i));
}
break;
case nir_intrinsic_load_local_invocation_index:
ntq_store_dest(c, &instr->dest, 0,
vir_SHR(c, c->cs_payload[1],
vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
break;
case nir_intrinsic_load_work_group_id:
ntq_store_dest(c, &instr->dest, 0,
vir_AND(c, c->cs_payload[0],
vir_uniform_ui(c, 0xffff)));
ntq_store_dest(c, &instr->dest, 1,
vir_SHR(c, c->cs_payload[0],
vir_uniform_ui(c, 16)));
ntq_store_dest(c, &instr->dest, 2,
vir_AND(c, c->cs_payload[1],
vir_uniform_ui(c, 0xffff)));
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
@@ -2255,7 +2281,8 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
static void
nir_to_vir(struct v3d_compile *c)
{
if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
switch (c->s->info.stage) {
case MESA_SHADER_FRAGMENT:
c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
@@ -2270,6 +2297,30 @@ nir_to_vir(struct v3d_compile *c)
} else if (c->fs_key->is_lines) {
c->line_x = emit_fragment_varying(c, NULL, 0, 0);
}
break;
case MESA_SHADER_COMPUTE:
if (c->s->info.system_values_read &
((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
(1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
}
if (c->s->info.system_values_read &
((1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
}
/* Set up the division between gl_LocalInvocationIndex and
* wg_in_mem in the payload reg.
*/
int wg_size = (c->s->info.cs.local_size[0] *
c->s->info.cs.local_size[1] *
c->s->info.cs.local_size[2]);
c->local_invocation_index_bits =
ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
assert(c->local_invocation_index_bits <= 8);
break;
default:
break;
}
if (c->s->info.stage == MESA_SHADER_FRAGMENT)
@@ -2298,6 +2349,7 @@ const nir_shader_compiler_options v3d_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
.lower_cs_local_id_from_index = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,

View File

@@ -259,6 +259,11 @@ enum quniform_contents {
QUNIFORM_ALPHA_REF,
/* Number of workgroups passed to glDispatchCompute in the dimension
* selected by the data value.
*/
QUNIFORM_NUM_WORK_GROUPS,
/**
* Returns the offset of the scratch buffer for register spilling.
*/
@@ -540,6 +545,9 @@ struct v3d_compile {
/* Fragment shader payload regs. */
struct qreg payload_w, payload_w_centroid, payload_z;
struct qreg cs_payload[2];
int local_invocation_index_bits;
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
uint32_t num_vpm_writes;

View File

@@ -639,6 +639,7 @@ v3d_lower_nir(struct v3d_compile *c)
}
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
NIR_PASS_V(c->s, nir_lower_system_values);
}
static void

View File

@@ -108,6 +108,10 @@ vir_dump_uniform(enum quniform_contents contents,
fprintf(stderr, "ssbo_size[%d]", data);
break;
case QUNIFORM_NUM_WORK_GROUPS:
fprintf(stderr, "num_wg.%c", data < 3 ? "xyz"[data] : '?');
break;
default:
if (quniform_contents_is_texture_p0(contents)) {
fprintf(stderr, "tex[%d].p0: 0x%08x",

View File

@@ -482,6 +482,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
case 0:
case 1:
case 2:
case 3:
/* Payload setup instructions: Force allocate
* the dst to the given register (so the MOV
* will disappear).