intel/rt: Implement push constants as global memory reads

They're not really "push" anymore but that's because there is no such
thing as push constants in bindless shaders on Intel.  They should be
fast enough, though.  There is some room for debate here as to whether
we want to do the pull in NIR or push it into the back-end.  The
advantage of doing it in the back-end is that it'd be easier to use
MOV_INDIRECT for indirect push constant access rather than falling back
to a dataport message.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:
Jason Ekstrand
2020-08-06 22:17:17 -05:00
committed by Marge Bot
parent f7e24e559f
commit 9fa1cdfe7f
3 changed files with 66 additions and 0 deletions

View File

@@ -1495,6 +1495,21 @@ nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
nir_builder_instr_insert(build, &store->instr);
}
/**
 * Build a nir_intrinsic_load_global_constant instruction that reads from
 * `addr` and insert it through the given builder.
 *
 * \param build           builder whose shader owns the new instruction
 * \param addr            SSA value used as the load's only source
 * \param align           alignment recorded on the intrinsic
 * \param num_components  number of components in the result
 * \param bit_size        bit size of each result component
 * \return the SSA definition holding the loaded value
 */
static inline nir_ssa_def *
nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
                         unsigned num_components, unsigned bit_size)
{
   nir_intrinsic_instr *ld =
      nir_intrinsic_instr_create(build->shader,
                                 nir_intrinsic_load_global_constant);

   /* Source side: the address operand and the caller-provided alignment
    * (the second alignment argument is always 0 here).
    */
   ld->src[0] = nir_src_for_ssa(addr);
   nir_intrinsic_set_align(ld, align, 0);

   /* Destination side: an unnamed SSA value of the requested shape. */
   ld->num_components = num_components;
   nir_ssa_dest_init(&ld->instr, &ld->dest, num_components, bit_size, NULL);

   nir_builder_instr_insert(build, &ld->instr);

   return &ld->dest.ssa;
}
static inline nir_ssa_def *
nir_load_param(nir_builder *build, uint32_t param_idx)
{

View File

@@ -135,6 +135,54 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
nir_instr_remove(instr);
break;
case nir_intrinsic_load_uniform: {
/* Turn a uniform load into a read from global memory located at
 * BRW_RT_PUSH_CONST_OFFSET (plus the intrinsic's base and offset source)
 * past the address produced by nir_load_btd_global_arg_addr_intel().
 * The loaded value is left in sysval; what is done with sysval afterwards
 * is outside this view.
 */
/* We don't want to lower this in the launch trampoline. */
if (stage == MESA_SHADER_COMPUTE)
break;
assert(intrin->dest.is_ssa);
assert(intrin->src[0].is_ssa);
unsigned bit_size = intrin->dest.ssa.bit_size;
/* The offset math below works in whole bytes per component. */
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
if (nir_src_is_const(intrin->src[0])) {
/* Constant offset: the full offset is known while lowering, so fold it
 * here and fetch the enclosing 64B-aligned region with block loads.
 */
uint64_t offset = BRW_RT_PUSH_CONST_OFFSET +
nir_intrinsic_base(intrin) +
nir_src_as_uint(intrin->src[0]);
/* Things should be component-aligned. */
assert(offset % byte_size == 0);
/* Split into a 64-aligned base and the remainder within that 64B. */
unsigned suboffset = offset % 64;
uint64_t aligned_offset = offset - suboffset;
/* Load two just in case we go over a 64B boundary */
nir_ssa_def *data[2];
for (unsigned i = 0; i < 2; i++) {
nir_ssa_def *addr =
nir_iadd_imm(b, nir_load_btd_global_arg_addr_intel(b),
aligned_offset + i * 64);
/* NOTE(review): the 16 is presumably the number of 32-bit components
 * per block load (16 * 4B = 64B, matching the 64-byte stride above) --
 * confirm against the intrinsic's definition.
 */
data[i] = nir_load_global_const_block_intel(b, addr, 16);
}
/* Pick the requested components out of the two loaded blocks, starting
 * at suboffset expressed in bits.
 */
sysval = nir_extract_bits(b, data, 2, suboffset * 8,
intrin->num_components, bit_size);
} else {
/* Non-constant offset: add the fixed part to the offset source in the
 * shader, widen to 64 bits, and issue a single global constant load
 * whose alignment argument is the component size in bytes.
 */
nir_ssa_def *offset32 =
nir_iadd_imm(b, intrin->src[0].ssa,
BRW_RT_PUSH_CONST_OFFSET +
nir_intrinsic_base(intrin));
nir_ssa_def *addr =
nir_iadd(b, nir_load_btd_global_arg_addr_intel(b),
nir_u2u64(b, offset32));
sysval = nir_load_global_constant(b, addr, byte_size,
intrin->num_components, bit_size);
}
break;
}
case nir_intrinsic_load_ray_launch_id:
/* NOTE(review): 0xe is binary 1110, so this presumably selects components
 * 1..3 of hotzone (skipping component 0) -- confirm against nir_channels()
 * and the hotzone layout, neither of which is visible here.
 */
sysval = nir_channels(b, hotzone, 0xe);
break;

View File

@@ -31,6 +31,9 @@ extern "C" {
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32
/** Offset after the RT dispatch globals at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128
/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8