intel/rt: Implement push constants as global memory reads

They're not really "push" anymore, but that's because there is no such
thing as push constants in bindless shaders on Intel. They should be
fast enough, though.

There is some room for debate here as to whether we want to do the pull
in NIR or push it into the back-end. The advantage of doing it in the
back-end is that it would be easier to use MOV_INDIRECT for indirect
push constant access rather than falling back to a dataport message.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:

committed by
Marge Bot

parent
f7e24e559f
commit
9fa1cdfe7f
@@ -1495,6 +1495,21 @@ nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
|
||||
nir_builder_instr_insert(build, &store->instr);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
|
||||
unsigned num_components, unsigned bit_size)
|
||||
{
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
|
||||
load->num_components = num_components;
|
||||
load->src[0] = nir_src_for_ssa(addr);
|
||||
nir_intrinsic_set_align(load, align, 0);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
num_components, bit_size, NULL);
|
||||
nir_builder_instr_insert(build, &load->instr);
|
||||
return &load->dest.ssa;
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_load_param(nir_builder *build, uint32_t param_idx)
|
||||
{
|
||||
|
@@ -135,6 +135,54 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
|
||||
      /* Lower "push" constants to pulls from global memory: there is no
       * real push-constant hardware path for bindless RT shaders on Intel,
       * so the data lives in global memory just past the RT dispatch
       * globals (at BRW_RT_PUSH_CONST_OFFSET from the BTD global argument
       * address).
       */
      case nir_intrinsic_load_uniform: {
         /* We don't want to lower this in the launch trampoline. */
         if (stage == MESA_SHADER_COMPUTE)
            break;

         assert(intrin->dest.is_ssa);
         assert(intrin->src[0].is_ssa);

         unsigned bit_size = intrin->dest.ssa.bit_size;
         assert(bit_size >= 8 && bit_size % 8 == 0);
         unsigned byte_size = bit_size / 8;

         if (nir_src_is_const(intrin->src[0])) {
            /* Constant offset: total byte offset is the push-constant base
             * plus the intrinsic's base plus the constant source offset.
             */
            uint64_t offset = BRW_RT_PUSH_CONST_OFFSET +
                              nir_intrinsic_base(intrin) +
                              nir_src_as_uint(intrin->src[0]);

            /* Things should be component-aligned. */
            assert(offset % byte_size == 0);

            /* Split the offset into a 64B-aligned part and a sub-64B
             * remainder so we can use aligned block loads.
             */
            unsigned suboffset = offset % 64;
            uint64_t aligned_offset = offset - suboffset;

            /* Load two just in case we go over a 64B boundary */
            nir_ssa_def *data[2];
            for (unsigned i = 0; i < 2; i++) {
               nir_ssa_def *addr =
                  nir_iadd_imm(b, nir_load_btd_global_arg_addr_intel(b),
                               aligned_offset + i * 64);
               data[i] = nir_load_global_const_block_intel(b, addr, 16);
            }

            /* Carve the requested components back out of the two 64B
             * blocks, starting at the bit position of the remainder.
             */
            sysval = nir_extract_bits(b, data, 2, suboffset * 8,
                                      intrin->num_components, bit_size);
         } else {
            /* Indirect offset: compute the 32-bit byte offset, widen it to
             * 64 bits, add it to the global argument address, and fall back
             * to a plain global constant load.
             */
            nir_ssa_def *offset32 =
               nir_iadd_imm(b, intrin->src[0].ssa,
                            BRW_RT_PUSH_CONST_OFFSET +
                            nir_intrinsic_base(intrin));
            nir_ssa_def *addr =
               nir_iadd(b, nir_load_btd_global_arg_addr_intel(b),
                        nir_u2u64(b, offset32));
            sysval = nir_load_global_constant(b, addr, byte_size,
                                              intrin->num_components, bit_size);
         }
         break;
      }
|
||||
|
||||
case nir_intrinsic_load_ray_launch_id:
|
||||
sysval = nir_channels(b, hotzone, 0xe);
|
||||
break;
|
||||
|
@@ -31,6 +31,9 @@ extern "C" {
|
||||
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32

/** Offset after the RT dispatch globals at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128

/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8
||||
|
||||
|
Reference in New Issue
Block a user