nvk,nak: Switch to nir_intrinsic_ldc_nv

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand
2024-05-22 14:50:37 -05:00
committed by Marge Bot
parent b107240474
commit dc99d9b2df
6 changed files with 110 additions and 100 deletions

View File

@@ -2022,7 +2022,8 @@ Converter::visit(nir_intrinsic_instr *insn)
mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
break;
}
case nir_intrinsic_load_ubo: {
case nir_intrinsic_load_ubo:
case nir_intrinsic_ldc_nv: {
const DataType dType = getDType(insn);
LValues &newDefs = convert(&insn->def);
Value *indirectIndex;

View File

@@ -2606,7 +2606,7 @@ impl<'a> ShaderFromNir<'a> {
}
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_load_ubo => {
nir_intrinsic_ldc_nv => {
let size_B =
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let idx = &srcs[0];

View File

@@ -803,7 +803,7 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
assert(util_is_power_of_two_nonzero(align_mul));
unsigned max_bytes = 128u / 8u;
if (low->intrinsic == nir_intrinsic_load_ubo)
if (low->intrinsic == nir_intrinsic_ldc_nv)
max_bytes = 64u / 8u;
align_mul = MIN2(align_mul, max_bytes);
@@ -830,10 +830,10 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
assert(util_is_power_of_two_nonzero(chunk_bytes));
if (intrin == nir_intrinsic_load_ubo)
if (intrin == nir_intrinsic_ldc_nv)
chunk_bytes = MIN2(chunk_bytes, 8);
if (intrin == nir_intrinsic_load_ubo && align < 4) {
if (intrin == nir_intrinsic_ldc_nv && align < 4) {
/* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
* In particular, this applies to ldc.u16 which means we either have to
* fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get

View File

@@ -107,12 +107,10 @@ static nir_def *
load_sample_pos_at(nir_builder *b, nir_def *sample_id,
const struct nak_fs_key *fs_key)
{
nir_def *loc = nir_load_ubo(b, 1, 64,
nir_imm_int(b, fs_key->sample_locations_cb),
nir_imm_int(b, fs_key->sample_locations_offset),
.align_mul = 8,
.align_offset = 0,
.range = fs_key->sample_locations_offset + 8);
nir_def *loc = nir_ldc_nv(b, 1, 64,
nir_imm_int(b, fs_key->sample_locations_cb),
nir_imm_int(b, fs_key->sample_locations_offset),
.align_mul = 8, .align_offset = 0);
/* Yay little endian */
loc = nir_ushr(b, loc, nir_imul_imm(b, sample_id, 8));

View File

@@ -512,16 +512,14 @@ lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
assert(cbuf_idx >= 0);
uint32_t base = nir_intrinsic_base(load);
uint32_t range = nir_intrinsic_range(load);
b->cursor = nir_before_instr(&load->instr);
nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
nir_def *data = nir_load_ubo(b, load->def.num_components, load->def.bit_size,
nir_imm_int(b, cbuf_idx), offset,
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load),
.range_base = base, .range = range);
nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
nir_imm_int(b, cbuf_idx), offset,
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
nir_def_rewrite_uses(&load->def, data);
@@ -535,9 +533,9 @@ load_descriptor_set_addr(nir_builder *b, uint32_t set,
uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
set * sizeof(struct nvk_buffer_address);
return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
nir_imm_int(b, set_addr_offset),
.align_mul = 8, .align_offset = 0, .range = ~0);
return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
nir_imm_int(b, set_addr_offset),
.align_mul = 8, .align_offset = 0);
}
static nir_def *
@@ -560,10 +558,9 @@ load_dynamic_buffer_start(nir_builder *b, uint32_t set,
uint32_t root_offset =
nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
return nir_u2u32(b, nir_load_ubo(b, 1, 8, nir_imm_int(b, 0),
nir_imm_int(b, root_offset),
.align_mul = 1, .align_offset = 0,
.range = ~0));
return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
nir_imm_int(b, root_offset),
.align_mul = 1, .align_offset = 0));
}
}
@@ -594,8 +591,8 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
assert(num_components == 4 && bit_size == 32);
nir_def *desc =
nir_load_ubo(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
.align_mul = 16, .align_offset = 0, .range = ~0);
nir_ldc_nv(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
.align_mul = 16, .align_offset = 0);
/* We know a priori that the .w component (offset) is zero */
return nir_vec4(b, nir_channel(b, desc, 0),
nir_channel(b, desc, 1),
@@ -641,12 +638,11 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
nir_def *desc;
if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
desc = nir_load_ubo(b, num_components, bit_size,
nir_imm_int(b, cbuf_idx),
desc_ubo_offset,
.align_mul = desc_align_mul,
.align_offset = desc_align_offset,
.range = ~0);
desc = nir_ldc_nv(b, num_components, bit_size,
nir_imm_int(b, cbuf_idx),
desc_ubo_offset,
.align_mul = desc_align_mul,
.align_offset = desc_align_offset);
} else {
nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
desc = nir_load_global_constant_offset(b, num_components, bit_size,
@@ -727,13 +723,12 @@ _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
{
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *val = nir_load_ubo(b, intrin->def.num_components,
intrin->def.bit_size,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, root_table_offset),
.align_mul = 4,
.align_offset = 0,
.range = root_table_offset + 3 * 4);
nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
intrin->def.bit_size,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, root_table_offset),
.align_mul = 4,
.align_offset = 0);
nir_def_rewrite_uses(&intrin->def, val);
@@ -759,12 +754,10 @@ lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
push_region_offset + base);
nir_def *val =
nir_load_ubo(b, load->def.num_components, load->def.bit_size,
nir_imm_int(b, 0), offset,
.align_mul = load->def.bit_size / 8,
.align_offset = 0,
.range = push_region_offset + base +
nir_intrinsic_range(load));
nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
nir_imm_int(b, 0), offset,
.align_mul = load->def.bit_size / 8,
.align_offset = 0);
nir_def_rewrite_uses(&load->def, val);
@@ -903,12 +896,11 @@ lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
b->cursor = nir_before_instr(&interp->instr);
nir_def *loc = nir_load_ubo(b, 1, 64,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, root_table_offset),
.align_mul = 8,
.align_offset = 0,
.range = root_table_offset + 8);
nir_def *loc = nir_ldc_nv(b, 1, 64,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, root_table_offset),
.align_mul = 8,
.align_offset = 0);
/* Yay little endian */
loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
@@ -1092,9 +1084,9 @@ lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
nvk_root_descriptor_offset(root_desc_addr);
nir_def *root_desc_addr =
nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
nir_imm_int(b, root_desc_addr_offset),
.align_mul = 8, .align_offset = 0, .range = ~0);
nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
nir_imm_int(b, root_desc_addr_offset),
.align_mul = 8, .align_offset = 0);
nir_def *dynamic_buffer_start =
nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),

View File

@@ -215,54 +215,73 @@ nvk_hash_graphics_state(struct vk_physical_device *device,
}
static bool
lower_load_global_constant_offset_instr(nir_builder *b,
nir_intrinsic_instr *intrin,
UNUSED void *_data)
lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
UNUSED void *_data)
{
if (intrin->intrinsic != nir_intrinsic_load_global_constant_offset &&
intrin->intrinsic != nir_intrinsic_load_global_constant_bounded)
switch (load->intrinsic) {
case nir_intrinsic_load_ubo: {
b->cursor = nir_before_instr(&load->instr);
nir_def *index = load->src[0].ssa;
nir_def *offset = load->src[1].ssa;
const enum gl_access_qualifier access = nir_intrinsic_access(load);
const uint32_t align_mul = nir_intrinsic_align_mul(load);
const uint32_t align_offset = nir_intrinsic_align_offset(load);
nir_def *val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
index, offset, .access = access,
.align_mul = align_mul,
.align_offset = align_offset);
nir_def_rewrite_uses(&load->def, val);
return true;
}
case nir_intrinsic_load_global_constant_offset:
case nir_intrinsic_load_global_constant_bounded: {
b->cursor = nir_before_instr(&load->instr);
nir_def *base_addr = load->src[0].ssa;
nir_def *offset = load->src[1].ssa;
nir_def *zero = NULL;
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_def *bound = load->src[2].ssa;
unsigned bit_size = load->def.bit_size;
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
zero = nir_imm_zero(b, load->num_components, bit_size);
unsigned load_size = byte_size * load->num_components;
nir_def *sat_offset =
nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
nir_def *in_bounds =
nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
nir_push_if(b, in_bounds);
}
nir_def *val =
nir_build_load_global_constant(b, load->def.num_components,
load->def.bit_size,
nir_iadd(b, base_addr, nir_u2u64(b, offset)),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_pop_if(b, NULL);
val = nir_if_phi(b, val, zero);
}
nir_def_rewrite_uses(&load->def, val);
return true;
}
default:
return false;
b->cursor = nir_before_instr(&intrin->instr);
nir_def *base_addr = intrin->src[0].ssa;
nir_def *offset = intrin->src[1].ssa;
nir_def *zero = NULL;
if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_def *bound = intrin->src[2].ssa;
unsigned bit_size = intrin->def.bit_size;
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
zero = nir_imm_zero(b, intrin->num_components, bit_size);
unsigned load_size = byte_size * intrin->num_components;
nir_def *sat_offset =
nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
nir_def *in_bounds =
nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
nir_push_if(b, in_bounds);
}
nir_def *val =
nir_build_load_global_constant(b, intrin->def.num_components,
intrin->def.bit_size,
nir_iadd(b, base_addr, nir_u2u64(b, offset)),
.align_mul = nir_intrinsic_align_mul(intrin),
.align_offset = nir_intrinsic_align_offset(intrin));
if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_pop_if(b, NULL);
val = nir_if_phi(b, val, zero);
}
nir_def_rewrite_uses(&intrin->def, val);
return true;
}
struct lower_ycbcr_state {
@@ -402,7 +421,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
nvk_buffer_addr_format(rs->uniform_buffers));
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
lower_load_global_constant_offset_instr, nir_metadata_none, NULL);
lower_load_intrinsic, nir_metadata_none, NULL);
if (!nir->info.shared_memory_explicit_layout) {
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,