nvk,nak: Switch to nir_intrinsic_ldc_nv
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
commit dc99d9b2df
parent b107240474
committed by Marge Bot
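The change itself is mechanical: every nir_load_ubo builder call becomes nir_ldc_nv, dropping the .range / .range_base indices, which ldc_nv does not carry (it keeps only .access, .align_mul, and .align_offset). A minimal before/after sketch of the builder-call shape, not compilable on its own; b, idx, and off stand for an existing nir_builder and cbuf index/offset defs and are not from this commit:

    /* Before: UBO load carrying a range hint for optimizers. */
    nir_def *old_val = nir_load_ubo(b, 1, 32, idx, off,
                                    .align_mul = 4, .align_offset = 0,
                                    .range = ~0);

    /* After: direct constant-buffer load; same alignment, no range. */
    nir_def *new_val = nir_ldc_nv(b, 1, 32, idx, off,
                                  .align_mul = 4, .align_offset = 0);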
@@ -2022,7 +2022,8 @@ Converter::visit(nir_intrinsic_instr *insn)
       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
       break;
    }
-   case nir_intrinsic_load_ubo: {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_ldc_nv: {
       const DataType dType = getDType(insn);
       LValues &newDefs = convert(&insn->def);
       Value *indirectIndex;
@@ -2606,7 +2606,7 @@ impl<'a> ShaderFromNir<'a> {
                 }
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_ubo => {
+            nir_intrinsic_ldc_nv => {
                 let size_B =
                     (intrin.def.bit_size() / 8) * intrin.def.num_components();
                 let idx = &srcs[0];
@@ -803,7 +803,7 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
    assert(util_is_power_of_two_nonzero(align_mul));
 
    unsigned max_bytes = 128u / 8u;
-   if (low->intrinsic == nir_intrinsic_load_ubo)
+   if (low->intrinsic == nir_intrinsic_ldc_nv)
       max_bytes = 64u / 8u;
 
    align_mul = MIN2(align_mul, max_bytes);
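The 64u / 8u cap appears to encode the hardware limit that a single ldc moves at most 64 bits, while other memory ops can be vectorized up to 128 bits. A self-contained sketch of the clamping, assuming that reading; it is not the real callback:

    #include <assert.h>
    #include <stdbool.h>

    static unsigned clamp_vectorize_align(unsigned align_mul, bool is_ldc)
    {
       /* Same check as util_is_power_of_two_nonzero() above. */
       assert(align_mul != 0 && (align_mul & (align_mul - 1)) == 0);

       unsigned max_bytes = 128u / 8u;   /* 128-bit general limit */
       if (is_ldc)
          max_bytes = 64u / 8u;          /* 64-bit ldc limit */

       /* MIN2(align_mul, max_bytes) without the mesa macro. */
       return align_mul < max_bytes ? align_mul : max_bytes;
    }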
@@ -830,10 +830,10 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
 
    unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
    assert(util_is_power_of_two_nonzero(chunk_bytes));
-   if (intrin == nir_intrinsic_load_ubo)
+   if (intrin == nir_intrinsic_ldc_nv)
       chunk_bytes = MIN2(chunk_bytes, 8);
 
-   if (intrin == nir_intrinsic_load_ubo && align < 4) {
+   if (intrin == nir_intrinsic_ldc_nv && align < 4) {
       /* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
        * In particular, this applies to ldc.u16 which means we either have to
        * fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get
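nak_mem_access_size_align applies the same 64-bit cap when picking chunk sizes, and additionally has to split sub-4B-aligned cbuf loads per the comment above. A sketch of just the sizing math shown here; the align < 4 fallback is omitted because the hunk is truncated:

    #define MIN2(a, b) ((a) < (b) ? (a) : (b))
    #define MIN3(a, b, c) MIN2(MIN2(a, b), (c))

    static unsigned ldc_chunk_bytes(unsigned bytes_pow2, unsigned align)
    {
       /* 16 is the general 128-bit cap; 8 the ldc-specific 64-bit cap. */
       unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
       return MIN2(chunk_bytes, 8);
    }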
@@ -107,12 +107,10 @@ static nir_def *
 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
                    const struct nak_fs_key *fs_key)
 {
-   nir_def *loc = nir_load_ubo(b, 1, 64,
-                               nir_imm_int(b, fs_key->sample_locations_cb),
-                               nir_imm_int(b, fs_key->sample_locations_offset),
-                               .align_mul = 8,
-                               .align_offset = 0,
-                               .range = fs_key->sample_locations_offset + 8);
+   nir_def *loc = nir_ldc_nv(b, 1, 64,
+                             nir_imm_int(b, fs_key->sample_locations_cb),
+                             nir_imm_int(b, fs_key->sample_locations_offset),
+                             .align_mul = 8, .align_offset = 0);
 
    /* Yay little endian */
    loc = nir_ushr(b, loc, nir_imul_imm(b, sample_id, 8));
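For reference, the extraction those last two lines implement: the 64-bit word holds one packed location byte per sample (hence the .align_mul = 8 load and the * 8 shift), and on a little-endian machine shifting right by sample_id * 8 lands the wanted byte in the low bits. The same operation in plain C:

    #include <stdint.h>

    static uint8_t sample_loc_byte(uint64_t packed_locs, unsigned sample_id)
    {
       /* Equivalent of nir_ushr(loc, nir_imul_imm(b, sample_id, 8)). */
       return (uint8_t)(packed_locs >> (sample_id * 8));
    }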
@@ -512,16 +512,14 @@ lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
    assert(cbuf_idx >= 0);
 
    uint32_t base = nir_intrinsic_base(load);
-   uint32_t range = nir_intrinsic_range(load);
 
    b->cursor = nir_before_instr(&load->instr);
 
    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
-   nir_def *data = nir_load_ubo(b, load->def.num_components, load->def.bit_size,
-                                nir_imm_int(b, cbuf_idx), offset,
-                                .align_mul = nir_intrinsic_align_mul(load),
-                                .align_offset = nir_intrinsic_align_offset(load),
-                                .range_base = base, .range = range);
+   nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
+                              nir_imm_int(b, cbuf_idx), offset,
+                              .align_mul = nir_intrinsic_align_mul(load),
+                              .align_offset = nir_intrinsic_align_offset(load));
 
    nir_def_rewrite_uses(&load->def, data);
@@ -535,9 +533,9 @@ load_descriptor_set_addr(nir_builder *b, uint32_t set,
    uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
       set * sizeof(struct nvk_buffer_address);
 
-   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                       nir_imm_int(b, set_addr_offset),
-                       .align_mul = 8, .align_offset = 0, .range = ~0);
+   return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
+                     nir_imm_int(b, set_addr_offset),
+                     .align_mul = 8, .align_offset = 0);
 }
 
 static nir_def *
@@ -560,10 +558,9 @@ load_dynamic_buffer_start(nir_builder *b, uint32_t set,
       uint32_t root_offset =
          nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
 
-      return nir_u2u32(b, nir_load_ubo(b, 1, 8, nir_imm_int(b, 0),
-                                       nir_imm_int(b, root_offset),
-                                       .align_mul = 1, .align_offset = 0,
-                                       .range = ~0));
+      return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
+                                     nir_imm_int(b, root_offset),
+                                     .align_mul = 1, .align_offset = 0));
    }
 }
@@ -594,8 +591,8 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
 
    assert(num_components == 4 && bit_size == 32);
    nir_def *desc =
-      nir_load_ubo(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
-                   .align_mul = 16, .align_offset = 0, .range = ~0);
+      nir_ldc_nv(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
+                 .align_mul = 16, .align_offset = 0);
    /* We know a priori that the .w component (offset) is zero */
    return nir_vec4(b, nir_channel(b, desc, 0),
                    nir_channel(b, desc, 1),
@@ -641,12 +638,11 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
 
    nir_def *desc;
    if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
-      desc = nir_load_ubo(b, num_components, bit_size,
-                          nir_imm_int(b, cbuf_idx),
-                          desc_ubo_offset,
-                          .align_mul = desc_align_mul,
-                          .align_offset = desc_align_offset,
-                          .range = ~0);
+      desc = nir_ldc_nv(b, num_components, bit_size,
+                        nir_imm_int(b, cbuf_idx),
+                        desc_ubo_offset,
+                        .align_mul = desc_align_mul,
+                        .align_offset = desc_align_offset);
    } else {
       nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
       desc = nir_load_global_constant_offset(b, num_components, bit_size,
@@ -727,13 +723,12 @@ _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
 {
    b->cursor = nir_instr_remove(&intrin->instr);
 
-   nir_def *val = nir_load_ubo(b, intrin->def.num_components,
-                               intrin->def.bit_size,
-                               nir_imm_int(b, 0), /* Root table */
-                               nir_imm_int(b, root_table_offset),
-                               .align_mul = 4,
-                               .align_offset = 0,
-                               .range = root_table_offset + 3 * 4);
+   nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
+                             intrin->def.bit_size,
+                             nir_imm_int(b, 0), /* Root table */
+                             nir_imm_int(b, root_table_offset),
+                             .align_mul = 4,
+                             .align_offset = 0);
 
    nir_def_rewrite_uses(&intrin->def, val);
||||
@@ -759,12 +754,10 @@ lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
|
||||
push_region_offset + base);
|
||||
|
||||
nir_def *val =
|
||||
nir_load_ubo(b, load->def.num_components, load->def.bit_size,
|
||||
nir_imm_int(b, 0), offset,
|
||||
.align_mul = load->def.bit_size / 8,
|
||||
.align_offset = 0,
|
||||
.range = push_region_offset + base +
|
||||
nir_intrinsic_range(load));
|
||||
nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
|
||||
nir_imm_int(b, 0), offset,
|
||||
.align_mul = load->def.bit_size / 8,
|
||||
.align_offset = 0);
|
||||
|
||||
nir_def_rewrite_uses(&load->def, val);
|
||||
|
||||
@@ -903,12 +896,11 @@ lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
|
||||
|
||||
b->cursor = nir_before_instr(&interp->instr);
|
||||
|
||||
nir_def *loc = nir_load_ubo(b, 1, 64,
|
||||
nir_imm_int(b, 0), /* Root table */
|
||||
nir_imm_int(b, root_table_offset),
|
||||
.align_mul = 8,
|
||||
.align_offset = 0,
|
||||
.range = root_table_offset + 8);
|
||||
nir_def *loc = nir_ldc_nv(b, 1, 64,
|
||||
nir_imm_int(b, 0), /* Root table */
|
||||
nir_imm_int(b, root_table_offset),
|
||||
.align_mul = 8,
|
||||
.align_offset = 0);
|
||||
|
||||
/* Yay little endian */
|
||||
loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
|
||||
@@ -1092,9 +1084,9 @@ lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
       nvk_root_descriptor_offset(root_desc_addr);
 
    nir_def *root_desc_addr =
-      nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                   nir_imm_int(b, root_desc_addr_offset),
-                   .align_mul = 8, .align_offset = 0, .range = ~0);
+      nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
+                 nir_imm_int(b, root_desc_addr_offset),
+                 .align_mul = 8, .align_offset = 0);
 
    nir_def *dynamic_buffer_start =
       nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
@@ -215,54 +215,73 @@ nvk_hash_graphics_state(struct vk_physical_device *device,
 }
 
 static bool
-lower_load_global_constant_offset_instr(nir_builder *b,
-                                        nir_intrinsic_instr *intrin,
-                                        UNUSED void *_data)
+lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
+                     UNUSED void *_data)
 {
-   if (intrin->intrinsic != nir_intrinsic_load_global_constant_offset &&
-       intrin->intrinsic != nir_intrinsic_load_global_constant_bounded)
-      return false;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_def *base_addr = intrin->src[0].ssa;
-   nir_def *offset = intrin->src[1].ssa;
-
-   nir_def *zero = NULL;
-   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
-      nir_def *bound = intrin->src[2].ssa;
-
-      unsigned bit_size = intrin->def.bit_size;
-      assert(bit_size >= 8 && bit_size % 8 == 0);
-      unsigned byte_size = bit_size / 8;
-
-      zero = nir_imm_zero(b, intrin->num_components, bit_size);
-
-      unsigned load_size = byte_size * intrin->num_components;
-
-      nir_def *sat_offset =
-         nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
-      nir_def *in_bounds =
-         nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
-
-      nir_push_if(b, in_bounds);
-   }
-
-   nir_def *val =
-      nir_build_load_global_constant(b, intrin->def.num_components,
-                                     intrin->def.bit_size,
-                                     nir_iadd(b, base_addr, nir_u2u64(b, offset)),
-                                     .align_mul = nir_intrinsic_align_mul(intrin),
-                                     .align_offset = nir_intrinsic_align_offset(intrin));
-
-   if (intrin->intrinsic == nir_intrinsic_load_global_constant_bounded) {
-      nir_pop_if(b, NULL);
-      val = nir_if_phi(b, val, zero);
-   }
-
-   nir_def_rewrite_uses(&intrin->def, val);
-
-   return true;
+   switch (load->intrinsic) {
+   case nir_intrinsic_load_ubo: {
+      b->cursor = nir_before_instr(&load->instr);
+
+      nir_def *index = load->src[0].ssa;
+      nir_def *offset = load->src[1].ssa;
+      const enum gl_access_qualifier access = nir_intrinsic_access(load);
+      const uint32_t align_mul = nir_intrinsic_align_mul(load);
+      const uint32_t align_offset = nir_intrinsic_align_offset(load);
+
+      nir_def *val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
+                                index, offset, .access = access,
+                                .align_mul = align_mul,
+                                .align_offset = align_offset);
+      nir_def_rewrite_uses(&load->def, val);
+      return true;
+   }
+
+   case nir_intrinsic_load_global_constant_offset:
+   case nir_intrinsic_load_global_constant_bounded: {
+      b->cursor = nir_before_instr(&load->instr);
+
+      nir_def *base_addr = load->src[0].ssa;
+      nir_def *offset = load->src[1].ssa;
+
+      nir_def *zero = NULL;
+      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
+         nir_def *bound = load->src[2].ssa;
+
+         unsigned bit_size = load->def.bit_size;
+         assert(bit_size >= 8 && bit_size % 8 == 0);
+         unsigned byte_size = bit_size / 8;
+
+         zero = nir_imm_zero(b, load->num_components, bit_size);
+
+         unsigned load_size = byte_size * load->num_components;
+
+         nir_def *sat_offset =
+            nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
+         nir_def *in_bounds =
+            nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
+
+         nir_push_if(b, in_bounds);
+      }
+
+      nir_def *val =
+         nir_build_load_global_constant(b, load->def.num_components,
+                                        load->def.bit_size,
+                                        nir_iadd(b, base_addr, nir_u2u64(b, offset)),
+                                        .align_mul = nir_intrinsic_align_mul(load),
+                                        .align_offset = nir_intrinsic_align_offset(load));
+
+      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
+         nir_pop_if(b, NULL);
+         val = nir_if_phi(b, val, zero);
+      }
+
+      nir_def_rewrite_uses(&load->def, val);
+      return true;
+   }
+
+   default:
+      return false;
+   }
 }
 
 struct lower_ycbcr_state {
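The bounded case above saturates the offset before adding load_size - 1 so the in-bounds test cannot be defeated by 32-bit wrap-around. A scalar model of that check, using a signed comparison to match nir_ilt:

    #include <stdbool.h>
    #include <stdint.h>

    static bool load_in_bounds(uint32_t offset, uint32_t load_size, uint32_t bound)
    {
       /* Mirrors nir_umin: clamp so sat_offset + (load_size - 1) can't wrap. */
       uint32_t sat_max = UINT32_MAX - (load_size - 1);
       uint32_t sat_offset = offset < sat_max ? offset : sat_max;

       /* Mirrors nir_ilt, which compares as signed integers. */
       return (int32_t)(sat_offset + (load_size - 1)) < (int32_t)bound;
    }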
@@ -402,7 +421,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
             nvk_buffer_addr_format(rs->uniform_buffers));
    NIR_PASS(_, nir, nir_shader_intrinsics_pass,
-            lower_load_global_constant_offset_instr, nir_metadata_none, NULL);
+            lower_load_intrinsic, nir_metadata_none, NULL);
 
    if (!nir->info.shared_memory_explicit_layout) {
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,