nir: Add nir_intrinsic_{load,store}_deref_block_intel

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
This commit is contained in:
Caio Marcelo de Oliveira Filho
2020-10-05 14:46:36 -07:00
committed by Marge Bot
parent b86ce274f9
commit dd39e311b3
4 changed files with 196 additions and 60 deletions

View File

@@ -976,3 +976,27 @@ system_value("simd_width_intel", 1)
# Load a relocatable 32-bit value
intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER])
# OpSubgroupBlockReadINTEL and OpSubgroupBlockWriteINTEL from SPV_INTEL_subgroups.
#
# Deref-based forms: the deref is src[0] and, for the store, the value to
# write is src[1].  The explicit IO lowering (nir_lower_explicit_io_instr)
# rewrites them into the global, ssbo or shared *_block_intel intrinsics
# declared below, chosen by variable mode and address format; that lowering
# passes a write mask of 0 for the store.
# src[] = { deref }.
intrinsic("load_deref_block_intel", dest_comp=0, src_comp=[-1],
indices=[ACCESS], flags=[CAN_ELIMINATE])
# src[] = { deref, value }.
intrinsic("store_deref_block_intel", src_comp=[-1, 0], indices=[WRMASK, ACCESS])
# Explicit-address forms, one load/store pair per memory kind.
# src[] = { address }.
load("global_block_intel", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# src[] = { buffer_index, offset }.
load("ssbo_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# src[] = { offset }.
load("shared_block_intel", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# src[] = { value, address }.
store("global_block_intel", [1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, block_index, offset }.
store("ssbo_block_intel", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, offset }.
store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])

View File

@@ -1250,6 +1250,8 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
const nir_variable_mode mode = modes;
nir_intrinsic_op op;
switch (intrin->intrinsic) {
case nir_intrinsic_load_deref:
switch (mode) {
case nir_var_mem_ubo:
op = nir_intrinsic_load_ubo;
@@ -1297,6 +1299,30 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
default:
unreachable("Unsupported explicit IO variable mode");
}
break;
case nir_intrinsic_load_deref_block_intel:
switch (mode) {
case nir_var_mem_ssbo:
if (addr_format_is_global(addr_format, mode))
op = nir_intrinsic_load_global_block_intel;
else
op = nir_intrinsic_load_ssbo_block_intel;
break;
case nir_var_mem_global:
op = nir_intrinsic_load_global_block_intel;
break;
case nir_var_mem_shared:
op = nir_intrinsic_load_shared_block_intel;
break;
default:
unreachable("Unsupported explicit IO variable mode");
}
break;
default:
unreachable("Invalid intrinsic");
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
@@ -1356,6 +1382,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
*/
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
/* TODO: Better handle block_intel. */
const unsigned load_size = (bit_size / 8) * load->num_components;
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
@@ -1436,6 +1463,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
const nir_variable_mode mode = modes;
nir_intrinsic_op op;
switch (intrin->intrinsic) {
case nir_intrinsic_store_deref:
assert(write_mask != 0);
switch (mode) {
case nir_var_mem_ssbo:
if (addr_format_is_global(addr_format, mode))
@@ -1463,6 +1494,32 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
default:
unreachable("Unsupported explicit IO variable mode");
}
break;
case nir_intrinsic_store_deref_block_intel:
assert(write_mask == 0);
switch (mode) {
case nir_var_mem_ssbo:
if (addr_format_is_global(addr_format, mode))
op = nir_intrinsic_store_global_block_intel;
else
op = nir_intrinsic_store_ssbo_block_intel;
break;
case nir_var_mem_global:
op = nir_intrinsic_store_global_block_intel;
break;
case nir_var_mem_shared:
op = nir_intrinsic_store_shared_block_intel;
break;
default:
unreachable("Unsupported explicit IO variable mode");
}
break;
default:
unreachable("Invalid intrinsic");
}
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
@@ -1506,6 +1563,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
assert(value->bit_size % 8 == 0);
if (addr_format_needs_bounds_check(addr_format)) {
/* TODO: Better handle block_intel. */
const unsigned store_size = (value->bit_size / 8) * store->num_components;
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
@@ -1749,6 +1807,25 @@ nir_lower_explicit_io_instr(nir_builder *b,
break;
}
case nir_intrinsic_load_deref_block_intel: {
nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
deref->modes,
align_mul, align_offset,
intrin->num_components);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
break;
}
case nir_intrinsic_store_deref_block_intel: {
assert(intrin->src[1].is_ssa);
nir_ssa_def *value = intrin->src[1].ssa;
const nir_component_mask_t write_mask = 0;
build_explicit_io_store(b, intrin, addr, addr_format,
deref->modes, align_mul, align_offset,
value, write_mask);
break;
}
default: {
nir_ssa_def *value =
build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
@@ -1985,6 +2062,8 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
switch (intrin->intrinsic) {
case nir_intrinsic_load_deref:
case nir_intrinsic_store_deref:
case nir_intrinsic_load_deref_block_intel:
case nir_intrinsic_store_deref_block_intel:
case nir_intrinsic_deref_atomic_add:
case nir_intrinsic_deref_atomic_imin:
case nir_intrinsic_deref_atomic_umin:

View File

@@ -356,6 +356,21 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
break;
}
case nir_intrinsic_load_deref_block_intel:
case nir_intrinsic_store_deref_block_intel: {
/* Combine all the stores that may alias with the whole variable (or
* cast).
*/
nir_deref_instr *operand = nir_src_as_deref(intrin->src[0]);
while (nir_deref_instr_parent(operand))
operand = nir_deref_instr_parent(operand);
assert(operand->deref_type == nir_deref_type_var ||
operand->deref_type == nir_deref_type_cast);
combine_stores_with_deref(state, operand);
break;
}
case nir_intrinsic_copy_deref:
case nir_intrinsic_memcpy_deref: {
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);

View File

@@ -1076,6 +1076,24 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
kill_aliases(copies, dst, full_mask);
break;
case nir_intrinsic_store_deref_block_intel: {
if (debug) dump_instr(instr);
/* Invalidate the whole variable (or cast) and anything that aliases
* it.
*/
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
while (nir_deref_instr_parent(dst))
dst = nir_deref_instr_parent(dst);
assert(dst->deref_type == nir_deref_type_var ||
dst->deref_type == nir_deref_type_cast);
unsigned num_components = glsl_get_vector_elements(dst->type);
unsigned full_mask = (1 << num_components) - 1;
kill_aliases(copies, dst, full_mask);
break;
}
default:
continue; /* To skip the debug below. */
}