nir: Add nir_intrinsic_{load,store}_deref_block_intel
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
This commit is contained in:

committed by
Marge Bot

parent
b86ce274f9
commit
dd39e311b3
@@ -976,3 +976,27 @@ system_value("simd_width_intel", 1)
|
|||||||
# Load a relocatable 32-bit value
|
# Load a relocatable 32-bit value
|
||||||
intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
|
intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
|
||||||
indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER])
|
indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||||
|
|
||||||
|
# OpSubgroupBlockReadINTEL and OpSubgroupBlockWriteINTEL from SPV_INTEL_subgroups.
|
||||||
|
intrinsic("load_deref_block_intel", dest_comp=0, src_comp=[-1],
|
||||||
|
indices=[ACCESS], flags=[CAN_ELIMINATE])
|
||||||
|
intrinsic("store_deref_block_intel", src_comp=[-1, 0], indices=[WRMASK, ACCESS])
|
||||||
|
|
||||||
|
# src[] = { address }.
|
||||||
|
load("global_block_intel", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
|
||||||
|
# src[] = { buffer_index, offset }.
|
||||||
|
load("ssbo_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
|
||||||
|
# src[] = { offset }.
|
||||||
|
load("shared_block_intel", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
|
||||||
|
# src[] = { value, address }.
|
||||||
|
store("global_block_intel", [1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
|
|
||||||
|
# src[] = { value, block_index, offset }.
|
||||||
|
store("ssbo_block_intel", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
|
|
||||||
|
# src[] = { value, offset }.
|
||||||
|
store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
|
|
||||||
|
@@ -1250,6 +1250,8 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
const nir_variable_mode mode = modes;
|
const nir_variable_mode mode = modes;
|
||||||
|
|
||||||
nir_intrinsic_op op;
|
nir_intrinsic_op op;
|
||||||
|
switch (intrin->intrinsic) {
|
||||||
|
case nir_intrinsic_load_deref:
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case nir_var_mem_ubo:
|
case nir_var_mem_ubo:
|
||||||
op = nir_intrinsic_load_ubo;
|
op = nir_intrinsic_load_ubo;
|
||||||
@@ -1297,6 +1299,30 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
default:
|
default:
|
||||||
unreachable("Unsupported explicit IO variable mode");
|
unreachable("Unsupported explicit IO variable mode");
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_load_deref_block_intel:
|
||||||
|
switch (mode) {
|
||||||
|
case nir_var_mem_ssbo:
|
||||||
|
if (addr_format_is_global(addr_format, mode))
|
||||||
|
op = nir_intrinsic_load_global_block_intel;
|
||||||
|
else
|
||||||
|
op = nir_intrinsic_load_ssbo_block_intel;
|
||||||
|
break;
|
||||||
|
case nir_var_mem_global:
|
||||||
|
op = nir_intrinsic_load_global_block_intel;
|
||||||
|
break;
|
||||||
|
case nir_var_mem_shared:
|
||||||
|
op = nir_intrinsic_load_shared_block_intel;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported explicit IO variable mode");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("Invalid intrinsic");
|
||||||
|
}
|
||||||
|
|
||||||
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
|
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
|
||||||
|
|
||||||
@@ -1356,6 +1382,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
*/
|
*/
|
||||||
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
|
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
|
||||||
|
|
||||||
|
/* TODO: Better handle block_intel. */
|
||||||
const unsigned load_size = (bit_size / 8) * load->num_components;
|
const unsigned load_size = (bit_size / 8) * load->num_components;
|
||||||
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
|
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
|
||||||
|
|
||||||
@@ -1436,6 +1463,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
const nir_variable_mode mode = modes;
|
const nir_variable_mode mode = modes;
|
||||||
|
|
||||||
nir_intrinsic_op op;
|
nir_intrinsic_op op;
|
||||||
|
switch (intrin->intrinsic) {
|
||||||
|
case nir_intrinsic_store_deref:
|
||||||
|
assert(write_mask != 0);
|
||||||
|
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case nir_var_mem_ssbo:
|
case nir_var_mem_ssbo:
|
||||||
if (addr_format_is_global(addr_format, mode))
|
if (addr_format_is_global(addr_format, mode))
|
||||||
@@ -1463,6 +1494,32 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
default:
|
default:
|
||||||
unreachable("Unsupported explicit IO variable mode");
|
unreachable("Unsupported explicit IO variable mode");
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_store_deref_block_intel:
|
||||||
|
assert(write_mask == 0);
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case nir_var_mem_ssbo:
|
||||||
|
if (addr_format_is_global(addr_format, mode))
|
||||||
|
op = nir_intrinsic_store_global_block_intel;
|
||||||
|
else
|
||||||
|
op = nir_intrinsic_store_ssbo_block_intel;
|
||||||
|
break;
|
||||||
|
case nir_var_mem_global:
|
||||||
|
op = nir_intrinsic_store_global_block_intel;
|
||||||
|
break;
|
||||||
|
case nir_var_mem_shared:
|
||||||
|
op = nir_intrinsic_store_shared_block_intel;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported explicit IO variable mode");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("Invalid intrinsic");
|
||||||
|
}
|
||||||
|
|
||||||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
|
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
|
||||||
|
|
||||||
@@ -1506,6 +1563,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||||||
assert(value->bit_size % 8 == 0);
|
assert(value->bit_size % 8 == 0);
|
||||||
|
|
||||||
if (addr_format_needs_bounds_check(addr_format)) {
|
if (addr_format_needs_bounds_check(addr_format)) {
|
||||||
|
/* TODO: Better handle block_intel. */
|
||||||
const unsigned store_size = (value->bit_size / 8) * store->num_components;
|
const unsigned store_size = (value->bit_size / 8) * store->num_components;
|
||||||
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
|
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
|
||||||
|
|
||||||
@@ -1749,6 +1807,25 @@ nir_lower_explicit_io_instr(nir_builder *b,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_deref_block_intel: {
|
||||||
|
nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
|
||||||
|
deref->modes,
|
||||||
|
align_mul, align_offset,
|
||||||
|
intrin->num_components);
|
||||||
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_store_deref_block_intel: {
|
||||||
|
assert(intrin->src[1].is_ssa);
|
||||||
|
nir_ssa_def *value = intrin->src[1].ssa;
|
||||||
|
const nir_component_mask_t write_mask = 0;
|
||||||
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
||||||
|
deref->modes, align_mul, align_offset,
|
||||||
|
value, write_mask);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default: {
|
default: {
|
||||||
nir_ssa_def *value =
|
nir_ssa_def *value =
|
||||||
build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
|
build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
|
||||||
@@ -1985,6 +2062,8 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
|
|||||||
switch (intrin->intrinsic) {
|
switch (intrin->intrinsic) {
|
||||||
case nir_intrinsic_load_deref:
|
case nir_intrinsic_load_deref:
|
||||||
case nir_intrinsic_store_deref:
|
case nir_intrinsic_store_deref:
|
||||||
|
case nir_intrinsic_load_deref_block_intel:
|
||||||
|
case nir_intrinsic_store_deref_block_intel:
|
||||||
case nir_intrinsic_deref_atomic_add:
|
case nir_intrinsic_deref_atomic_add:
|
||||||
case nir_intrinsic_deref_atomic_imin:
|
case nir_intrinsic_deref_atomic_imin:
|
||||||
case nir_intrinsic_deref_atomic_umin:
|
case nir_intrinsic_deref_atomic_umin:
|
||||||
|
@@ -356,6 +356,21 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_deref_block_intel:
|
||||||
|
case nir_intrinsic_store_deref_block_intel: {
|
||||||
|
/* Combine all the stores that may alias with the whole variable (or
|
||||||
|
* cast).
|
||||||
|
*/
|
||||||
|
nir_deref_instr *operand = nir_src_as_deref(intrin->src[0]);
|
||||||
|
while (nir_deref_instr_parent(operand))
|
||||||
|
operand = nir_deref_instr_parent(operand);
|
||||||
|
assert(operand->deref_type == nir_deref_type_var ||
|
||||||
|
operand->deref_type == nir_deref_type_cast);
|
||||||
|
|
||||||
|
combine_stores_with_deref(state, operand);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case nir_intrinsic_copy_deref:
|
case nir_intrinsic_copy_deref:
|
||||||
case nir_intrinsic_memcpy_deref: {
|
case nir_intrinsic_memcpy_deref: {
|
||||||
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
|
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
|
||||||
|
@@ -1076,6 +1076,24 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
|
|||||||
kill_aliases(copies, dst, full_mask);
|
kill_aliases(copies, dst, full_mask);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_store_deref_block_intel: {
|
||||||
|
if (debug) dump_instr(instr);
|
||||||
|
|
||||||
|
/* Invalidate the whole variable (or cast) and anything that aliases
|
||||||
|
* with it.
|
||||||
|
*/
|
||||||
|
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
|
||||||
|
while (nir_deref_instr_parent(dst))
|
||||||
|
dst = nir_deref_instr_parent(dst);
|
||||||
|
assert(dst->deref_type == nir_deref_type_var ||
|
||||||
|
dst->deref_type == nir_deref_type_cast);
|
||||||
|
|
||||||
|
unsigned num_components = glsl_get_vector_elements(dst->type);
|
||||||
|
unsigned full_mask = (1 << num_components) - 1;
|
||||||
|
kill_aliases(copies, dst, full_mask);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
continue; /* To skip the debug below. */
|
continue; /* To skip the debug below. */
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user