nir/lower_explicit_io: add nir_var_mem_shared support
v2: require nir_address_format_32bit_offset instead
v3: don't call nir_intrinsic_set_access() for shared atomics

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
committed by Jason Ekstrand
parent 1e21bb4123
commit 8bd2e138f5
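
With this patch, nir_lower_explicit_io can lower workgroup-local (shared) variable access in addition to the modes it already handled. A minimal driver-side sketch, assuming shared variables already carry explicit driver_location byte offsets (the surrounding setup is illustrative, not part of this patch):

   /* Lower nir_var_mem_shared derefs to load_shared/store_shared/
    * shared_atomic_* intrinsics.  Only the 32-bit-offset address
    * format is accepted for this mode (see the asserts below). */
   nir_lower_explicit_io(shader, nir_var_mem_shared,
                         nir_address_format_32bit_offset);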
@@ -93,6 +93,31 @@ global_atomic_for_deref(nir_intrinsic_op deref_op)
    }
 }
 
+static nir_intrinsic_op
+shared_atomic_for_deref(nir_intrinsic_op deref_op)
+{
+   switch (deref_op) {
+#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
+   OP(atomic_exchange)
+   OP(atomic_comp_swap)
+   OP(atomic_add)
+   OP(atomic_imin)
+   OP(atomic_umin)
+   OP(atomic_imax)
+   OP(atomic_umax)
+   OP(atomic_and)
+   OP(atomic_or)
+   OP(atomic_xor)
+   OP(atomic_fadd)
+   OP(atomic_fmin)
+   OP(atomic_fmax)
+   OP(atomic_fcomp_swap)
+#undef OP
+   default:
+      unreachable("Invalid shared atomic");
+   }
+}
+
 void
 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                          int (*type_size)(const struct glsl_type *, bool))
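
Each OP(...) line in the new helper expands via the one-line macro above it; for illustration, OP(atomic_add) expands to:

   case nir_intrinsic_deref_atomic_add:
      return nir_intrinsic_shared_atomic_add;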
@@ -427,27 +452,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    nir_builder *b = &state->builder;
    assert(var->data.mode == nir_var_mem_shared);
 
-   nir_intrinsic_op op;
-   switch (intrin->intrinsic) {
-#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
-   OP(atomic_exchange)
-   OP(atomic_comp_swap)
-   OP(atomic_add)
-   OP(atomic_imin)
-   OP(atomic_umin)
-   OP(atomic_imax)
-   OP(atomic_umax)
-   OP(atomic_and)
-   OP(atomic_or)
-   OP(atomic_xor)
-   OP(atomic_fadd)
-   OP(atomic_fmin)
-   OP(atomic_fmax)
-   OP(atomic_fcomp_swap)
-#undef OP
-   default:
-      unreachable("Invalid atomic");
-   }
+   nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
 
    nir_intrinsic_instr *atomic =
       nir_intrinsic_instr_create(state->builder.shader, op);
@@ -849,6 +854,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = nir_intrinsic_load_kernel_input;
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -857,12 +866,15 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
 
    if (addr_format_is_global(addr_format)) {
       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      load->src[0] = nir_src_for_ssa(addr);
    } else {
       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
    }
 
-   if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
+   if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared)
       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment. For OpenCL, we need
@@ -919,6 +931,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = nir_intrinsic_store_global;
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -928,6 +944,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    store->src[0] = nir_src_for_ssa(value);
    if (addr_format_is_global(addr_format)) {
       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      store->src[1] = nir_src_for_ssa(addr);
    } else {
       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -935,7 +954,8 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
 
    nir_intrinsic_set_write_mask(store, write_mask);
 
-   nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
+   if (mode != nir_var_mem_shared)
+      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment. For OpenCL, we need
     * to plumb the alignment through from SPIR-V when we have one.
@@ -980,6 +1000,10 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = global_atomic_for_deref(intrin->intrinsic);
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = shared_atomic_for_deref(intrin->intrinsic);
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -989,6 +1013,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    unsigned src = 0;
    if (addr_format_is_global(addr_format)) {
       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      atomic->src[src++] = nir_src_for_ssa(addr);
    } else {
       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -1000,7 +1027,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    /* Global atomics don't have access flags because they assume that the
     * address may be non-uniform.
     */
-   if (!addr_format_is_global(addr_format))
+   if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
 
    assert(intrin->dest.ssa.num_components == 1);
@@ -1032,7 +1059,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
    assert(deref->dest.is_ssa);
    switch (deref->deref_type) {
    case nir_deref_type_var:
-      assert(deref->mode == nir_var_shader_in);
+      assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
       return nir_imm_intN_t(b, deref->var->data.driver_location,
                             deref->dest.ssa.bit_size);
 
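
Net effect for nir_var_mem_shared with nir_address_format_32bit_offset: loads, stores, and atomics become shared intrinsics whose single address source is the scalar 32-bit byte offset seeded from var->data.driver_location, and the access-flag copy is skipped (presumably because the shared intrinsics carry no ACCESS index). A condensed sketch of the load path as patched above, with surrounding variables as in build_explicit_io_load():

   /* Condensed from the build_explicit_io_load() hunks above. */
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared);
   assert(addr->num_components == 1);
   load->src[0] = nir_src_for_ssa(addr);   /* scalar 32-bit byte offset */
   /* no nir_intrinsic_set_access() for shared */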