nir: Vectorize intrinsics

We used to have the number of components built into the intrinsic.  This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants.  This led to piles of switch statements to generate the correct
intrinsic names, and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.

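For example, where a pass previously had to pick a per-width opcode with a
switch statement, it can now create a single vectorized intrinsic and set
num_components directly.  A condensed before/after sketch, drawn from the
hunks below (variable names are illustrative):

    /* Before: one opcode per vector width */
    nir_intrinsic_op op;
    switch (num_components) {
    case 1: op = nir_intrinsic_load_var_vec1; break;
    case 2: op = nir_intrinsic_load_var_vec2; break;
    case 3: op = nir_intrinsic_load_var_vec3; break;
    case 4: op = nir_intrinsic_load_var_vec4; break;
    default: unreachable("Invalid number of components");
    }
    nir_intrinsic_instr *load = nir_intrinsic_instr_create(shader, op);

    /* After: one vectorized opcode; the width lives on the instruction */
    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
    load->num_components = num_components;
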
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
commit 27663dbe8e
parent d1d12efb36
Author: Jason Ekstrand
Date:   2014-12-03 17:03:19 -08:00

9 changed files with 123 additions and 315 deletions

@@ -625,7 +625,8 @@ nir_visitor::visit(ir_call *ir)
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
nir_intrinsic_instr *store_instr =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var_vec1);
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = 1;
ir->return_deref->accept(this);
store_instr->variables[0] = this->deref_head;
@@ -699,17 +700,9 @@ nir_visitor::visit(ir_assignment *ir)
* back into the LHS. Copy propagation should get rid of the mess.
*/
nir_intrinsic_op load_op;
switch (ir->lhs->type->vector_elements) {
case 1: load_op = nir_intrinsic_load_var_vec1; break;
case 2: load_op = nir_intrinsic_load_var_vec2; break;
case 3: load_op = nir_intrinsic_load_var_vec3; break;
case 4: load_op = nir_intrinsic_load_var_vec4; break;
default: unreachable("Invalid number of components"); break;
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader,
load_op);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
load->num_components = ir->lhs->type->vector_elements;
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa,
num_components, NULL);
@@ -754,17 +747,9 @@ nir_visitor::visit(ir_assignment *ir)
src.ssa = &vec->dest.dest.ssa;
}
nir_intrinsic_op store_op;
switch (ir->lhs->type->vector_elements) {
case 1: store_op = nir_intrinsic_store_var_vec1; break;
case 2: store_op = nir_intrinsic_store_var_vec2; break;
case 3: store_op = nir_intrinsic_store_var_vec3; break;
case 4: store_op = nir_intrinsic_store_var_vec4; break;
default: unreachable("Invalid number of components"); break;
}
nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader,
store_op);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = src;
@@ -843,17 +828,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
* must emit a variable load.
*/
nir_intrinsic_op load_op;
switch (ir->type->vector_elements) {
case 1: load_op = nir_intrinsic_load_var_vec1; break;
case 2: load_op = nir_intrinsic_load_var_vec2; break;
case 3: load_op = nir_intrinsic_load_var_vec3; break;
case 4: load_op = nir_intrinsic_load_var_vec4; break;
default: unreachable("Invalid number of components");
}
nir_intrinsic_instr *load_instr =
nir_intrinsic_instr_create(this->shader, load_op);
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
add_instr(&load_instr->instr, ir->type->vector_elements);
}
@@ -912,23 +889,12 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_op op;
if (const_index) {
switch (ir->type->vector_elements) {
case 1: op = nir_intrinsic_load_ubo_vec1; break;
case 2: op = nir_intrinsic_load_ubo_vec2; break;
case 3: op = nir_intrinsic_load_ubo_vec3; break;
case 4: op = nir_intrinsic_load_ubo_vec4; break;
default: assert(0); break;
}
op = nir_intrinsic_load_ubo;
} else {
switch (ir->type->vector_elements) {
case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break;
case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break;
case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break;
case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break;
default: assert(0); break;
}
op = nir_intrinsic_load_ubo_indirect;
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
load->num_components = ir->type->vector_elements;
load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];
load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */
load->const_index[2] = 1; /* number of vec4's */

@@ -693,6 +693,9 @@ typedef struct {
nir_dest dest;
/** number of components if this is a vectorized intrinsic */
uint8_t num_components;
int const_index[3];
nir_deref_var *variables[2];
@@ -732,12 +735,20 @@ typedef struct {
unsigned num_srcs; /** < number of register/SSA inputs */
/** number of components of each input register */
/** number of components of each input register
*
* If this value is 0, the number of components is given by the
* num_components field of nir_intrinsic_instr.
*/
unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
bool has_dest;
/** number of components of each output register */
/** number of components of the output register
*
* If this value is 0, the number of components is given by the
* num_components field of nir_intrinsic_instr.
*/
unsigned dest_components;
/** the number of inputs/outputs that are variables */
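
With src_components[i] (or dest_components) set to 0 in the info table, the
real width lives on the instruction, so code inspecting an intrinsic source
can resolve it with a small helper along these lines (the helper name is an
illustrative sketch, not part of this patch; the destination case is
analogous):

    static unsigned
    intrinsic_src_components(const nir_intrinsic_instr *instr, unsigned src)
    {
       const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];

       /* 0 in the info table means "vectorized": use the per-instruction width */
       if (info->src_components[src] == 0)
          return instr->num_components;
       else
          return info->src_components[src];
    }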

@@ -42,19 +42,9 @@
#define ARR(...) { __VA_ARGS__ }
INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/*
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
SYSTEM_VALUE(sample_mask_in, 1)
SYSTEM_VALUE(invocation_id, 1)
#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
#define LOAD(name, num_indices, flags) \
LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
/*
* The first index is the address to load from, and the second index is the
* number of array elements to load. For UBO's (and SSBO's), the first index
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
* elements begin immediately after the previous array element.
*/
#define LOAD(name, num_indices, flags) \
INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
* interp_at_offset* intrinsics take a second source that is either a
* sample id or a vec2 position offset.
*/
#define INTERP(name, flags) \
LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
#define INTERP_WITH_ARG(name, src_comps, flags) \
LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
#define INTERP(name, num_srcs, src_comps) \
INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \
0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \
0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
#define STORE(name, num_indices, flags) \
INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
num_indices, flags) \
INTERP(at_centroid, 0, 0)
INTERP(at_sample, 1, 1)
INTERP(at_offset, 1, 1)
/*
* Stores work the same way as loads, except now the first register input is
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
* offset.
*/
#define STORE(name, num_indices, flags) \
INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
num_indices, flags) \
STORE(output, 2, 0)
/* STORE(ssbo, 3, 0) */
LAST_INTRINSIC(store_output_vec4_indirect)
LAST_INTRINSIC(store_output_indirect)
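
For reference, with the new macros above a line like
LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER) expands (roughly, per the macro body
as shown) into a pair of vectorized opcodes, and STORE(output, 2, 0) does the
same for stores; the 0 component counts mark them as vectorized:

    INTRINSIC(load_ubo, 0, ARR(), true, 0, 0, 3,
              NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
    INTRINSIC(load_ubo_indirect, 1, ARR(1), true, 0, 0, 3,
              NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)

    INTRINSIC(store_output, 1, ARR(0), false, 0, 0, 2, 0)
    INTRINSIC(store_output_indirect, 2, ARR(0, 1), false, 0, 0, 2, 0)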

@@ -189,66 +189,6 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
return base_offset;
}
static nir_intrinsic_op
get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)
{
if (indirect) {
switch (mode) {
case nir_var_shader_in:
switch (num_components) {
case 1: return nir_intrinsic_load_input_vec1_indirect;
case 2: return nir_intrinsic_load_input_vec2_indirect;
case 3: return nir_intrinsic_load_input_vec3_indirect;
case 4: return nir_intrinsic_load_input_vec4_indirect;
default: unreachable("Invalid number of components"); break;
}
break;
case nir_var_uniform:
switch (num_components) {
case 1: return nir_intrinsic_load_uniform_vec1_indirect;
case 2: return nir_intrinsic_load_uniform_vec2_indirect;
case 3: return nir_intrinsic_load_uniform_vec3_indirect;
case 4: return nir_intrinsic_load_uniform_vec4_indirect;
default: unreachable("Invalid number of components"); break;
}
break;
default:
unreachable("Invalid input type");
break;
}
} else {
switch (mode) {
case nir_var_shader_in:
switch (num_components) {
case 1: return nir_intrinsic_load_input_vec1;
case 2: return nir_intrinsic_load_input_vec2;
case 3: return nir_intrinsic_load_input_vec3;
case 4: return nir_intrinsic_load_input_vec4;
default: unreachable("Invalid number of components"); break;
}
break;
case nir_var_uniform:
switch (num_components) {
case 1: return nir_intrinsic_load_uniform_vec1;
case 2: return nir_intrinsic_load_uniform_vec2;
case 3: return nir_intrinsic_load_uniform_vec3;
case 4: return nir_intrinsic_load_uniform_vec4;
default: unreachable("Invalid number of components"); break;
}
break;
default:
unreachable("Invalid input type");
break;
}
}
return nir_intrinsic_load_input_vec1;
}
static bool
nir_lower_io_block(nir_block *block, void *void_state)
{
@@ -261,22 +201,35 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_var_vec1:
case nir_intrinsic_load_var_vec2:
case nir_intrinsic_load_var_vec3:
case nir_intrinsic_load_var_vec4: {
case nir_intrinsic_load_var: {
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
if (mode != nir_var_shader_in && mode != nir_var_uniform)
continue;
bool has_indirect = deref_has_indirect(intrin->variables[0]);
unsigned num_components =
nir_intrinsic_infos[intrin->intrinsic].dest_components;
nir_intrinsic_op load_op = get_load_op(mode, has_indirect,
num_components);
nir_intrinsic_op load_op;
switch (mode) {
case nir_var_shader_in:
if (has_indirect) {
load_op = nir_intrinsic_load_input_indirect;
} else {
load_op = nir_intrinsic_load_input;
}
break;
case nir_var_uniform:
if (has_indirect) {
load_op = nir_intrinsic_load_uniform_indirect;
} else {
load_op = nir_intrinsic_load_uniform;
}
break;
default:
unreachable("Unknown variable mode");
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
load_op);
load->num_components = intrin->num_components;
nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],
@@ -292,7 +245,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa,
num_components, NULL);
intrin->num_components, NULL);
nir_src new_src = {
.is_ssa = true,
@@ -310,38 +263,22 @@ nir_lower_io_block(nir_block *block, void *void_state)
break;
}
case nir_intrinsic_store_var_vec1:
case nir_intrinsic_store_var_vec2:
case nir_intrinsic_store_var_vec3:
case nir_intrinsic_store_var_vec4: {
case nir_intrinsic_store_var: {
if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
continue;
bool has_indirect = deref_has_indirect(intrin->variables[0]);
unsigned num_components =
nir_intrinsic_infos[intrin->intrinsic].src_components[0];
nir_intrinsic_op store_op;
if (has_indirect) {
switch (num_components) {
case 1: store_op = nir_intrinsic_store_output_vec1_indirect; break;
case 2: store_op = nir_intrinsic_store_output_vec2_indirect; break;
case 3: store_op = nir_intrinsic_store_output_vec3_indirect; break;
case 4: store_op = nir_intrinsic_store_output_vec4_indirect; break;
default: unreachable("Invalid number of components"); break;
}
store_op = nir_intrinsic_store_output_indirect;
} else {
switch (num_components) {
case 1: store_op = nir_intrinsic_store_output_vec1; break;
case 2: store_op = nir_intrinsic_store_output_vec2; break;
case 3: store_op = nir_intrinsic_store_output_vec3; break;
case 4: store_op = nir_intrinsic_store_output_vec4; break;
default: unreachable("Invalid number of components"); break;
}
store_op = nir_intrinsic_store_output;
}
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
store->num_components = intrin->num_components;
nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],

@@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_var_vec1:
case nir_intrinsic_load_var_vec2:
case nir_intrinsic_load_var_vec3:
case nir_intrinsic_load_var_vec4: {
case nir_intrinsic_load_var: {
if (intrin->variables[0]->var->data.mode != nir_var_local)
continue;
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
mov->src[0].src = get_deref_reg_src(intrin->variables[0],
&intrin->instr, state);
unsigned num_components = mov->src[0].src.reg.reg->num_components;
mov->dest.write_mask = (1 << num_components) - 1;
mov->dest.write_mask = (1 << intrin->num_components) - 1;
if (intrin->dest.is_ssa) {
mov->dest.dest.is_ssa = true;
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
num_components, NULL);
intrin->num_components, NULL);
nir_src new_src = {
.is_ssa = true,
@@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
break;
}
case nir_intrinsic_store_var_vec1:
case nir_intrinsic_store_var_vec2:
case nir_intrinsic_store_var_vec3:
case nir_intrinsic_store_var_vec4: {
case nir_intrinsic_store_var: {
if (intrin->variables[0]->var->data.mode != nir_var_local)
continue;
nir_src reg_src = get_deref_reg_src(intrin->variables[0],
&intrin->instr, state);
unsigned num_components = reg_src.reg.reg->num_components;
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);
mov->dest.write_mask = (1 << num_components) - 1;
mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = false;
mov->dest.dest.reg.reg = reg_src.reg.reg;
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;

@@ -30,8 +30,7 @@
static void
convert_instr(nir_intrinsic_instr *instr)
{
if (instr->intrinsic != nir_intrinsic_load_var_vec1 &&
instr->intrinsic != nir_intrinsic_load_var_vec2)
if (instr->intrinsic != nir_intrinsic_load_var)
return;
nir_variable *var = instr->variables[0]->var;

@@ -445,17 +445,11 @@ fill_deref_tables_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_var_vec1:
case nir_intrinsic_load_var_vec2:
case nir_intrinsic_load_var_vec3:
case nir_intrinsic_load_var_vec4:
case nir_intrinsic_load_var:
register_load_instr(intrin, true, state);
break;
case nir_intrinsic_store_var_vec1:
case nir_intrinsic_store_var_vec2:
case nir_intrinsic_store_var_vec3:
case nir_intrinsic_store_var_vec4:
case nir_intrinsic_store_var:
register_store_instr(intrin, true, state);
break;
@@ -537,17 +531,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_deref *src_deref = nir_copy_deref(state->mem_ctx, &src_head->deref);
nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, &dest_head->deref);
nir_intrinsic_op load_op;
switch (num_components) {
case 1: load_op = nir_intrinsic_load_var_vec1; break;
case 2: load_op = nir_intrinsic_load_var_vec2; break;
case 3: load_op = nir_intrinsic_load_var_vec3; break;
case 4: load_op = nir_intrinsic_load_var_vec4; break;
default: unreachable("Invalid number of components"); break;
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
load_op);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
load->variables[0] = nir_deref_as_var(src_deref);
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, NULL);
@@ -555,17 +541,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_instr_insert_before(&copy_instr->instr, &load->instr);
register_load_instr(load, false, state);
nir_intrinsic_op store_op;
switch (num_components) {
case 1: store_op = nir_intrinsic_store_var_vec1; break;
case 2: store_op = nir_intrinsic_store_var_vec2; break;
case 3: store_op = nir_intrinsic_store_var_vec3; break;
case 4: store_op = nir_intrinsic_store_var_vec4; break;
default: unreachable("Invalid number of components"); break;
}
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
store->variables[0] = nir_deref_as_var(dest_deref);
store->src[0].is_ssa = true;
store->src[0].ssa = &load->dest.ssa;
@@ -776,14 +754,9 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_var_vec1:
case nir_intrinsic_load_var_vec2:
case nir_intrinsic_load_var_vec3:
case nir_intrinsic_load_var_vec4: {
case nir_intrinsic_load_var: {
struct deref_node *node = get_deref_node(intrin->variables[0],
false, state);
unsigned num_chans =
nir_intrinsic_infos[intrin->intrinsic].dest_components;
if (node == NULL) {
/* If we hit this path then we are referencing an invalid
@@ -793,7 +766,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->mem_ctx);
nir_ssa_def_init(&undef->instr, &undef->def, num_chans, NULL);
nir_ssa_def_init(&undef->instr, &undef->def,
intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
@@ -815,14 +789,15 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
nir_op_imov);
mov->src[0].src.is_ssa = true;
mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
for (unsigned i = num_chans; i < 4; i++)
for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[0].swizzle[i] = 0;
assert(intrin->dest.is_ssa);
mov->dest.write_mask = (1 << num_chans) - 1;
mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = true;
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
@@ -837,10 +812,7 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
break;
}
case nir_intrinsic_store_var_vec1:
case nir_intrinsic_store_var_vec2:
case nir_intrinsic_store_var_vec3:
case nir_intrinsic_store_var_vec4: {
case nir_intrinsic_store_var: {
struct deref_node *node = get_deref_node(intrin->variables[0],
false, state);
@@ -854,7 +826,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
if (!node->lower_to_ssa)
continue;
unsigned num_chans = glsl_get_vector_elements(node->type);
assert(intrin->num_components ==
glsl_get_vector_elements(node->type));
assert(intrin->src[0].is_ssa);
@@ -867,12 +840,12 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
mov->src[1].src.is_ssa = true;
mov->src[1].src.ssa = intrin->src[0].ssa;
for (unsigned i = num_chans; i < 4; i++)
for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[1].swizzle[i] = 0;
mov->src[2].src.is_ssa = true;
mov->src[2].src.ssa = get_ssa_def_for_block(node, block, state);
for (unsigned i = num_chans; i < 4; i++)
for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[2].swizzle[i] = 0;
} else {
@@ -880,13 +853,14 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
mov->src[0].src.is_ssa = true;
mov->src[0].src.ssa = intrin->src[0].ssa;
for (unsigned i = num_chans; i < 4; i++)
for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[0].swizzle[i] = 0;
}
mov->dest.write_mask = (1 << num_chans) - 1;
mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = true;
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);

@@ -331,16 +331,10 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
switch (instr->intrinsic) {
case nir_intrinsic_load_var_vec1:
case nir_intrinsic_load_var_vec2:
case nir_intrinsic_load_var_vec3:
case nir_intrinsic_load_var_vec4:
case nir_intrinsic_load_var:
assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
break;
case nir_intrinsic_store_var_vec1:
case nir_intrinsic_store_var_vec2:
case nir_intrinsic_store_var_vec3:
case nir_intrinsic_store_var_vec4:
case nir_intrinsic_store_var:
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform);
break;

@@ -1312,14 +1312,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_uniform_vec1:
case nir_intrinsic_load_uniform_vec2:
case nir_intrinsic_load_uniform_vec3:
case nir_intrinsic_load_uniform_vec4: {
case nir_intrinsic_load_uniform: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_uniforms;
src.reg_offset = instr->const_index[0] + index;
src.type = dest.type;
@@ -1335,14 +1331,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_uniform_vec1_indirect:
case nir_intrinsic_load_uniform_vec2_indirect:
case nir_intrinsic_load_uniform_vec3_indirect:
case nir_intrinsic_load_uniform_vec4_indirect: {
case nir_intrinsic_load_uniform_indirect: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_uniforms;
src.reg_offset = instr->const_index[0] + index;
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_ubo_vec1:
case nir_intrinsic_load_ubo_vec2:
case nir_intrinsic_load_ubo_vec3:
case nir_intrinsic_load_ubo_vec4: {
case nir_intrinsic_load_ubo: {
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
(unsigned) instr->const_index[0]);
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
@@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
packed_consts, surf_index, const_offset_reg));
for (unsigned i = 0;
i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
for (unsigned i = 0; i < instr->num_components; i++) {
packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
/* The std140 packing rules don't allow vectors to cross 16-byte
@@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_ubo_vec1_indirect:
case nir_intrinsic_load_ubo_vec2_indirect:
case nir_intrinsic_load_ubo_vec3_indirect:
case nir_intrinsic_load_ubo_vec4_indirect: {
case nir_intrinsic_load_ubo_indirect: {
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
instr->const_index[0]);
/* Turn the byte offset into a dword offset. */
@@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
fs_reg(2)));
for (unsigned i = 0;
i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
for (unsigned i = 0; i < instr->num_components; i++) {
exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
offset, base_offset + i);
fs_inst *last_inst = (fs_inst *) list.get_tail();
@@ -1418,14 +1402,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_input_vec1:
case nir_intrinsic_load_input_vec2:
case nir_intrinsic_load_input_vec3:
case nir_intrinsic_load_input_vec4: {
case nir_intrinsic_load_input: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_inputs;
src.reg_offset = instr->const_index[0] + index;
src.type = dest.type;
@@ -1441,14 +1421,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_input_vec1_indirect:
case nir_intrinsic_load_input_vec2_indirect:
case nir_intrinsic_load_input_vec3_indirect:
case nir_intrinsic_load_input_vec4_indirect: {
case nir_intrinsic_load_input_indirect: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_inputs;
src.reg_offset = instr->const_index[0] + index;
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1466,15 +1442,11 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_store_output_vec1:
case nir_intrinsic_store_output_vec2:
case nir_intrinsic_store_output_vec3:
case nir_intrinsic_store_output_vec4: {
case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg new_dest = nir_outputs;
new_dest.reg_offset = instr->const_index[0] + index;
new_dest.type = src.type;
@@ -1489,16 +1461,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_store_output_vec1_indirect:
case nir_intrinsic_store_output_vec2_indirect:
case nir_intrinsic_store_output_vec3_indirect:
case nir_intrinsic_store_output_vec4_indirect: {
case nir_intrinsic_store_output_indirect: {
fs_reg src = get_nir_src(instr->src[0]);
fs_reg indirect = get_nir_src(instr->src[1]);
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
for (unsigned j = 0;
j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg new_dest = nir_outputs;
new_dest.reg_offset = instr->const_index[0] + index;
new_dest.reladdr = new(mem_ctx) fs_reg(indirect);