intel/vec4: Drop all of the 64-bit varying code
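
The vec4 backend no longer sees 64-bit varyings, so these paths are dead
code. Delete the 64-bit handling from the GS, VS, TCS, and TES visitors
and assert that every varying load and store the backend sees is 32-bit.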
Reviewed-by: Matt Turner <mattst88@gmail.com>
@@ -38,6 +38,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    switch (instr->intrinsic) {
    case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       /* The EmitNoIndirectInput flag guarantees our vertex index will
        * be constant. We should handle indirects someday.
        */
 
@@ -46,34 +47,17 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       const unsigned input_array_stride = prog_data->urb_read_length * 2;
 
-      if (nir_dest_bit_size(instr->dest) == 64) {
-         src = src_reg(ATTR, input_array_stride * vertex +
-                             instr->const_index[0] + offset_reg,
-                       glsl_type::dvec4_type);
-
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(tmp, src, false);
-
-         src = src_reg(tmp);
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr) / 2);
-
-         /* Write to dst reg taking into account original writemask */
-         dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dest.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dest, src));
-      } else {
-         /* Make up a type...we have no way of knowing... */
-         const glsl_type *const type = glsl_type::ivec(instr->num_components);
-
-         src = src_reg(ATTR, input_array_stride * vertex +
-                             instr->const_index[0] + offset_reg,
-                       type);
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-
-         dest = get_nir_dest(instr->dest, src.type);
-         dest.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dest, src));
-      }
+      /* Make up a type...we have no way of knowing... */
+      const glsl_type *const type = glsl_type::ivec(instr->num_components);
+
+      src = src_reg(ATTR, input_array_stride * vertex +
+                          instr->const_index[0] + offset_reg,
+                    type);
+      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+
+      dest = get_nir_dest(instr->dest, src.type);
+      dest.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dest, src));
 
       break;
    }
 
@@ -407,6 +407,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    switch (instr->intrinsic) {
 
    case nir_intrinsic_load_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       /* We set EmitNoIndirectInput for VS */
       unsigned load_offset = nir_src_as_uint(instr->src[0]);
 
@@ -417,53 +418,22 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                     glsl_type::uvec4_type);
       src = retype(src, dest.type);
 
-      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
-      if (is_64bit) {
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         src.swizzle = BRW_SWIZZLE_XYZW;
-         shuffle_64bit_data(tmp, src, false);
-         emit(MOV(dest, src_reg(tmp)));
-      } else {
-         /* Swizzle source based on component layout qualifier */
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-         emit(MOV(dest, src));
-      }
+      /* Swizzle source based on component layout qualifier */
+      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+      emit(MOV(dest, src));
       break;
    }
 
    case nir_intrinsic_store_output: {
+      assert(nir_src_bit_size(instr->src[0]) == 32);
       unsigned store_offset = nir_src_as_uint(instr->src[1]);
       int varying = instr->const_index[0] + store_offset;
-
-      bool is_64bit = nir_src_bit_size(instr->src[0]) == 64;
-      if (is_64bit) {
-         src_reg data;
-         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF,
-                           instr->num_components);
-         data = src_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(dst_reg(data), src, true);
-         src = retype(data, BRW_REGISTER_TYPE_F);
-      } else {
-         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
-                           instr->num_components);
-      }
+      src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+                        instr->num_components);
 
       unsigned c = nir_intrinsic_component(instr);
-      if (is_64bit)
-         c /= 2;
-
-      unsigned num_components = instr->num_components;
-      if (is_64bit)
-         num_components *= 2;
-
-      output_reg[varying][c] = dst_reg(src);
-      output_num_components[varying][c] = MIN2(4, num_components);
-
-      if (is_64bit && num_components > 4) {
-         assert(num_components <= 8);
-         output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE);
-         output_num_components[varying + 1][c] = num_components - 4;
-      }
+      output_reg[varying][c] = dst_reg(src);
+      output_num_components[varying][c] = instr->num_components;
       break;
    }
 
@@ -257,6 +257,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                brw_imm_d(key->input_vertices)));
       break;
    case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       src_reg indirect_offset = get_indirect_offset(instr);
       unsigned imm_offset = instr->const_index[0];
 
@@ -264,36 +265,10 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                                     BRW_REGISTER_TYPE_UD);
 
       unsigned first_component = nir_intrinsic_component(instr);
-      if (nir_dest_bit_size(instr->dest) == 64) {
-         /* We need to emit up to two 32-bit URB reads, then shuffle
-          * the result into a temporary, then move to the destination
-          * honoring the writemask
-          *
-          * We don't need to divide first_component by 2 because
-          * emit_input_urb_read takes a 32-bit type.
-          */
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         dst_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
-         emit_input_urb_read(tmp_d, vertex_index, imm_offset,
-                             first_component, indirect_offset);
-         if (instr->num_components > 2) {
-            emit_input_urb_read(byte_offset(tmp_d, REG_SIZE), vertex_index,
-                                imm_offset + 1, 0, indirect_offset);
-         }
-
-         src_reg tmp_src = retype(src_reg(tmp_d), BRW_REGISTER_TYPE_DF);
-         dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, tmp_src, false);
-
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src_reg(shuffled)));
-      } else {
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit_input_urb_read(dst, vertex_index, imm_offset,
-                             first_component, indirect_offset);
-      }
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+      emit_input_urb_read(dst, vertex_index, imm_offset,
+                          first_component, indirect_offset);
       break;
    }
    case nir_intrinsic_load_input:
@@ -313,6 +288,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output: {
+      assert(nir_src_bit_size(instr->src[0]) == 32);
       src_reg value = get_nir_src(instr->src[0]);
       unsigned mask = instr->const_index[1];
       unsigned swiz = BRW_SWIZZLE_XYZW;
@@ -322,40 +298,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       unsigned first_component = nir_intrinsic_component(instr);
       if (first_component) {
-         if (nir_src_bit_size(instr->src[0]) == 64)
-            first_component /= 2;
          assert(swiz == BRW_SWIZZLE_XYZW);
          swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
       }
 
-      if (nir_src_bit_size(instr->src[0]) == 64) {
-         /* For 64-bit data we need to shuffle the data before we write and
-          * emit two messages. Also, since each channel is twice as large we
-          * need to fix the writemask in each 32-bit message to account for it.
-          */
-         value = swizzle(retype(value, BRW_REGISTER_TYPE_DF), swiz);
-         dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, value, true);
-         src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
-
-         for (int n = 0; n < 2; n++) {
-            unsigned fixed_mask = 0;
-            if (mask & WRITEMASK_X)
-               fixed_mask |= WRITEMASK_XY;
-            if (mask & WRITEMASK_Y)
-               fixed_mask |= WRITEMASK_ZW;
-            emit_urb_write(shuffled_float, fixed_mask,
-                           imm_offset, indirect_offset);
-
-            shuffled_float = byte_offset(shuffled_float, REG_SIZE);
-            mask >>= 2;
-            imm_offset++;
-         }
-      } else {
-         emit_urb_write(swizzle(value, swiz), mask,
-                        imm_offset, indirect_offset);
-      }
+      emit_urb_write(swizzle(value, swiz), mask,
+                     imm_offset, indirect_offset);
       break;
    }
 
@@ -63,33 +63,13 @@ vec4_tes_visitor::setup_payload()
          if (inst->src[i].file != ATTR)
            continue;
 
-         bool is_64bit = type_sz(inst->src[i].type) == 8;
-
          unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
          struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
-         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
+         grf = stride(grf, 0, 4, 1);
          grf.swizzle = inst->src[i].swizzle;
          grf.type = inst->src[i].type;
          grf.abs = inst->src[i].abs;
          grf.negate = inst->src[i].negate;
 
-         /* For 64-bit attributes we can end up with components XY in the
-          * second half of a register and components ZW in the first half
-          * of the next. Fix it up here.
-          */
-         if (is_64bit && grf.subnr > 0) {
-            /* We can't do swizzles that mix XY and ZW channels in this case.
-             * Such cases should have been handled by the scalarization pass.
-             */
-            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
-                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
-            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
-               grf.subnr = 0;
-               grf.nr++;
-               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
-            }
-         }
-
          inst->src[i] = grf;
       }
    }
@@ -176,13 +156,11 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
    case nir_intrinsic_load_input:
    case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       src_reg indirect_offset = get_indirect_offset(instr);
       unsigned imm_offset = instr->const_index[0];
       src_reg header = input_read_header;
-      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
       unsigned first_component = nir_intrinsic_component(instr);
-      if (is_64bit)
-         first_component /= 2;
 
       if (indirect_offset.file != BAD_FILE) {
          src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
@@ -204,67 +182,33 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           */
          const unsigned max_push_slots = 24;
          if (imm_offset < max_push_slots) {
-            const glsl_type *src_glsl_type =
-               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
-            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
+            src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
             src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
 
-            const brw_reg_type dst_reg_type =
-               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
-            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));
+            emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
 
             prog_data->urb_read_length =
                MAX2(prog_data->urb_read_length,
-                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
+                    DIV_ROUND_UP(imm_offset + 1, 2));
             break;
          }
       }
 
-      if (!is_64bit) {
-         dst_reg temp(this, glsl_type::ivec4_type);
-         vec4_instruction *read =
-            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
-         read->offset = imm_offset;
-         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-         src_reg src = src_reg(temp);
-         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-         /* Copy to target. We might end up with some funky writemasks landing
-          * in here, but we really don't want them in the above pseudo-ops.
-          */
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src));
-      } else {
-         /* For 64-bit we need to load twice as many 32-bit components, and for
-          * dvec3/4 we need to emit 2 URB Read messages
-          */
-         dst_reg temp(this, glsl_type::dvec4_type);
-         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
-
-         vec4_instruction *read =
-            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
-         read->offset = imm_offset;
-         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-         if (instr->num_components > 2) {
-            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
-                        src_reg(header));
-            read->offset = imm_offset + 1;
-            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-         }
-
-         src_reg temp_as_src = src_reg(temp);
-         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-         dst_reg shuffled(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, temp_as_src, false);
-
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src_reg(shuffled)));
-      }
+      dst_reg temp(this, glsl_type::ivec4_type);
+      vec4_instruction *read =
+         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+      read->offset = imm_offset;
+      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+      src_reg src = src_reg(temp);
+      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+      /* Copy to target. We might end up with some funky writemasks landing
+       * in here, but we really don't want them in the above pseudo-ops.
+       */
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dst, src));
       break;
    }
    default: