intel/vec4: Remove all support for Gen8+ [v2]

v2: Restore the gen == 10 hunk in brw_compile_vs (around line 2940).
This function is also used for scalar VS compiles.  Squash in:

    intel/vec4: Reindent after removing Gen8+ support
    intel/vec4: Silence unused parameter warning in try_immediate_source

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6826>
This commit is contained in:
Ian Romanick
2020-09-22 13:09:56 -07:00
parent 60e1d0f028
commit 2a49007411
8 changed files with 86 additions and 284 deletions

View File

@@ -1009,23 +1009,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
#define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8)
/* From the Cherryview and Broadwell PRMs:
*
* "When source or destination datatype is 64b or operation is integer DWord
* multiply, DepCtrl must not be used."
*
* SKL PRMs don't include this restriction, however, gen7 seems to be
* affected, at least by the 64b restriction, since DepCtrl with double
* precision instructions seems to produce GPU hangs in some cases.
*/
if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) {
if (inst->opcode == BRW_OPCODE_MUL &&
IS_DWORD(inst->src[0]) &&
IS_DWORD(inst->src[1]))
return true;
}
if (devinfo->gen >= 7 && devinfo->gen <= 8) {
if (devinfo->gen >= 7) {
if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
return true;
@@ -1034,11 +1018,6 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
#undef IS_64BIT
#undef IS_DWORD
if (devinfo->gen >= 8) {
if (inst->opcode == BRW_OPCODE_F32TO16)
return true;
}
/*
* mlen:
* In the presence of send messages, totally interrupt dependency
@@ -1912,7 +1891,7 @@ vec4_visitor::lower_minmax()
src_reg
vec4_visitor::get_timestamp()
{
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_TIMESTAMP,

View File

@@ -241,7 +241,6 @@ public:
void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
src_reg fix_3src_operand(const src_reg &src);
src_reg resolve_source_modifiers(const src_reg &src);
vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1 = src_reg());

View File

@@ -78,15 +78,6 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
}
static bool
is_logic_op(enum opcode opcode)
{
return (opcode == BRW_OPCODE_AND ||
opcode == BRW_OPCODE_OR ||
opcode == BRW_OPCODE_XOR ||
opcode == BRW_OPCODE_NOT);
}
/**
* Get the origin of a copy as a single register if all components present in
* the given readmask originate from the same register and have compatible
@@ -132,8 +123,7 @@ get_copy_value(const copy_entry &entry, unsigned readmask)
}
static bool
try_constant_propagate(const struct gen_device_info *devinfo,
vec4_instruction *inst,
try_constant_propagate(vec4_instruction *inst,
int arg, const copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
@@ -169,17 +159,13 @@ try_constant_propagate(const struct gen_device_info *devinfo,
}
if (inst->src[arg].abs) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
!brw_abs_immediate(value.type, &value.as_brw_reg())) {
if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
return false;
}
}
if (inst->src[arg].negate) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
!brw_negate_immediate(value.type, &value.as_brw_reg())) {
if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
return false;
}
}
value = swizzle(value, inst->src[arg].swizzle);
@@ -200,9 +186,7 @@ try_constant_propagate(const struct gen_device_info *devinfo,
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (devinfo->gen < 8)
break;
/* fallthrough */
case BRW_OPCODE_DP2:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP4:
@@ -333,11 +317,10 @@ try_copy_propagate(const struct gen_device_info *devinfo,
value.file != ATTR)
return false;
/* In gen < 8 instructions that write 2 registers also need to read 2
* registers. Make sure we don't break that restriction by copy
* propagating from a uniform.
/* Instructions that write 2 registers also need to read 2 registers. Make
* sure we don't break that restriction by copy propagating from a uniform.
*/
if (devinfo->gen < 8 && inst->size_written > REG_SIZE && is_uniform(value))
if (inst->size_written > REG_SIZE && is_uniform(value))
return false;
/* There is a regioning restriction such that if execsize == width
@@ -358,11 +341,6 @@ try_copy_propagate(const struct gen_device_info *devinfo,
if (type_sz(value.type) != type_sz(inst->src[arg].type))
return false;
if (devinfo->gen >= 8 && (value.negate || value.abs) &&
is_logic_op(inst->opcode)) {
return false;
}
if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
return false;
@@ -516,7 +494,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
inst->src[i].offset / REG_SIZE);
const copy_entry &entry = entries[reg];
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
if (do_constant_prop && try_constant_propagate(inst, i, &entry))
progress = true;
else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
progress = true;

View File

@@ -130,7 +130,7 @@ generate_tex(struct brw_codegen *p,
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
assert(devinfo->gen >= 8 || devinfo->is_haswell);
assert(devinfo->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -139,10 +139,6 @@ generate_tex(struct brw_codegen *p,
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -234,12 +230,6 @@ generate_tex(struct brw_codegen *p,
/* Set the texel offset bits in DWord 2. */
dw2 = inst->offset;
if (devinfo->gen >= 9)
/* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
* based on bit 22 in the header.
*/
dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
/* The VS, DS, and FS stages have the g0.2 payload delivered as 0,
* so header0.2 is 0 when g0 is copied. The HS and GS stages do
not, so we must set it to 0 to avoid setting undesirable bits
@@ -472,29 +462,24 @@ generate_gs_set_vertex_count(struct brw_codegen *p,
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
if (p->devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
} else {
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
* them to WORDs, and then pack them into DWORD 2 of dst.
*
* It's easier to get the EU to do this if we think of the src and dst
* registers as composed of 16 WORDS each; then, we want to pick up the
* contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
* of dst.
*
* We can do that by the following EU instruction:
*
* mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask }
*/
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
}
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
* them to WORDs, and then pack them into DWORD 2 of dst.
*
* It's easier to get the EU to do this if we think of the src and dst
* registers as composed of 16 WORDS each; then, we want to pick up the
* contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
* of dst.
*
* We can do that by the following EU instruction:
*
* mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask }
*/
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
brw_pop_insn_state(p);
}

View File

@@ -214,35 +214,17 @@ vec4_gs_visitor::emit_thread_end()
*/
int base_mrf = 1;
bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
/* If the previous instruction was a URB write, we don't need to issue
* a second one - we can just set the EOT bit on the previous write.
*
* Skip this on Gen8+ unless there's a static vertex count, as we also
* need to write the vertex count out, and combining the two may not be
* possible (or at least not straightforward).
*/
vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
if (last && last->opcode == GS_OPCODE_URB_WRITE &&
!(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
devinfo->gen >= 8 && static_vertex_count) {
last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
return;
}
current_annotation = "thread end";
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
if (devinfo->gen < 8 || !static_vertex_count)
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
inst = emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
inst->mlen = 1;
}
@@ -279,12 +261,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
inst->offset = gs_prog_data->control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
return inst;
}
@@ -398,13 +374,6 @@ vec4_gs_visitor::emit_control_data_bits()
inst->force_writemask_all = true;
inst = emit(GS_OPCODE_URB_WRITE);
inst->urb_write_flags = urb_write_flags;
/* We need to increment Global Offset by 256-bits to make room for
* Broadwell's extra "Vertex Count" payload at the beginning of the
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
}

View File

@@ -283,10 +283,7 @@ static src_reg
setup_imm_df(const vec4_builder &bld, double v)
{
const gen_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->gen >= 7);
if (devinfo->gen >= 8)
return brw_imm_df(v);
assert(devinfo->gen == 7);
/* gen7.5 does not support DF immediates straightforwardly but the DIM
* instruction allows to set the 64-bit immediate value.
@@ -463,7 +460,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
/* brw_nir_lower_mem_access_bit_sizes takes care of this */
assert(nir_src_bit_size(instr->src[0]) == 32);
@@ -525,7 +522,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_load_ssbo: {
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
/* brw_nir_lower_mem_access_bit_sizes takes care of this */
assert(nir_dest_bit_size(instr->dest) == 32);
@@ -867,16 +864,6 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
void
vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
{
/* BDW PRM vol 15 - workarounds:
* DF->f format conversion for Align16 has wrong emask calculation when
* source is immediate.
*/
if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F &&
src.file == BRW_IMMEDIATE_VALUE) {
emit(MOV(dst, brw_imm_f(src.df)));
return;
}
enum opcode op;
switch (dst.type) {
case BRW_REGISTER_TYPE_D:
@@ -932,8 +919,7 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src)
*/
static int
try_immediate_source(const nir_alu_instr *instr, src_reg *op,
bool try_src0_also,
ASSERTED const gen_device_info *devinfo)
bool try_src0_also)
{
unsigned idx;
@@ -982,16 +968,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
if (op[idx].abs)
d = MAX2(-d, d);
if (op[idx].negate) {
/* On Gen8+ a negation source modifier on a logical operation means
* something different. Nothing should generate this, so assert that
* it does not occur.
*/
assert(devinfo->gen < 8 || (instr->op != nir_op_iand &&
instr->op != nir_op_ior &&
instr->op != nir_op_ixor));
if (op[idx].negate)
d = -d;
}
op[idx] = retype(src_reg(brw_imm_d(d)), old_type);
break;
@@ -1146,7 +1124,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
switch (instr->op) {
case nir_op_mov:
try_immediate_source(instr, &op[0], true, devinfo);
try_immediate_source(instr, &op[0], true);
inst = emit(MOV(dst, op[0]));
break;
@@ -1197,7 +1175,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fadd:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(ADD(dst, op[0], op[1]));
break;
@@ -1208,42 +1186,39 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fmul:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(MUL(dst, op[0], op[1]));
break;
case nir_op_imul: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen < 8) {
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates in the contribution of the upper 16 bits of that
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
if (nir_src_is_const(instr->src[0].src) &&
nir_alu_instr_src_read_mask(instr, 0) == 1 &&
const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[0], op[1]));
else
emit(MUL(dst, op[1], op[0]));
} else if (nir_src_is_const(instr->src[1].src) &&
nir_alu_instr_src_read_mask(instr, 1) == 1 &&
const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[1], op[0]));
else
emit(MUL(dst, op[0], op[1]));
} else {
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(dst, src_reg(acc)));
}
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates in the contribution of the upper 16 bits of that
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
if (nir_src_is_const(instr->src[0].src) &&
nir_alu_instr_src_read_mask(instr, 0) == 1 &&
const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[0], op[1]));
else
emit(MUL(dst, op[1], op[0]));
} else if (nir_src_is_const(instr->src[1].src) &&
nir_alu_instr_src_read_mask(instr, 1) == 1 &&
const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[1], op[0]));
else
emit(MUL(dst, op[0], op[1]));
} else {
emit(MUL(dst, op[0], op[1]));
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(dst, src_reg(acc)));
}
break;
}
@@ -1253,11 +1228,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
if (devinfo->gen >= 8)
emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
else
emit(MUL(acc, op[0], op[1]));
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst, op[0], op[1]));
break;
}
@@ -1433,7 +1404,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmin:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
break;
@@ -1442,7 +1413,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmax:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
break;
@@ -1473,7 +1444,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
/* If the order of the sources is changed due to an immediate value,
* then the condition must also be changed.
*/
if (try_immediate_source(instr, op, true, devinfo) == 0)
if (try_immediate_source(instr, op, true) == 0)
conditional_mod = brw_swap_cmod(conditional_mod);
emit(CMP(dst, op[0], op[1], conditional_mod));
@@ -1533,39 +1504,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_inot:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
}
emit(NOT(dst, op[0]));
break;
case nir_op_ixor:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(XOR(dst, op[0], op[1]));
break;
case nir_op_ior:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(OR(dst, op[0], op[1]));
break;
case nir_op_iand:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(AND(dst, op[0], op[1]));
break;
@@ -1843,19 +1799,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ishl:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(SHL(dst, op[0], op[1]));
break;
case nir_op_ishr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(ASR(dst, op[0], op[1]));
break;
case nir_op_ushr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(SHR(dst, op[0], op[1]));
break;
@@ -1902,22 +1858,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fdot_replicated2:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
break;
case nir_op_fdot_replicated3:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
break;
case nir_op_fdot_replicated4:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
break;
case nir_op_fdph_replicated:
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
break;

View File

@@ -163,8 +163,7 @@ namespace brw {
unsigned dims, unsigned size,
brw_predicate pred)
{
const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
bld.shader->devinfo->is_haswell);
const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
emit_insert(bld, addr, dims, has_simd4x2),
has_simd4x2 ? 1 : dims,
@@ -185,8 +184,7 @@ namespace brw {
unsigned dims, unsigned rsize, unsigned op,
brw_predicate pred)
{
const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
bld.shader->devinfo->is_haswell);
const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
/* Zip the components of both sources, they are represented as the X
* and Y components of the same vector.

View File

@@ -305,23 +305,10 @@ vec4_visitor::fix_3src_operand(const src_reg &src)
return src_reg(expanded);
}
src_reg
vec4_visitor::resolve_source_modifiers(const src_reg &src)
{
if (!src.abs && !src.negate)
return src;
dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
resolved.type = src.type;
emit(MOV(resolved, src));
return src_reg(resolved);
}
src_reg
vec4_visitor::fix_math_operand(const src_reg &src)
{
if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
if (devinfo->gen < 6 || src.file == BAD_FILE)
return src;
/* The gen6 math instruction ignores the source modifiers --
@@ -753,35 +740,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
vec4_instruction *pull;
if (devinfo->gen >= 9) {
/* Gen9+ needs a message header in order to use SIMD4x2 mode */
src_reg header(this, glsl_type::uvec4_type, 2);
pull = new(mem_ctx)
vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
dst_reg(header));
if (before_inst)
emit_before(before_block, before_inst, pull);
else
emit(pull);
dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
offset_reg.type);
pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
if (before_inst)
emit_before(before_block, before_inst, pull);
else
emit(pull);
pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
dst,
surf_index,
header);
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
if (devinfo->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
@@ -838,24 +797,9 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
inst->base_mrf = 2;
inst->src[1] = surface;
inst->src[2] = brw_imm_ud(0); /* sampler */
inst->mlen = 1;
int param_base;
if (devinfo->gen >= 9) {
/* Gen9+ needs a message header in order to use SIMD4x2 mode */
vec4_instruction *header_inst = new(mem_ctx)
vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
dst_reg(MRF, inst->base_mrf));
emit(header_inst);
inst->mlen = 2;
inst->header_size = 1;
param_base = inst->base_mrf + 1;
} else {
inst->mlen = 1;
param_base = inst->base_mrf;
}
const int param_base = inst->base_mrf;
/* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
int coord_mask = (1 << coordinate_type->vector_elements) - 1;
@@ -874,7 +818,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
bool
vec4_visitor::is_high_sampler(src_reg sampler)
{
if (devinfo->gen < 8 && !devinfo->is_haswell)
if (!devinfo->is_haswell)
return false;
return sampler.file != IMM || sampler.ud >= 16;
@@ -902,8 +846,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
case ir_txl: opcode = SHADER_OPCODE_TXL; break;
case ir_txd: opcode = SHADER_OPCODE_TXD; break;
case ir_txf: opcode = SHADER_OPCODE_TXF; break;
case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
SHADER_OPCODE_TXF_CMS); break;
case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
case ir_txs: opcode = SHADER_OPCODE_TXS; break;
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -937,7 +880,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
* - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
*/
inst->header_size =
(devinfo->gen < 5 || devinfo->gen >= 9 ||
(devinfo->gen < 5 ||
inst->offset != 0 || op == ir_tg4 ||
op == ir_texture_samples ||
is_high_sampler(sampler_reg)) ? 1 : 0;
@@ -1705,11 +1648,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, MOV(dst_reg(offset),
brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}