intel/vec4: Remove all support for Gen8+ [v2]
v2: Restore the gen == 10 hunk in brw_compile_vs (around line 2940).
This function is also used for scalar VS compiles. Squash in:

intel/vec4: Reindent after removing Gen8+ support
intel/vec4: Silence unused parameter warning in try_immediate_source

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6826>
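The hunks below repeat one mechanical pattern: every run-time Gen8+ check in the vec4 backend is deleted and only the Gen7 branch survives. A minimal, self-contained C sketch of that pattern follows; it is illustrative only (the "gen" parameter stands in for devinfo->gen) and is not code taken from this commit.

/* Illustrative sketch only -- not a hunk from this commit.  With Gen8+ no
 * longer reaching the vec4 backend, guards like this collapse to the Gen7
 * path throughout the files changed here.
 */
#include <stdbool.h>

static bool
uses_gen8_vec4_path_before(int gen)
{
   return gen >= 8;   /* branch deleted by this series */
}

static bool
uses_gen8_vec4_path_after(int gen)
{
   (void)gen;         /* vec4 callers are Gen7.x by construction now */
   return false;
}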
@@ -1009,23 +1009,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
#define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8)
/* From the Cherryview and Broadwell PRMs:
*
* "When source or destination datatype is 64b or operation is integer DWord
* multiply, DepCtrl must not be used."
*
* SKL PRMs don't include this restriction, however, gen7 seems to be
* affected, at least by the 64b restriction, since DepCtrl with double
* precision instructions seems to produce GPU hangs in some cases.
*/
if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) {
if (inst->opcode == BRW_OPCODE_MUL &&
IS_DWORD(inst->src[0]) &&
IS_DWORD(inst->src[1]))
return true;
}
if (devinfo->gen >= 7 && devinfo->gen <= 8) {
if (devinfo->gen >= 7) {
if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
return true;
@@ -1034,11 +1018,6 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
#undef IS_64BIT
#undef IS_DWORD
if (devinfo->gen >= 8) {
if (inst->opcode == BRW_OPCODE_F32TO16)
return true;
}
/*
* mlen:
* In the presence of send messages, totally interrupt dependency
@@ -1912,7 +1891,7 @@ vec4_visitor::lower_minmax()
src_reg
vec4_visitor::get_timestamp()
{
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_TIMESTAMP,
@@ -241,7 +241,6 @@ public:
void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
src_reg fix_3src_operand(const src_reg &src);
src_reg resolve_source_modifiers(const src_reg &src);
vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1 = src_reg());
@@ -78,15 +78,6 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
}
static bool
is_logic_op(enum opcode opcode)
{
return (opcode == BRW_OPCODE_AND ||
opcode == BRW_OPCODE_OR ||
opcode == BRW_OPCODE_XOR ||
opcode == BRW_OPCODE_NOT);
}
/**
* Get the origin of a copy as a single register if all components present in
* the given readmask originate from the same register and have compatible
@@ -132,8 +123,7 @@ get_copy_value(const copy_entry &entry, unsigned readmask)
}
static bool
try_constant_propagate(const struct gen_device_info *devinfo,
vec4_instruction *inst,
try_constant_propagate(vec4_instruction *inst,
int arg, const copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
@@ -169,17 +159,13 @@ try_constant_propagate(const struct gen_device_info *devinfo,
}
if (inst->src[arg].abs) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
!brw_abs_immediate(value.type, &value.as_brw_reg())) {
if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
return false;
}
}
if (inst->src[arg].negate) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
!brw_negate_immediate(value.type, &value.as_brw_reg())) {
if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
return false;
}
}
value = swizzle(value, inst->src[arg].swizzle);
@@ -200,9 +186,7 @@ try_constant_propagate(const struct gen_device_info *devinfo,
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (devinfo->gen < 8)
break;
/* fallthrough */
case BRW_OPCODE_DP2:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP4:
@@ -333,11 +317,10 @@ try_copy_propagate(const struct gen_device_info *devinfo,
value.file != ATTR)
return false;
/* In gen < 8 instructions that write 2 registers also need to read 2
* registers. Make sure we don't break that restriction by copy
* propagating from a uniform.
/* Instructions that write 2 registers also need to read 2 registers. Make
* sure we don't break that restriction by copy propagating from a uniform.
*/
if (devinfo->gen < 8 && inst->size_written > REG_SIZE && is_uniform(value))
if (inst->size_written > REG_SIZE && is_uniform(value))
return false;
/* There is a regioning restriction such that if execsize == width
@@ -358,11 +341,6 @@ try_copy_propagate(const struct gen_device_info *devinfo,
if (type_sz(value.type) != type_sz(inst->src[arg].type))
return false;
if (devinfo->gen >= 8 && (value.negate || value.abs) &&
is_logic_op(inst->opcode)) {
return false;
}
if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
return false;
@@ -516,7 +494,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
inst->src[i].offset / REG_SIZE);
const copy_entry &entry = entries[reg];
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
if (do_constant_prop && try_constant_propagate(inst, i, &entry))
progress = true;
else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
progress = true;
@@ -130,7 +130,7 @@ generate_tex(struct brw_codegen *p,
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
assert(devinfo->gen >= 8 || devinfo->is_haswell);
assert(devinfo->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -139,10 +139,6 @@ generate_tex(struct brw_codegen *p,
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -234,12 +230,6 @@ generate_tex(struct brw_codegen *p,
/* Set the texel offset bits in DWord 2. */
dw2 = inst->offset;
if (devinfo->gen >= 9)
/* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
* based on bit 22 in the header.
*/
dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
/* The VS, DS, and FS stages have the g0.2 payload delivered as 0,
* so header0.2 is 0 when g0 is copied. The HS and GS stages do
* not, so we must set to to 0 to avoid setting undesirable bits
@@ -472,29 +462,24 @@ generate_gs_set_vertex_count(struct brw_codegen *p,
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
if (p->devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
} else {
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
* them to WORDs, and then pack them into DWORD 2 of dst.
*
* It's easier to get the EU to do this if we think of the src and dst
* registers as composed of 16 WORDS each; then, we want to pick up the
* contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
* of dst.
*
* We can do that by the following EU instruction:
*
* mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask }
*/
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
}
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
* them to WORDs, and then pack them into DWORD 2 of dst.
*
* It's easier to get the EU to do this if we think of the src and dst
* registers as composed of 16 WORDS each; then, we want to pick up the
* contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
* of dst.
*
* We can do that by the following EU instruction:
*
* mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask }
*/
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
brw_pop_insn_state(p);
}
@@ -214,35 +214,17 @@ vec4_gs_visitor::emit_thread_end()
*/
int base_mrf = 1;
bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
/* If the previous instruction was a URB write, we don't need to issue
* a second one - we can just set the EOT bit on the previous write.
*
* Skip this on Gen8+ unless there's a static vertex count, as we also
* need to write the vertex count out, and combining the two may not be
* possible (or at least not straightforward).
*/
vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
if (last && last->opcode == GS_OPCODE_URB_WRITE &&
!(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
devinfo->gen >= 8 && static_vertex_count) {
last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
return;
}
current_annotation = "thread end";
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
if (devinfo->gen < 8 || !static_vertex_count)
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
inst = emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
inst->mlen = 1;
}
@@ -279,12 +261,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
inst->offset = gs_prog_data->control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
return inst;
}
@@ -398,13 +374,6 @@ vec4_gs_visitor::emit_control_data_bits()
inst->force_writemask_all = true;
inst = emit(GS_OPCODE_URB_WRITE);
inst->urb_write_flags = urb_write_flags;
/* We need to increment Global Offset by 256-bits to make room for
* Broadwell's extra "Vertex Count" payload at the beginning of the
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
}
@@ -283,10 +283,7 @@ static src_reg
setup_imm_df(const vec4_builder &bld, double v)
{
const gen_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->gen >= 7);
if (devinfo->gen >= 8)
return brw_imm_df(v);
assert(devinfo->gen == 7);
/* gen7.5 does not support DF immediates straighforward but the DIM
* instruction allows to set the 64-bit immediate value.
@@ -463,7 +460,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
/* brw_nir_lower_mem_access_bit_sizes takes care of this */
assert(nir_src_bit_size(instr->src[0]) == 32);
@@ -525,7 +522,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_load_ssbo: {
assert(devinfo->gen >= 7);
assert(devinfo->gen == 7);
/* brw_nir_lower_mem_access_bit_sizes takes care of this */
assert(nir_dest_bit_size(instr->dest) == 32);
@@ -867,16 +864,6 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
void
vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
{
/* BDW PRM vol 15 - workarounds:
* DF->f format conversion for Align16 has wrong emask calculation when
* source is immediate.
*/
if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F &&
src.file == BRW_IMMEDIATE_VALUE) {
emit(MOV(dst, brw_imm_f(src.df)));
return;
}
enum opcode op;
switch (dst.type) {
case BRW_REGISTER_TYPE_D:
@@ -932,8 +919,7 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src)
*/
static int
try_immediate_source(const nir_alu_instr *instr, src_reg *op,
bool try_src0_also,
ASSERTED const gen_device_info *devinfo)
bool try_src0_also)
{
unsigned idx;
@@ -982,16 +968,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
if (op[idx].abs)
d = MAX2(-d, d);
if (op[idx].negate) {
/* On Gen8+ a negation source modifier on a logical operation means
* something different. Nothing should generate this, so assert that
* it does not occur.
*/
assert(devinfo->gen < 8 || (instr->op != nir_op_iand &&
instr->op != nir_op_ior &&
instr->op != nir_op_ixor));
if (op[idx].negate)
d = -d;
}
op[idx] = retype(src_reg(brw_imm_d(d)), old_type);
break;
@@ -1146,7 +1124,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
switch (instr->op) {
case nir_op_mov:
try_immediate_source(instr, &op[0], true, devinfo);
try_immediate_source(instr, &op[0], true);
inst = emit(MOV(dst, op[0]));
break;
@@ -1197,7 +1175,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fadd:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(ADD(dst, op[0], op[1]));
break;
@@ -1208,42 +1186,39 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fmul:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(MUL(dst, op[0], op[1]));
break;
case nir_op_imul: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen < 8) {
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates in the contribution of the upper 16 bits of that
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
if (nir_src_is_const(instr->src[0].src) &&
nir_alu_instr_src_read_mask(instr, 0) == 1 &&
const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[0], op[1]));
else
emit(MUL(dst, op[1], op[0]));
} else if (nir_src_is_const(instr->src[1].src) &&
nir_alu_instr_src_read_mask(instr, 1) == 1 &&
const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[1], op[0]));
else
emit(MUL(dst, op[0], op[1]));
} else {
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(dst, src_reg(acc)));
}
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates in the contribution of the upper 16 bits of that
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
if (nir_src_is_const(instr->src[0].src) &&
nir_alu_instr_src_read_mask(instr, 0) == 1 &&
const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[0], op[1]));
else
emit(MUL(dst, op[1], op[0]));
} else if (nir_src_is_const(instr->src[1].src) &&
nir_alu_instr_src_read_mask(instr, 1) == 1 &&
const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[1], op[0]));
else
emit(MUL(dst, op[0], op[1]));
} else {
emit(MUL(dst, op[0], op[1]));
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(dst, src_reg(acc)));
}
break;
}
@@ -1253,11 +1228,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
if (devinfo->gen >= 8)
emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
else
emit(MUL(acc, op[0], op[1]));
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst, op[0], op[1]));
break;
}
@@ -1433,7 +1404,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmin:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
break;
@@ -1442,7 +1413,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
assert(nir_dest_bit_size(instr->dest.dest) < 64);
/* fall through */
case nir_op_fmax:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
break;
@@ -1473,7 +1444,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
/* If the order of the sources is changed due to an immediate value,
* then the condition must also be changed.
*/
if (try_immediate_source(instr, op, true, devinfo) == 0)
if (try_immediate_source(instr, op, true) == 0)
conditional_mod = brw_swap_cmod(conditional_mod);
emit(CMP(dst, op[0], op[1], conditional_mod));
@@ -1533,39 +1504,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_inot:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
}
emit(NOT(dst, op[0]));
break;
case nir_op_ixor:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(XOR(dst, op[0], op[1]));
break;
case nir_op_ior:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(OR(dst, op[0], op[1]));
break;
case nir_op_iand:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
if (devinfo->gen >= 8) {
op[0] = resolve_source_modifiers(op[0]);
op[1] = resolve_source_modifiers(op[1]);
}
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
emit(AND(dst, op[0], op[1]));
break;
@@ -1843,19 +1799,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ishl:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(SHL(dst, op[0], op[1]));
break;
case nir_op_ishr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(ASR(dst, op[0], op[1]));
break;
case nir_op_ushr:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
emit(SHR(dst, op[0], op[1]));
break;
@@ -1902,22 +1858,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fdot_replicated2:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
break;
case nir_op_fdot_replicated3:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
break;
case nir_op_fdot_replicated4:
try_immediate_source(instr, op, true, devinfo);
try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
break;
case nir_op_fdph_replicated:
try_immediate_source(instr, op, false, devinfo);
try_immediate_source(instr, op, false);
inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
break;
@@ -163,8 +163,7 @@ namespace brw {
unsigned dims, unsigned size,
brw_predicate pred)
{
const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
bld.shader->devinfo->is_haswell);
const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
emit_insert(bld, addr, dims, has_simd4x2),
has_simd4x2 ? 1 : dims,
@@ -185,8 +184,7 @@ namespace brw {
unsigned dims, unsigned rsize, unsigned op,
brw_predicate pred)
{
const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
bld.shader->devinfo->is_haswell);
const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
/* Zip the components of both sources, they are represented as the X
* and Y components of the same vector.
@@ -305,23 +305,10 @@ vec4_visitor::fix_3src_operand(const src_reg &src)
return src_reg(expanded);
}
src_reg
vec4_visitor::resolve_source_modifiers(const src_reg &src)
{
if (!src.abs && !src.negate)
return src;
dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
resolved.type = src.type;
emit(MOV(resolved, src));
return src_reg(resolved);
}
src_reg
vec4_visitor::fix_math_operand(const src_reg &src)
{
if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
if (devinfo->gen < 6 || src.file == BAD_FILE)
return src;
/* The gen6 math instruction ignores the source modifiers --
@@ -753,35 +740,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
vec4_instruction *pull;
if (devinfo->gen >= 9) {
/* Gen9+ needs a message header in order to use SIMD4x2 mode */
src_reg header(this, glsl_type::uvec4_type, 2);
pull = new(mem_ctx)
vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
dst_reg(header));
if (before_inst)
emit_before(before_block, before_inst, pull);
else
emit(pull);
dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
offset_reg.type);
pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
if (before_inst)
emit_before(before_block, before_inst, pull);
else
emit(pull);
pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
dst,
surf_index,
header);
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
if (devinfo->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
@@ -838,24 +797,9 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
inst->base_mrf = 2;
inst->src[1] = surface;
inst->src[2] = brw_imm_ud(0); /* sampler */
inst->mlen = 1;
int param_base;
if (devinfo->gen >= 9) {
/* Gen9+ needs a message header in order to use SIMD4x2 mode */
vec4_instruction *header_inst = new(mem_ctx)
vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
dst_reg(MRF, inst->base_mrf));
emit(header_inst);
inst->mlen = 2;
inst->header_size = 1;
param_base = inst->base_mrf + 1;
} else {
inst->mlen = 1;
param_base = inst->base_mrf;
}
const int param_base = inst->base_mrf;
/* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
int coord_mask = (1 << coordinate_type->vector_elements) - 1;
@@ -874,7 +818,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
bool
vec4_visitor::is_high_sampler(src_reg sampler)
{
if (devinfo->gen < 8 && !devinfo->is_haswell)
if (!devinfo->is_haswell)
return false;
return sampler.file != IMM || sampler.ud >= 16;
@@ -902,8 +846,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
case ir_txl: opcode = SHADER_OPCODE_TXL; break;
case ir_txd: opcode = SHADER_OPCODE_TXD; break;
case ir_txf: opcode = SHADER_OPCODE_TXF; break;
case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
SHADER_OPCODE_TXF_CMS); break;
case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
case ir_txs: opcode = SHADER_OPCODE_TXS; break;
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -937,7 +880,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
* - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
*/
inst->header_size =
(devinfo->gen < 5 || devinfo->gen >= 9 ||
(devinfo->gen < 5 ||
inst->offset != 0 || op == ir_tg4 ||
op == ir_texture_samples ||
is_high_sampler(sampler_reg)) ? 1 : 0;
@@ -1705,11 +1648,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, MOV(dst_reg(offset),
brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}