intel/fs: Make implied_mrf_writes() an fs_inst method.

This will be convenient in a later commit enabling SIMD32 fragment
shaders, and happens to fix the calculation for MATH instructions,
which is currently inaccurate for SIMD-lowered instructions on Gen4-5
platforms (all of them on Gen4 in SIMD16 mode), since it is based on
the shader's dispatch width rather than on the actual execution size
of the instruction.

This causes some shader-db noise on Gen4 due to the more compact
register allocation interacting with the SEND dependency workarounds,
but otherwise no major changes.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Francisco Jerez
2019-12-27 16:38:26 -08:00
parent 591f146fd2
commit c20dc9b836
7 changed files with 17 additions and 18 deletions

View File

@@ -1164,16 +1164,16 @@ fs_inst::flags_written() const
* Note that this is not the 0 or 1 implied writes in an actual gen
* instruction -- the FS opcodes often generate MOVs in addition.
*/
int
fs_visitor::implied_mrf_writes(const fs_inst *inst) const
unsigned
fs_inst::implied_mrf_writes() const
{
if (inst->mlen == 0)
if (mlen == 0)
return 0;
if (inst->base_mrf == -1)
if (base_mrf == -1)
return 0;
switch (inst->opcode) {
switch (opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
@@ -1181,11 +1181,11 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return 1 * dispatch_width / 8;
return 1 * exec_size / 8;
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return 2 * dispatch_width / 8;
return 2 * exec_size / 8;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
@@ -1201,14 +1201,14 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
return 1;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_REP_FB_WRITE:
return inst->src[0].file == BAD_FILE ? 0 : 2;
return src[0].file == BAD_FILE ? 0 : 2;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GEN4_SCRATCH_READ:
return 1;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
return inst->mlen;
return mlen;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
return inst->mlen;
return mlen;
default:
unreachable("not reached");
}
@@ -3494,7 +3494,7 @@ fs_visitor::remove_duplicate_mrf_writes()
/* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
for (int i = 0; i < implied_mrf_writes(inst); i++) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
last_mrf_move[inst->base_mrf + i] = NULL;
}
}

View File

@@ -302,8 +302,6 @@ public:
fs_reg interp_reg(int location, int channel);
int implied_mrf_writes(const fs_inst *inst) const;
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
void dump_instruction(backend_instruction *inst);

View File

@@ -573,7 +573,7 @@ namespace {
if (v->devinfo->gen >= 7) {
assert(inst->dst.file != MRF);
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
constrained[p.atom_of_reg(reg)] = true;
}

View File

@@ -494,7 +494,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
}
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
mrf_used[inst->base_mrf + i] = true;
}
}

View File

@@ -896,7 +896,7 @@ namespace {
}
if (is_send(inst) && inst->base_mrf != -1) {
for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
add_dependency(ids, deps[ip], dependency_for_write(inst,
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
}

View File

@@ -357,6 +357,7 @@ public:
bool can_do_cmod();
bool can_change_types() const;
bool has_source_and_destination_hazard() const;
unsigned implied_mrf_writes() const;
/**
* Return whether \p arg is a control source of a virtual instruction which

View File

@@ -1190,7 +1190,7 @@ fs_instruction_scheduler::calculate_deps()
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
@@ -1313,7 +1313,7 @@ fs_instruction_scheduler::calculate_deps()
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
}