intel/fs: Make implied_mrf_writes() an fs_inst method.
This will be convenient in a later commit enabling SIMD32 fragment shaders. It also happens to fix the calculation for MATH instructions, which is currently inaccurate for SIMD-lowered instructions on Gen4-5 platforms (all of them on Gen4 in SIMD16 mode), because it was based on the shader's dispatch width rather than on the actual execution size of the instruction. This causes some shader-db noise on Gen4 due to the more compact register allocation interacting with the SEND dependency workarounds, but otherwise no major changes. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -1164,16 +1164,16 @@ fs_inst::flags_written() const
|
|||||||
* Note that this is not the 0 or 1 implied writes in an actual gen
|
* Note that this is not the 0 or 1 implied writes in an actual gen
|
||||||
* instruction -- the FS opcodes often generate MOVs in addition.
|
* instruction -- the FS opcodes often generate MOVs in addition.
|
||||||
*/
|
*/
|
||||||
int
|
unsigned
|
||||||
fs_visitor::implied_mrf_writes(const fs_inst *inst) const
|
fs_inst::implied_mrf_writes() const
|
||||||
{
|
{
|
||||||
if (inst->mlen == 0)
|
if (mlen == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (inst->base_mrf == -1)
|
if (base_mrf == -1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
switch (inst->opcode) {
|
switch (opcode) {
|
||||||
case SHADER_OPCODE_RCP:
|
case SHADER_OPCODE_RCP:
|
||||||
case SHADER_OPCODE_RSQ:
|
case SHADER_OPCODE_RSQ:
|
||||||
case SHADER_OPCODE_SQRT:
|
case SHADER_OPCODE_SQRT:
|
||||||
@@ -1181,11 +1181,11 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
|
|||||||
case SHADER_OPCODE_LOG2:
|
case SHADER_OPCODE_LOG2:
|
||||||
case SHADER_OPCODE_SIN:
|
case SHADER_OPCODE_SIN:
|
||||||
case SHADER_OPCODE_COS:
|
case SHADER_OPCODE_COS:
|
||||||
return 1 * dispatch_width / 8;
|
return 1 * exec_size / 8;
|
||||||
case SHADER_OPCODE_POW:
|
case SHADER_OPCODE_POW:
|
||||||
case SHADER_OPCODE_INT_QUOTIENT:
|
case SHADER_OPCODE_INT_QUOTIENT:
|
||||||
case SHADER_OPCODE_INT_REMAINDER:
|
case SHADER_OPCODE_INT_REMAINDER:
|
||||||
return 2 * dispatch_width / 8;
|
return 2 * exec_size / 8;
|
||||||
case SHADER_OPCODE_TEX:
|
case SHADER_OPCODE_TEX:
|
||||||
case FS_OPCODE_TXB:
|
case FS_OPCODE_TXB:
|
||||||
case SHADER_OPCODE_TXD:
|
case SHADER_OPCODE_TXD:
|
||||||
@@ -1201,14 +1201,14 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
|
|||||||
return 1;
|
return 1;
|
||||||
case FS_OPCODE_FB_WRITE:
|
case FS_OPCODE_FB_WRITE:
|
||||||
case FS_OPCODE_REP_FB_WRITE:
|
case FS_OPCODE_REP_FB_WRITE:
|
||||||
return inst->src[0].file == BAD_FILE ? 0 : 2;
|
return src[0].file == BAD_FILE ? 0 : 2;
|
||||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||||
return 1;
|
return 1;
|
||||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
|
||||||
return inst->mlen;
|
return mlen;
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||||
return inst->mlen;
|
return mlen;
|
||||||
default:
|
default:
|
||||||
unreachable("not reached");
|
unreachable("not reached");
|
||||||
}
|
}
|
||||||
@@ -3494,7 +3494,7 @@ fs_visitor::remove_duplicate_mrf_writes()
|
|||||||
/* Found a SEND instruction, which will include two or fewer
|
/* Found a SEND instruction, which will include two or fewer
|
||||||
* implied MRF writes. We could do better here.
|
* implied MRF writes. We could do better here.
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < implied_mrf_writes(inst); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
last_mrf_move[inst->base_mrf + i] = NULL;
|
last_mrf_move[inst->base_mrf + i] = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -302,8 +302,6 @@ public:
|
|||||||
|
|
||||||
fs_reg interp_reg(int location, int channel);
|
fs_reg interp_reg(int location, int channel);
|
||||||
|
|
||||||
int implied_mrf_writes(const fs_inst *inst) const;
|
|
||||||
|
|
||||||
virtual void dump_instructions();
|
virtual void dump_instructions();
|
||||||
virtual void dump_instructions(const char *name);
|
virtual void dump_instructions(const char *name);
|
||||||
void dump_instruction(backend_instruction *inst);
|
void dump_instruction(backend_instruction *inst);
|
||||||
|
@@ -573,7 +573,7 @@ namespace {
|
|||||||
if (v->devinfo->gen >= 7) {
|
if (v->devinfo->gen >= 7) {
|
||||||
assert(inst->dst.file != MRF);
|
assert(inst->dst.file != MRF);
|
||||||
|
|
||||||
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
|
const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
|
||||||
constrained[p.atom_of_reg(reg)] = true;
|
constrained[p.atom_of_reg(reg)] = true;
|
||||||
}
|
}
|
||||||
|
@@ -494,7 +494,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inst->mlen > 0) {
|
if (inst->mlen > 0) {
|
||||||
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
mrf_used[inst->base_mrf + i] = true;
|
mrf_used[inst->base_mrf + i] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -896,7 +896,7 @@ namespace {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (is_send(inst) && inst->base_mrf != -1) {
|
if (is_send(inst) && inst->base_mrf != -1) {
|
||||||
for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
|
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
|
||||||
add_dependency(ids, deps[ip], dependency_for_write(inst,
|
add_dependency(ids, deps[ip], dependency_for_write(inst,
|
||||||
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
|
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
|
||||||
}
|
}
|
||||||
|
@@ -357,6 +357,7 @@ public:
|
|||||||
bool can_do_cmod();
|
bool can_do_cmod();
|
||||||
bool can_change_types() const;
|
bool can_change_types() const;
|
||||||
bool has_source_and_destination_hazard() const;
|
bool has_source_and_destination_hazard() const;
|
||||||
|
unsigned implied_mrf_writes() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return whether \p arg is a control source of a virtual instruction which
|
* Return whether \p arg is a control source of a virtual instruction which
|
||||||
|
@@ -1190,7 +1190,7 @@ fs_instruction_scheduler::calculate_deps()
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
||||||
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
add_dep(last_mrf_write[inst->base_mrf + i], n);
|
add_dep(last_mrf_write[inst->base_mrf + i], n);
|
||||||
last_mrf_write[inst->base_mrf + i] = n;
|
last_mrf_write[inst->base_mrf + i] = n;
|
||||||
}
|
}
|
||||||
@@ -1313,7 +1313,7 @@ fs_instruction_scheduler::calculate_deps()
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
||||||
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
last_mrf_write[inst->base_mrf + i] = n;
|
last_mrf_write[inst->base_mrf + i] = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user