brw: limit dependencies on SR register

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29446>
This commit is contained in:
Lionel Landwerlin
2024-05-29 14:19:18 +03:00
committed by Marge Bot
parent d8b78924c5
commit ac03cefb28

View File

@@ -589,6 +589,7 @@ public:
void calculate_deps();
bool is_compressed(const fs_inst *inst);
bool register_needs_barrier(const fs_reg &reg);
schedule_node *choose_instruction_to_schedule();
int calculate_issue_time(const fs_inst *inst);
@@ -1048,7 +1049,6 @@ has_cross_lane_access(const fs_inst *inst)
* accesses.
*/
if (inst->opcode == SHADER_OPCODE_BROADCAST ||
inst->opcode == SHADER_OPCODE_READ_ARCH_REG ||
inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
inst->opcode == SHADER_OPCODE_SHUFFLE ||
inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
@@ -1067,6 +1067,39 @@ has_cross_lane_access(const fs_inst *inst)
return false;
}
/**
* Some register access need dependencies on other instructions.
*/
bool
instruction_scheduler::register_needs_barrier(const fs_reg &reg)
{
if (reg.file != ARF || reg.is_null())
return false;
/* If you look at SR register layout, there is nothing in there that
* depends on other instructions. This is just fixed dispatch information.
*
* ATSM PRMs, Volume 9: Render Engine, State Register Fields :
* sr0.0:
* - 0:2 TID
* - 4:13 Slice, DSS, Subslice, EU IDs
* - 20:22 Priority
* - 23:23 Priority class
* - 24:27 FFID
* sr0.1:
* - 0:5 IEEE Exception
* - 21:31 FFTID
* sr0.2:
* - 0:31 Dispatch Mask
* sr0.3:
* - 0:31 Vector Mask
*/
if (reg.nr == BRW_ARF_STATE)
return false;
return true;
}
/**
* Sometimes we really want this node to execute after everything that
* was before it and before everything that followed it. This adds
@@ -1190,7 +1223,7 @@ instruction_scheduler::calculate_deps()
}
} else if (inst->src[i].is_accumulator()) {
add_dep(last_accumulator_write, n);
} else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
} else if (register_needs_barrier(inst->src[i])) {
add_barrier_deps(n);
}
}
@@ -1229,7 +1262,7 @@ instruction_scheduler::calculate_deps()
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
} else if (inst->dst.file == ARF && !inst->dst.is_null()) {
} else if (register_needs_barrier(inst->dst)) {
add_barrier_deps(n);
}
@@ -1275,7 +1308,7 @@ instruction_scheduler::calculate_deps()
}
} else if (inst->src[i].is_accumulator()) {
add_dep(n, last_accumulator_write, 0);
} else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
} else if (register_needs_barrier(inst->src[i])) {
add_barrier_deps(n);
}
}
@@ -1308,7 +1341,7 @@ instruction_scheduler::calculate_deps()
}
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
} else if (inst->dst.file == ARF && !inst->dst.is_null()) {
} else if (register_needs_barrier(inst->dst)) {
add_barrier_deps(n);
}