diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index dfdac7ef6a3..e0d2ca79d8f 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -589,6 +589,7 @@ public: void calculate_deps(); bool is_compressed(const fs_inst *inst); + bool register_needs_barrier(const fs_reg ®); schedule_node *choose_instruction_to_schedule(); int calculate_issue_time(const fs_inst *inst); @@ -1048,7 +1049,6 @@ has_cross_lane_access(const fs_inst *inst) * accesses. */ if (inst->opcode == SHADER_OPCODE_BROADCAST || - inst->opcode == SHADER_OPCODE_READ_ARCH_REG || inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST || inst->opcode == SHADER_OPCODE_SHUFFLE || inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS || @@ -1067,6 +1067,39 @@ has_cross_lane_access(const fs_inst *inst) return false; } +/** + * Some register access need dependencies on other instructions. + */ +bool +instruction_scheduler::register_needs_barrier(const fs_reg ®) +{ + if (reg.file != ARF || reg.is_null()) + return false; + + /* If you look at SR register layout, there is nothing in there that + * depends on other instructions. This is just fixed dispatch information. + * + * ATSM PRMs, Volume 9: Render Engine, State Register Fields : + * sr0.0: + * - 0:2 TID + * - 4:13 Slice, DSS, Subslice, EU IDs + * - 20:22 Priority + * - 23:23 Priority class + * - 24:27 FFID + * sr0.1: + * - 0:5 IEEE Exception + * - 21:31 FFTID + * sr0.2: + * - 0:31 Dispatch Mask + * sr0.3: + * - 0:31 Vector Mask + */ + if (reg.nr == BRW_ARF_STATE) + return false; + + return true; +} + /** * Sometimes we really want this node to execute after everything that * was before it and before everything that followed it. This adds @@ -1190,7 +1223,7 @@ instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(last_accumulator_write, n); - } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { + } else if (register_needs_barrier(inst->src[i])) { add_barrier_deps(n); } } @@ -1229,7 +1262,7 @@ instruction_scheduler::calculate_deps() } else if (inst->dst.is_accumulator()) { add_dep(last_accumulator_write, n); last_accumulator_write = n; - } else if (inst->dst.file == ARF && !inst->dst.is_null()) { + } else if (register_needs_barrier(inst->dst)) { add_barrier_deps(n); } @@ -1275,7 +1308,7 @@ instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(n, last_accumulator_write, 0); - } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) { + } else if (register_needs_barrier(inst->src[i])) { add_barrier_deps(n); } } @@ -1308,7 +1341,7 @@ instruction_scheduler::calculate_deps() } } else if (inst->dst.is_accumulator()) { last_accumulator_write = n; - } else if (inst->dst.file == ARF && !inst->dst.is_null()) { + } else if (register_needs_barrier(inst->dst)) { add_barrier_deps(n); }