intel/fs: Use BRW_OPCODE_HALT for discards

We're about to start using it to implement nir_jump_halt, which has
nothing inherently to do with fragment shaders or discards.  We may as
well name it after the HW instruction it generates.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5071>
This commit is contained in:
Jason Ekstrand
2020-11-30 17:24:51 -06:00
parent e76e359007
commit f9d549b2bf
9 changed files with 23 additions and 27 deletions

View File

@@ -585,7 +585,6 @@ enum opcode {
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_PACK_HALF_2x16_SPLIT, FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SAMPLE,

View File

@@ -3020,16 +3020,16 @@ fs_visitor::opt_register_renaming()
} }
/** /**
* Remove redundant or useless discard jumps. * Remove redundant or useless halts.
* *
* For example, we can eliminate jumps in the following sequence: * For example, we can eliminate halts in the following sequence:
* *
* discard-jump (redundant with the next jump) * halt (redundant with the next halt)
* discard-jump (useless; jumps to the next instruction) * halt (useless; jumps to the next instruction)
* placeholder-halt * halt-target
*/ */
bool bool
fs_visitor::opt_redundant_discard_jumps() fs_visitor::opt_redundant_halt()
{ {
bool progress = false; bool progress = false;
@@ -3048,7 +3048,7 @@ fs_visitor::opt_redundant_discard_jumps()
/* Delete any HALTs immediately before the halt target. */ /* Delete any HALTs immediately before the halt target. */
for (fs_inst *prev = (fs_inst *) halt_target->prev; for (fs_inst *prev = (fs_inst *) halt_target->prev;
!prev->is_head_sentinel() && prev->opcode == FS_OPCODE_DISCARD_JUMP; !prev->is_head_sentinel() && prev->opcode == BRW_OPCODE_HALT;
prev = (fs_inst *) halt_target->prev) { prev = (fs_inst *) halt_target->prev) {
prev->remove(last_bblock); prev->remove(last_bblock);
progress = true; progress = true;
@@ -3285,7 +3285,7 @@ fs_visitor::eliminate_find_live_channel()
depth--; depth--;
break; break;
case FS_OPCODE_DISCARD_JUMP: case BRW_OPCODE_HALT:
/* This can potentially make control flow non-uniform until the end /* This can potentially make control flow non-uniform until the end
* of the program. * of the program.
*/ */
@@ -7830,7 +7830,7 @@ fs_visitor::optimize()
OPT(opt_peephole_sel); OPT(opt_peephole_sel);
} }
OPT(opt_redundant_discard_jumps); OPT(opt_redundant_halt);
if (OPT(lower_load_payload)) { if (OPT(lower_load_payload)) {
split_virtual_grfs(); split_virtual_grfs();
@@ -7951,7 +7951,7 @@ find_halt_control_flow_region_start(const fs_visitor *v)
if (v->stage == MESA_SHADER_FRAGMENT && if (v->stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_data(v->prog_data)->uses_kill) { brw_wm_prog_data(v->prog_data)->uses_kill) {
foreach_block_and_inst(block, fs_inst, inst, v->cfg) { foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
if (inst->opcode == FS_OPCODE_DISCARD_JUMP || if (inst->opcode == BRW_OPCODE_HALT ||
inst->opcode == SHADER_OPCODE_HALT_TARGET) inst->opcode == SHADER_OPCODE_HALT_TARGET)
return inst; return inst;
} }
@@ -8002,7 +8002,7 @@ fs_visitor::fixup_nomask_control_flow()
switch (inst->opcode) { switch (inst->opcode) {
case BRW_OPCODE_DO: case BRW_OPCODE_DO:
case BRW_OPCODE_IF: case BRW_OPCODE_IF:
/* Note that this doesn't handle FS_OPCODE_DISCARD_JUMP since only /* Note that this doesn't handle BRW_OPCODE_HALT since only
* the first one in the program closes the region of divergent * the first one in the program closes the region of divergent
* control flow due to any HALT instructions -- Instead this is * control flow due to any HALT instructions -- Instead this is
* handled with the halt_start check below. * handled with the halt_start check below.

View File

@@ -158,7 +158,7 @@ public:
virtual void invalidate_analysis(brw::analysis_dependency_class c); virtual void invalidate_analysis(brw::analysis_dependency_class c);
void validate(); void validate();
bool opt_algebraic(); bool opt_algebraic();
bool opt_redundant_discard_jumps(); bool opt_redundant_halt();
bool opt_cse(); bool opt_cse();
bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip); bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
@@ -536,7 +536,7 @@ private:
struct brw_reg src0, struct brw_reg src0,
struct brw_reg src1); struct brw_reg src1);
void generate_discard_jump(fs_inst *inst); void generate_halt(fs_inst *inst);
void generate_pack_half_2x16_split(fs_inst *inst, void generate_pack_half_2x16_split(fs_inst *inst,
struct brw_reg dst, struct brw_reg dst,

View File

@@ -324,7 +324,7 @@ fs_visitor::opt_cse_local(const fs_live_variables &live, bblock_t *block, int &i
* with instructions dependent on the current execution mask like * with instructions dependent on the current execution mask like
* SHADER_OPCODE_FIND_LIVE_CHANNEL. * SHADER_OPCODE_FIND_LIVE_CHANNEL.
*/ */
if (inst->opcode == FS_OPCODE_DISCARD_JUMP || if (inst->opcode == BRW_OPCODE_HALT ||
inst->opcode == SHADER_OPCODE_HALT_TARGET) inst->opcode == SHADER_OPCODE_HALT_TARGET)
aeb.make_empty(); aeb.make_empty();

View File

@@ -1450,7 +1450,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
} }
void void
fs_generator::generate_discard_jump(fs_inst *) fs_generator::generate_halt(fs_inst *)
{ {
/* This HALT will be patched up at FB write time to point UIP at the end of /* This HALT will be patched up at FB write time to point UIP at the end of
* the program, and at brw_uip_jip() JIP will be set to the end of the * the program, and at brw_uip_jip() JIP will be set to the end of the
@@ -2375,8 +2375,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
send_count++; send_count++;
break; break;
case FS_OPCODE_DISCARD_JUMP: case BRW_OPCODE_HALT:
generate_discard_jump(inst); generate_halt(inst);
break; break;
case SHADER_OPCODE_SHADER_TIME_ADD: case SHADER_OPCODE_SHADER_TIME_ADD:

View File

@@ -3497,7 +3497,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
cmp->predicate = BRW_PREDICATE_NORMAL; cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = sample_mask_flag_subreg(this); cmp->flag_subreg = sample_mask_flag_subreg(this);
fs_inst *jump = bld.emit(FS_OPCODE_DISCARD_JUMP); fs_inst *jump = bld.emit(BRW_OPCODE_HALT);
jump->flag_subreg = sample_mask_flag_subreg(this); jump->flag_subreg = sample_mask_flag_subreg(this);
jump->predicate_inverse = true; jump->predicate_inverse = true;

View File

@@ -589,7 +589,7 @@ namespace {
case BRW_OPCODE_WHILE: case BRW_OPCODE_WHILE:
case BRW_OPCODE_BREAK: case BRW_OPCODE_BREAK:
case BRW_OPCODE_CONTINUE: case BRW_OPCODE_CONTINUE:
case FS_OPCODE_DISCARD_JUMP: case BRW_OPCODE_HALT:
if (devinfo->gen >= 8) if (devinfo->gen >= 8)
return calculate_desc(info, unit_null, 8, 0, 0, 0, 0, return calculate_desc(info, unit_null, 8, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0); 0, 0, 0, 0, 0, 0);
@@ -1540,7 +1540,7 @@ namespace {
const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ? const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
1.0 : 0.5); 1.0 : 0.5);
const float loop_weight = 10; const float loop_weight = 10;
unsigned discard_count = 0; unsigned halt_count = 0;
unsigned elapsed = 0; unsigned elapsed = 0;
state st; state st;
@@ -1552,7 +1552,7 @@ namespace {
issue_instruction(st, s->devinfo, inst); issue_instruction(st, s->devinfo, inst);
if (inst->opcode == SHADER_OPCODE_HALT_TARGET && discard_count) if (inst->opcode == SHADER_OPCODE_HALT_TARGET && halt_count)
st.weight /= discard_weight; st.weight /= discard_weight;
elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight; elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
@@ -1561,7 +1561,7 @@ namespace {
st.weight *= loop_weight; st.weight *= loop_weight;
else if (inst->opcode == BRW_OPCODE_WHILE) else if (inst->opcode == BRW_OPCODE_WHILE)
st.weight /= loop_weight; st.weight /= loop_weight;
else if (inst->opcode == FS_OPCODE_DISCARD_JUMP && !discard_count++) else if (inst->opcode == BRW_OPCODE_HALT && !halt_count++)
st.weight *= discard_weight; st.weight *= discard_weight;
} }

View File

@@ -985,7 +985,7 @@ instruction_scheduler::compute_exits()
* optimistic unblocked time estimate calculated above. * optimistic unblocked time estimate calculated above.
*/ */
foreach_in_list_reverse(schedule_node, n, &instructions) { foreach_in_list_reverse(schedule_node, n, &instructions) {
n->exit = (n->inst->opcode == FS_OPCODE_DISCARD_JUMP ? n : NULL); n->exit = (n->inst->opcode == BRW_OPCODE_HALT ? n : NULL);
for (int i = 0; i < n->child_count; i++) { for (int i = 0; i < n->child_count; i++) {
if (exit_unblocked_time(n->children[i]) < exit_unblocked_time(n)) if (exit_unblocked_time(n->children[i]) < exit_unblocked_time(n))

View File

@@ -444,9 +444,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
return "varying_pull_const_logical"; return "varying_pull_const_logical";
case FS_OPCODE_DISCARD_JUMP:
return "discard_jump";
case FS_OPCODE_SET_SAMPLE_ID: case FS_OPCODE_SET_SAMPLE_ID:
return "set_sample_id"; return "set_sample_id";