intel/fs: Use BRW_OPCODE_HALT for discards
We're about to start using this opcode to implement nir_jump_halt, which has nothing inherently to do with fragment shaders or discards. We may as well name it for the HW instruction it generates.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5071>
This commit is contained in:
@@ -585,7 +585,6 @@ enum opcode {
|
|||||||
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
|
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
|
||||||
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
|
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
|
||||||
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
||||||
FS_OPCODE_DISCARD_JUMP,
|
|
||||||
FS_OPCODE_SET_SAMPLE_ID,
|
FS_OPCODE_SET_SAMPLE_ID,
|
||||||
FS_OPCODE_PACK_HALF_2x16_SPLIT,
|
FS_OPCODE_PACK_HALF_2x16_SPLIT,
|
||||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||||
|
@@ -3020,16 +3020,16 @@ fs_visitor::opt_register_renaming()
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove redundant or useless discard jumps.
|
* Remove redundant or useless halts.
|
||||||
*
|
*
|
||||||
* For example, we can eliminate jumps in the following sequence:
|
* For example, we can eliminate halts in the following sequence:
|
||||||
*
|
*
|
||||||
* discard-jump (redundant with the next jump)
|
* halt (redundant with the next halt)
|
||||||
* discard-jump (useless; jumps to the next instruction)
|
* halt (useless; jumps to the next instruction)
|
||||||
* placeholder-halt
|
* halt-target
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
fs_visitor::opt_redundant_discard_jumps()
|
fs_visitor::opt_redundant_halt()
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
@@ -3048,7 +3048,7 @@ fs_visitor::opt_redundant_discard_jumps()
|
|||||||
|
|
||||||
/* Delete any HALTs immediately before the halt target. */
|
/* Delete any HALTs immediately before the halt target. */
|
||||||
for (fs_inst *prev = (fs_inst *) halt_target->prev;
|
for (fs_inst *prev = (fs_inst *) halt_target->prev;
|
||||||
!prev->is_head_sentinel() && prev->opcode == FS_OPCODE_DISCARD_JUMP;
|
!prev->is_head_sentinel() && prev->opcode == BRW_OPCODE_HALT;
|
||||||
prev = (fs_inst *) halt_target->prev) {
|
prev = (fs_inst *) halt_target->prev) {
|
||||||
prev->remove(last_bblock);
|
prev->remove(last_bblock);
|
||||||
progress = true;
|
progress = true;
|
||||||
@@ -3285,7 +3285,7 @@ fs_visitor::eliminate_find_live_channel()
|
|||||||
depth--;
|
depth--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FS_OPCODE_DISCARD_JUMP:
|
case BRW_OPCODE_HALT:
|
||||||
/* This can potentially make control flow non-uniform until the end
|
/* This can potentially make control flow non-uniform until the end
|
||||||
* of the program.
|
* of the program.
|
||||||
*/
|
*/
|
||||||
@@ -7830,7 +7830,7 @@ fs_visitor::optimize()
|
|||||||
OPT(opt_peephole_sel);
|
OPT(opt_peephole_sel);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPT(opt_redundant_discard_jumps);
|
OPT(opt_redundant_halt);
|
||||||
|
|
||||||
if (OPT(lower_load_payload)) {
|
if (OPT(lower_load_payload)) {
|
||||||
split_virtual_grfs();
|
split_virtual_grfs();
|
||||||
@@ -7951,7 +7951,7 @@ find_halt_control_flow_region_start(const fs_visitor *v)
|
|||||||
if (v->stage == MESA_SHADER_FRAGMENT &&
|
if (v->stage == MESA_SHADER_FRAGMENT &&
|
||||||
brw_wm_prog_data(v->prog_data)->uses_kill) {
|
brw_wm_prog_data(v->prog_data)->uses_kill) {
|
||||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||||
if (inst->opcode == FS_OPCODE_DISCARD_JUMP ||
|
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
@@ -8002,7 +8002,7 @@ fs_visitor::fixup_nomask_control_flow()
|
|||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case BRW_OPCODE_DO:
|
case BRW_OPCODE_DO:
|
||||||
case BRW_OPCODE_IF:
|
case BRW_OPCODE_IF:
|
||||||
/* Note that this doesn't handle FS_OPCODE_DISCARD_JUMP since only
|
/* Note that this doesn't handle BRW_OPCODE_HALT since only
|
||||||
* the first one in the program closes the region of divergent
|
* the first one in the program closes the region of divergent
|
||||||
* control flow due to any HALT instructions -- Instead this is
|
* control flow due to any HALT instructions -- Instead this is
|
||||||
* handled with the halt_start check below.
|
* handled with the halt_start check below.
|
||||||
|
@@ -158,7 +158,7 @@ public:
|
|||||||
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
||||||
void validate();
|
void validate();
|
||||||
bool opt_algebraic();
|
bool opt_algebraic();
|
||||||
bool opt_redundant_discard_jumps();
|
bool opt_redundant_halt();
|
||||||
bool opt_cse();
|
bool opt_cse();
|
||||||
bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
|
bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
|
||||||
|
|
||||||
@@ -536,7 +536,7 @@ private:
|
|||||||
struct brw_reg src0,
|
struct brw_reg src0,
|
||||||
struct brw_reg src1);
|
struct brw_reg src1);
|
||||||
|
|
||||||
void generate_discard_jump(fs_inst *inst);
|
void generate_halt(fs_inst *inst);
|
||||||
|
|
||||||
void generate_pack_half_2x16_split(fs_inst *inst,
|
void generate_pack_half_2x16_split(fs_inst *inst,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
|
@@ -324,7 +324,7 @@ fs_visitor::opt_cse_local(const fs_live_variables &live, bblock_t *block, int &i
|
|||||||
* with instructions dependent on the current execution mask like
|
* with instructions dependent on the current execution mask like
|
||||||
* SHADER_OPCODE_FIND_LIVE_CHANNEL.
|
* SHADER_OPCODE_FIND_LIVE_CHANNEL.
|
||||||
*/
|
*/
|
||||||
if (inst->opcode == FS_OPCODE_DISCARD_JUMP ||
|
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||||
aeb.make_empty();
|
aeb.make_empty();
|
||||||
|
|
||||||
|
@@ -1450,7 +1450,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_generator::generate_discard_jump(fs_inst *)
|
fs_generator::generate_halt(fs_inst *)
|
||||||
{
|
{
|
||||||
/* This HALT will be patched up at FB write time to point UIP at the end of
|
/* This HALT will be patched up at FB write time to point UIP at the end of
|
||||||
* the program, and at brw_uip_jip() JIP will be set to the end of the
|
* the program, and at brw_uip_jip() JIP will be set to the end of the
|
||||||
@@ -2375,8 +2375,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
send_count++;
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FS_OPCODE_DISCARD_JUMP:
|
case BRW_OPCODE_HALT:
|
||||||
generate_discard_jump(inst);
|
generate_halt(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_SHADER_TIME_ADD:
|
case SHADER_OPCODE_SHADER_TIME_ADD:
|
||||||
|
@@ -3497,7 +3497,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
|||||||
cmp->predicate = BRW_PREDICATE_NORMAL;
|
cmp->predicate = BRW_PREDICATE_NORMAL;
|
||||||
cmp->flag_subreg = sample_mask_flag_subreg(this);
|
cmp->flag_subreg = sample_mask_flag_subreg(this);
|
||||||
|
|
||||||
fs_inst *jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
|
fs_inst *jump = bld.emit(BRW_OPCODE_HALT);
|
||||||
jump->flag_subreg = sample_mask_flag_subreg(this);
|
jump->flag_subreg = sample_mask_flag_subreg(this);
|
||||||
jump->predicate_inverse = true;
|
jump->predicate_inverse = true;
|
||||||
|
|
||||||
|
@@ -589,7 +589,7 @@ namespace {
|
|||||||
case BRW_OPCODE_WHILE:
|
case BRW_OPCODE_WHILE:
|
||||||
case BRW_OPCODE_BREAK:
|
case BRW_OPCODE_BREAK:
|
||||||
case BRW_OPCODE_CONTINUE:
|
case BRW_OPCODE_CONTINUE:
|
||||||
case FS_OPCODE_DISCARD_JUMP:
|
case BRW_OPCODE_HALT:
|
||||||
if (devinfo->gen >= 8)
|
if (devinfo->gen >= 8)
|
||||||
return calculate_desc(info, unit_null, 8, 0, 0, 0, 0,
|
return calculate_desc(info, unit_null, 8, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0);
|
0, 0, 0, 0, 0, 0);
|
||||||
@@ -1540,7 +1540,7 @@ namespace {
|
|||||||
const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
|
const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
|
||||||
1.0 : 0.5);
|
1.0 : 0.5);
|
||||||
const float loop_weight = 10;
|
const float loop_weight = 10;
|
||||||
unsigned discard_count = 0;
|
unsigned halt_count = 0;
|
||||||
unsigned elapsed = 0;
|
unsigned elapsed = 0;
|
||||||
state st;
|
state st;
|
||||||
|
|
||||||
@@ -1552,7 +1552,7 @@ namespace {
|
|||||||
|
|
||||||
issue_instruction(st, s->devinfo, inst);
|
issue_instruction(st, s->devinfo, inst);
|
||||||
|
|
||||||
if (inst->opcode == SHADER_OPCODE_HALT_TARGET && discard_count)
|
if (inst->opcode == SHADER_OPCODE_HALT_TARGET && halt_count)
|
||||||
st.weight /= discard_weight;
|
st.weight /= discard_weight;
|
||||||
|
|
||||||
elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
|
elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
|
||||||
@@ -1561,7 +1561,7 @@ namespace {
|
|||||||
st.weight *= loop_weight;
|
st.weight *= loop_weight;
|
||||||
else if (inst->opcode == BRW_OPCODE_WHILE)
|
else if (inst->opcode == BRW_OPCODE_WHILE)
|
||||||
st.weight /= loop_weight;
|
st.weight /= loop_weight;
|
||||||
else if (inst->opcode == FS_OPCODE_DISCARD_JUMP && !discard_count++)
|
else if (inst->opcode == BRW_OPCODE_HALT && !halt_count++)
|
||||||
st.weight *= discard_weight;
|
st.weight *= discard_weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -985,7 +985,7 @@ instruction_scheduler::compute_exits()
|
|||||||
* optimistic unblocked time estimate calculated above.
|
* optimistic unblocked time estimate calculated above.
|
||||||
*/
|
*/
|
||||||
foreach_in_list_reverse(schedule_node, n, &instructions) {
|
foreach_in_list_reverse(schedule_node, n, &instructions) {
|
||||||
n->exit = (n->inst->opcode == FS_OPCODE_DISCARD_JUMP ? n : NULL);
|
n->exit = (n->inst->opcode == BRW_OPCODE_HALT ? n : NULL);
|
||||||
|
|
||||||
for (int i = 0; i < n->child_count; i++) {
|
for (int i = 0; i < n->child_count; i++) {
|
||||||
if (exit_unblocked_time(n->children[i]) < exit_unblocked_time(n))
|
if (exit_unblocked_time(n->children[i]) < exit_unblocked_time(n))
|
||||||
|
@@ -444,9 +444,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||||
return "varying_pull_const_logical";
|
return "varying_pull_const_logical";
|
||||||
|
|
||||||
case FS_OPCODE_DISCARD_JUMP:
|
|
||||||
return "discard_jump";
|
|
||||||
|
|
||||||
case FS_OPCODE_SET_SAMPLE_ID:
|
case FS_OPCODE_SET_SAMPLE_ID:
|
||||||
return "set_sample_id";
|
return "set_sample_id";
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user