diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b5834540ef1..276eb70987b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5458,15 +5458,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) * vertical predication mode. */ inst->predicate = BRW_PREDICATE_ALIGN1_ALLV; - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2), - sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2), + sample_mask.type), + sample_mask); } else { inst->flag_subreg = 2; inst->predicate = BRW_PREDICATE_NORMAL; inst->predicate_inverse = false; - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), + sample_mask); } } @@ -5646,8 +5648,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) fs_reg sample_mask = sample_mask_reg(bld); const fs_builder ubld = bld.group(1, 0).exec_all(); - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), + sample_mask); } fs_reg payload, payload2; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 93d3e460098..543e760b3fe 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -419,13 +419,15 @@ private: /** * Return the flag register used in fragment shaders to keep track of live - * samples. + * samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32 + * dispatch mode, while earlier generations are constrained to f0.1, which + * limits the dispatch width to SIMD16 for fragment shaders that use discard. */ static inline unsigned sample_mask_flag_subreg(const fs_visitor *shader) { assert(shader->stage == MESA_SHADER_FRAGMENT); - return 1; + return shader->devinfo->gen >= 7 ? 2 : 1; } /** diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index fac4f5c884c..896088cc5b8 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -406,7 +406,7 @@ namespace brw { const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); const dst_reg dst = vgrf(src.type); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2; + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); return src_reg(component(dst, 0)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 93c6ee24404..5d66ead4a24 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3490,9 +3490,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, case nir_intrinsic_discard: case nir_intrinsic_demote_if: case nir_intrinsic_discard_if: { - /* We track our discarded pixels in f0.1. By predicating on it, we can - * update just the flag bits that aren't yet discarded. If there's no - * condition, we emit a CMP of g0 != g0, so all currently executing + /* We track our discarded pixels in f0.1/f1.0. By predicating on it, we + * can update just the flag bits that aren't yet discarded. If there's + * no condition, we emit a CMP of g0 != g0, so all currently executing * channels will get turned off. */ fs_inst *cmp = NULL;