intel/fs: Disable sample mask predication for scratch stores
Scratch stores are being lowered to the instructions with side-effects,
however they should be enabled in fs helper invocations, since they
are produced from operations which don't imply side-effects.
To fix this - we move the decision of whether the sample mask predication
is enable to the point where logical brw instructions are created.
GLSL example of the issue:
int tmp[1024];
...
do {
// changes to tmp
} while (some_condition(tmp))
If `tmp` is lowered to scrach memory, `some_condition` would be
undefined if scratch write is predicated on sample mask, making
possible for the while loop to become infinite and hang the GPU.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3256
Fixes: 53bfcdeecf
Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6056>
This commit is contained in:

committed by
Marge Bot

parent
cbef2dc7d3
commit
77486db867
@@ -904,6 +904,11 @@ enum surface_logical_srcs {
|
||||
SURFACE_LOGICAL_SRC_IMM_DIMS,
|
||||
/** Per-opcode immediate argument. For atomics, this is the atomic opcode */
|
||||
SURFACE_LOGICAL_SRC_IMM_ARG,
|
||||
/**
|
||||
* Some instructions with side-effects should not be predicated on
|
||||
* sample mask, e.g. lowered stores to scratch.
|
||||
*/
|
||||
SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK,
|
||||
|
||||
SURFACE_LOGICAL_NUM_SRCS
|
||||
};
|
||||
|
@@ -5380,7 +5380,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||
const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
|
||||
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||
const fs_reg &allow_sample_mask =
|
||||
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
|
||||
assert(arg.file == IMM);
|
||||
assert(allow_sample_mask.file == IMM);
|
||||
|
||||
/* We must have exactly one of surface and surface_handle */
|
||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||
@@ -5404,8 +5407,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
surface.ud == GEN8_BTI_STATELESS_NON_COHERENT);
|
||||
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
fs_reg sample_mask = has_side_effects ? sample_mask_reg(bld) :
|
||||
fs_reg(brw_imm_d(0xffff));
|
||||
|
||||
fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) :
|
||||
fs_reg(brw_imm_d(0xffff));
|
||||
|
||||
/* From the BDW PRM Volume 7, page 147:
|
||||
*
|
||||
|
@@ -3774,6 +3774,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(3); /* num components */
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = brw_imm_ud(0);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
fs_inst *inst =
|
||||
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
|
||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||
@@ -3808,6 +3809,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[0]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
|
||||
/* Make dest unsigned because that's what the temporary will be */
|
||||
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
|
||||
@@ -3844,6 +3846,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data = get_nir_src(instr->src[0]);
|
||||
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
|
||||
@@ -4125,6 +4128,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
if (instr->intrinsic == nir_intrinsic_image_load ||
|
||||
instr->intrinsic == nir_intrinsic_bindless_image_load) {
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
fs_inst *inst =
|
||||
bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
|
||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||
@@ -4133,6 +4137,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
instr->intrinsic == nir_intrinsic_bindless_image_store) {
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
|
||||
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
|
||||
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||
} else {
|
||||
@@ -4155,6 +4160,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
data = tmp;
|
||||
}
|
||||
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
bld.emit(SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||
@@ -4214,6 +4220,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
|
||||
fs_inst *inst =
|
||||
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
|
||||
@@ -4230,6 +4237,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[2]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
|
||||
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||
@@ -4651,6 +4659,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
get_nir_ssbo_intrinsic_index(bld, instr);
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
|
||||
/* Make dest unsigned because that's what the temporary will be */
|
||||
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
|
||||
@@ -4687,6 +4696,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
get_nir_ssbo_intrinsic_index(bld, instr);
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[2]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data = get_nir_src(instr->src[0]);
|
||||
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
|
||||
@@ -4825,6 +4835,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
const fs_reg nir_addr = get_nir_src(instr->src[0]);
|
||||
|
||||
/* Make dest unsigned because that's what the temporary will be */
|
||||
@@ -4870,6 +4881,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
|
||||
/**
|
||||
* While this instruction has side-effects, it should not be predicated
|
||||
* on sample mask, because otherwise fs helper invocations would
|
||||
* load undefined values from scratch memory. And scratch memory
|
||||
* load-stores are produced from operations without side-effects, thus
|
||||
* they should not have different behaviour in the helper invocations.
|
||||
*/
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
|
||||
const fs_reg nir_addr = get_nir_src(instr->src[1]);
|
||||
|
||||
fs_reg data = get_nir_src(instr->src[0]);
|
||||
@@ -5318,6 +5337,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data;
|
||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||
@@ -5350,6 +5370,7 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data = get_nir_src(instr->src[2]);
|
||||
if (op == BRW_AOP_FCMPWR) {
|
||||
@@ -5378,6 +5399,7 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data;
|
||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||
@@ -5419,6 +5441,7 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
|
||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||
|
||||
fs_reg data = get_nir_src(instr->src[1]);
|
||||
if (op == BRW_AOP_FCMPWR) {
|
||||
|
Reference in New Issue
Block a user