intel/fs: Disable sample mask predication for scratch stores

Scratch stores are being lowered to the instructions with side-effects,
however they should be enabled in fs helper invocations, since they
are produced from operations which don't imply side-effects.

To fix this - we move the decision of whether the sample mask predication
is enable to the point where logical brw instructions are created.

GLSL example of the issue:

 int tmp[1024];
 ...
 do {
   // changes to tmp
 } while (some_condition(tmp))

If `tmp` is lowered to scrach memory, `some_condition` would be
undefined if scratch write is predicated on sample mask, making
possible for the while loop to become infinite and hang the GPU.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3256
Fixes: 53bfcdeecf
Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6056>
This commit is contained in:
Danylo Piliaiev
2020-07-23 15:15:34 +03:00
committed by Marge Bot
parent cbef2dc7d3
commit 77486db867
3 changed files with 34 additions and 2 deletions

View File

@@ -904,6 +904,11 @@ enum surface_logical_srcs {
SURFACE_LOGICAL_SRC_IMM_DIMS,
/** Per-opcode immediate argument. For atomics, this is the atomic opcode */
SURFACE_LOGICAL_SRC_IMM_ARG,
/**
* Some instructions with side-effects should not be predicated on
* sample mask, e.g. lowered stores to scratch.
*/
SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK,
SURFACE_LOGICAL_NUM_SRCS
};

View File

@@ -5380,7 +5380,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
const fs_reg &allow_sample_mask =
inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
assert(arg.file == IMM);
assert(allow_sample_mask.file == IMM);
/* We must have exactly one of surface and surface_handle */
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
@@ -5404,8 +5407,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
surface.ud == GEN8_BTI_STATELESS_NON_COHERENT);
const bool has_side_effects = inst->has_side_effects();
fs_reg sample_mask = has_side_effects ? sample_mask_reg(bld) :
fs_reg(brw_imm_d(0xffff));
fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) :
fs_reg(brw_imm_d(0xffff));
/* From the BDW PRM Volume 7, page 147:
*

View File

@@ -3774,6 +3774,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(3); /* num components */
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = brw_imm_ud(0);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_inst *inst =
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -3808,6 +3809,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[0]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
/* Make dest unsigned because that's what the temporary will be */
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -3844,6 +3846,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[0]);
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4125,6 +4128,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (instr->intrinsic == nir_intrinsic_image_load ||
instr->intrinsic == nir_intrinsic_bindless_image_load) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_inst *inst =
bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4133,6 +4137,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
instr->intrinsic == nir_intrinsic_bindless_image_store) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
} else {
@@ -4155,6 +4160,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
data = tmp;
}
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4214,6 +4220,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_inst *inst =
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
@@ -4230,6 +4237,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[2]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4651,6 +4659,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
get_nir_ssbo_intrinsic_index(bld, instr);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
/* Make dest unsigned because that's what the temporary will be */
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4687,6 +4696,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
get_nir_ssbo_intrinsic_index(bld, instr);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[2]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[0]);
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4825,6 +4835,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
const fs_reg nir_addr = get_nir_src(instr->src[0]);
/* Make dest unsigned because that's what the temporary will be */
@@ -4870,6 +4881,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
/**
* While this instruction has side-effects, it should not be predicated
* on sample mask, because otherwise fs helper invocations would
* load undefined values from scratch memory. And scratch memory
* load-stores are produced from operations without side-effects, thus
* they should not have different behaviour in the helper invocations.
*/
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
const fs_reg nir_addr = get_nir_src(instr->src[1]);
fs_reg data = get_nir_src(instr->src[0]);
@@ -5318,6 +5337,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
@@ -5350,6 +5370,7 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[2]);
if (op == BRW_AOP_FCMPWR) {
@@ -5378,6 +5399,7 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
@@ -5419,6 +5441,7 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[1]);
if (op == BRW_AOP_FCMPWR) {