intel/compiler: fine-grained control of dispatch widths

Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20535>
This commit is contained in:
Marcin Ślusarz
2023-01-05 15:39:28 +01:00
committed by Marge Bot
parent bf3112805c
commit bed18ab3e2
7 changed files with 189 additions and 14 deletions

View File

@@ -549,6 +549,42 @@ Intel driver environment variables
overrode shader with sha1 <SHA-1>" in stderr replacing the original overrode shader with sha1 <SHA-1>" in stderr replacing the original
assembly. assembly.
:envvar:`INTEL_SIMD_DEBUG`
a comma-separated list of named flags that control SIMD dispatch widths:
``fs8``
allow generation of SIMD8 fragment shader
``fs16``
allow generation of SIMD16 fragment shader
``fs32``
allow generation of SIMD32 fragment shader
``cs8``
allow generation of SIMD8 compute shader
``cs16``
allow generation of SIMD16 compute shader
``cs32``
allow generation of SIMD32 compute shader
``ts8``
allow generation of SIMD8 task shader
``ts16``
allow generation of SIMD16 task shader
``ts32``
allow generation of SIMD32 task shader
``ms8``
allow generation of SIMD8 mesh shader
``ms16``
allow generation of SIMD16 mesh shader
``ms32``
allow generation of SIMD32 mesh shader
``rt8``
allow generation of SIMD8 ray-tracing shader
``rt16``
allow generation of SIMD16 ray-tracing shader
``rt32``
allow generation of SIMD32 ray-tracing shader
If none of the widths for a particular shader stage is specified, then all
widths are allowed for that stage.
DRI environment variables DRI environment variables
------------------------- -------------------------

View File

@@ -225,14 +225,29 @@ uint64_t
brw_get_compiler_config_value(const struct brw_compiler *compiler) brw_get_compiler_config_value(const struct brw_compiler *compiler)
{ {
uint64_t config = 0; uint64_t config = 0;
unsigned bits = 0;
insert_u64_bit(&config, compiler->precise_trig); insert_u64_bit(&config, compiler->precise_trig);
bits++;
uint64_t mask = DEBUG_DISK_CACHE_MASK; uint64_t mask = DEBUG_DISK_CACHE_MASK;
bits += util_bitcount64(mask);
while (mask != 0) { while (mask != 0) {
const uint64_t bit = 1ULL << (ffsll(mask) - 1); const uint64_t bit = 1ULL << (ffsll(mask) - 1);
insert_u64_bit(&config, INTEL_DEBUG(bit)); insert_u64_bit(&config, INTEL_DEBUG(bit));
mask &= ~bit; mask &= ~bit;
} }
mask = SIMD_DISK_CACHE_MASK;
bits += util_bitcount64(mask);
while (mask != 0) {
const uint64_t bit = 1ULL << (ffsll(mask) - 1);
insert_u64_bit(&config, (intel_simd & bit) != 0);
mask &= ~bit;
}
assert(bits <= util_bitcount64(UINT64_MAX));
return config; return config;
} }

View File

@@ -7551,7 +7551,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) { if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg); params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
return NULL; return NULL;
} else if (!INTEL_DEBUG(DEBUG_NO8)) { } else if (INTEL_SIMD(FS, 8)) {
simd8_cfg = v8->cfg; simd8_cfg = v8->cfg;
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs; prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs;
prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used); prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
@@ -7565,7 +7565,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
* See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1917 * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1917
*/ */
if (devinfo->ver == 8 && prog_data->dual_src_blend && if (devinfo->ver == 8 && prog_data->dual_src_blend &&
!INTEL_DEBUG(DEBUG_NO8)) { INTEL_SIMD(FS, 8)) {
assert(!params->use_rep_send); assert(!params->use_rep_send);
v8->limit_dispatch_width(8, "gfx8 workaround: " v8->limit_dispatch_width(8, "gfx8 workaround: "
"using SIMD8 when dual src blending.\n"); "using SIMD8 when dual src blending.\n");
@@ -7585,7 +7585,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (!has_spilled && if (!has_spilled &&
v8->max_dispatch_width >= 16 && v8->max_dispatch_width >= 16 &&
(!INTEL_DEBUG(DEBUG_NO16) || params->use_rep_send)) { (INTEL_SIMD(FS, 16) || params->use_rep_send)) {
/* Try a SIMD16 compile */ /* Try a SIMD16 compile */
v16 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base, v16 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, nir, 16, &prog_data->base, nir, 16,
@@ -7612,7 +7612,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (!has_spilled && if (!has_spilled &&
v8->max_dispatch_width >= 32 && !params->use_rep_send && v8->max_dispatch_width >= 32 && !params->use_rep_send &&
devinfo->ver >= 6 && !simd16_failed && devinfo->ver >= 6 && !simd16_failed &&
!INTEL_DEBUG(DEBUG_NO32)) { INTEL_SIMD(FS, 32)) {
/* Try a SIMD32 compile */ /* Try a SIMD32 compile */
v32 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base, v32 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, nir, 32, &prog_data->base, nir, 32,

View File

@@ -138,10 +138,33 @@ brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd)
return false; return false;
} }
static const bool env_skip[] = { uint64_t start;
INTEL_DEBUG(DEBUG_NO8) != 0, switch (cs_prog_data->base.stage) {
INTEL_DEBUG(DEBUG_NO16) != 0, case MESA_SHADER_COMPUTE:
INTEL_DEBUG(DEBUG_NO32) != 0, start = DEBUG_CS_SIMD8;
break;
case MESA_SHADER_TASK:
start = DEBUG_TS_SIMD8;
break;
case MESA_SHADER_MESH:
start = DEBUG_MS_SIMD8;
break;
case MESA_SHADER_RAYGEN:
case MESA_SHADER_ANY_HIT:
case MESA_SHADER_CLOSEST_HIT:
case MESA_SHADER_MISS:
case MESA_SHADER_INTERSECTION:
case MESA_SHADER_CALLABLE:
start = DEBUG_RT_SIMD8;
break;
default:
unreachable(!"unknown shader stage in brw_simd_should_compile");
}
const bool env_skip[] = {
(intel_simd & (start << 0)) == 0,
(intel_simd & (start << 1)) == 0,
(intel_simd & (start << 2)) == 0,
}; };
static_assert(ARRAY_SIZE(env_skip) == SIMD_COUNT); static_assert(ARRAY_SIZE(env_skip) == SIMD_COUNT);

View File

@@ -51,6 +51,7 @@ protected:
.prog_data = prog_data, .prog_data = prog_data,
} }
{ {
brw_process_intel_debug_variable();
} }
~SIMDSelectionTest() { ~SIMDSelectionTest() {

View File

@@ -41,6 +41,10 @@
uint64_t intel_debug = 0; uint64_t intel_debug = 0;
#define DEBUG_NO16 (1ull << 16)
#define DEBUG_NO8 (1ull << 20)
#define DEBUG_NO32 (1ull << 39)
static const struct debug_control debug_control[] = { static const struct debug_control debug_control[] = {
{ "tex", DEBUG_TEXTURE}, { "tex", DEBUG_TEXTURE},
{ "blit", DEBUG_BLIT}, { "blit", DEBUG_BLIT},
@@ -97,6 +101,26 @@ static const struct debug_control debug_control[] = {
{ NULL, 0 } { NULL, 0 }
}; };
uint64_t intel_simd = 0;
/* Flag names accepted in the INTEL_SIMD_DEBUG environment variable, each
 * mapped to the intel_simd bit that allows the given dispatch width for the
 * given stage (fs = fragment, cs = compute, ts = task, ms = mesh,
 * rt = ray-tracing).
 *
 * The table is handed to parse_debug_string() without a length, so it must
 * end with a { NULL, 0 } sentinel exactly like debug_control[] does;
 * otherwise the parser would walk past the last entry.
 */
static const struct debug_control simd_control[] = {
   { "fs8", DEBUG_FS_SIMD8 },
   { "fs16", DEBUG_FS_SIMD16 },
   { "fs32", DEBUG_FS_SIMD32 },
   { "cs8", DEBUG_CS_SIMD8 },
   { "cs16", DEBUG_CS_SIMD16 },
   { "cs32", DEBUG_CS_SIMD32 },
   { "ts8", DEBUG_TS_SIMD8 },
   { "ts16", DEBUG_TS_SIMD16 },
   { "ts32", DEBUG_TS_SIMD32 },
   { "ms8", DEBUG_MS_SIMD8 },
   { "ms16", DEBUG_MS_SIMD16 },
   { "ms32", DEBUG_MS_SIMD32 },
   { "rt8", DEBUG_RT_SIMD8 },
   { "rt16", DEBUG_RT_SIMD16 },
   { "rt32", DEBUG_RT_SIMD32 },
   /* End-of-table marker. */
   { NULL, 0 }
};
uint64_t uint64_t
intel_debug_flag_for_shader_stage(gl_shader_stage stage) intel_debug_flag_for_shader_stage(gl_shader_stage stage)
{ {
@@ -122,10 +146,57 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
return flags[stage]; return flags[stage];
} }
/* Per-stage masks: all three SIMD width bits (8, 16 and 32) of one stage.
 * brw_process_intel_debug_variable_once() tests intel_simd against these to
 * detect a stage for which no width was selected, and then enables all of
 * that stage's widths.
 */
#define DEBUG_FS_SIMD (DEBUG_FS_SIMD8 | DEBUG_FS_SIMD16 | DEBUG_FS_SIMD32)
#define DEBUG_CS_SIMD (DEBUG_CS_SIMD8 | DEBUG_CS_SIMD16 | DEBUG_CS_SIMD32)
#define DEBUG_TS_SIMD (DEBUG_TS_SIMD8 | DEBUG_TS_SIMD16 | DEBUG_TS_SIMD32)
#define DEBUG_MS_SIMD (DEBUG_MS_SIMD8 | DEBUG_MS_SIMD16 | DEBUG_MS_SIMD32)
#define DEBUG_RT_SIMD (DEBUG_RT_SIMD8 | DEBUG_RT_SIMD16 | DEBUG_RT_SIMD32)
/* Per-width masks: one SIMD width across every stage (FS, CS, TS, MS, RT).
 * brw_process_intel_debug_variable_once() clears these from intel_simd when
 * the matching DEBUG_NO8 / DEBUG_NO16 / DEBUG_NO32 bit is set in intel_debug.
 */
#define DEBUG_SIMD8_ALL \
(DEBUG_FS_SIMD8 | \
DEBUG_CS_SIMD8 | \
DEBUG_TS_SIMD8 | \
DEBUG_MS_SIMD8 | \
DEBUG_RT_SIMD8)
#define DEBUG_SIMD16_ALL \
(DEBUG_FS_SIMD16 | \
DEBUG_CS_SIMD16 | \
DEBUG_TS_SIMD16 | \
DEBUG_MS_SIMD16 | \
DEBUG_RT_SIMD16)
#define DEBUG_SIMD32_ALL \
(DEBUG_FS_SIMD32 | \
DEBUG_CS_SIMD32 | \
DEBUG_TS_SIMD32 | \
DEBUG_MS_SIMD32 | \
DEBUG_RT_SIMD32)
static void static void
brw_process_intel_debug_variable_once(void) brw_process_intel_debug_variable_once(void)
{ {
intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
intel_simd = parse_debug_string(getenv("INTEL_SIMD_DEBUG"), simd_control);
if (!(intel_simd & DEBUG_FS_SIMD))
intel_simd |= DEBUG_FS_SIMD;
if (!(intel_simd & DEBUG_CS_SIMD))
intel_simd |= DEBUG_CS_SIMD;
if (!(intel_simd & DEBUG_TS_SIMD))
intel_simd |= DEBUG_TS_SIMD;
if (!(intel_simd & DEBUG_MS_SIMD))
intel_simd |= DEBUG_MS_SIMD;
if (!(intel_simd & DEBUG_RT_SIMD))
intel_simd |= DEBUG_RT_SIMD;
if (intel_debug & DEBUG_NO8)
intel_simd &= ~DEBUG_SIMD8_ALL;
if (intel_debug & DEBUG_NO16)
intel_simd &= ~DEBUG_SIMD16_ALL;
if (intel_debug & DEBUG_NO32)
intel_simd &= ~DEBUG_SIMD32_ALL;
intel_debug &= ~(DEBUG_NO8 | DEBUG_NO16 | DEBUG_NO32);
} }
void void

View File

@@ -61,11 +61,13 @@ extern uint64_t intel_debug;
#define DEBUG_CLIP (1ull << 13) #define DEBUG_CLIP (1ull << 13)
#define DEBUG_STALL (1ull << 14) #define DEBUG_STALL (1ull << 14)
#define DEBUG_BLORP (1ull << 15) #define DEBUG_BLORP (1ull << 15)
#define DEBUG_NO16 (1ull << 16) /* internal to intel_debug.c, replaced by INTEL_SIMD */
/*#define DEBUG_NO16 (1ull << 16)*/
#define DEBUG_NO_DUAL_OBJECT_GS (1ull << 17) #define DEBUG_NO_DUAL_OBJECT_GS (1ull << 17)
#define DEBUG_OPTIMIZER (1ull << 18) #define DEBUG_OPTIMIZER (1ull << 18)
#define DEBUG_ANNOTATION (1ull << 19) #define DEBUG_ANNOTATION (1ull << 19)
#define DEBUG_NO8 (1ull << 20) /* internal to intel_debug.c, replaced by INTEL_SIMD */
/*#define DEBUG_NO8 (1ull << 20)*/
#define DEBUG_NO_OACONFIG (1ull << 21) #define DEBUG_NO_OACONFIG (1ull << 21)
#define DEBUG_SPILL_FS (1ull << 22) #define DEBUG_SPILL_FS (1ull << 22)
#define DEBUG_SPILL_VEC4 (1ull << 23) #define DEBUG_SPILL_VEC4 (1ull << 23)
@@ -84,7 +86,8 @@ extern uint64_t intel_debug;
#define DEBUG_BT (1ull << 36) #define DEBUG_BT (1ull << 36)
#define DEBUG_PIPE_CONTROL (1ull << 37) #define DEBUG_PIPE_CONTROL (1ull << 37)
#define DEBUG_NO_FAST_CLEAR (1ull << 38) #define DEBUG_NO_FAST_CLEAR (1ull << 38)
#define DEBUG_NO32 (1ull << 39) /* internal to intel_debug.c, replaced by INTEL_SIMD */
/*#define DEBUG_NO32 (1ull << 39)*/
#define DEBUG_RT (1ull << 40) #define DEBUG_RT (1ull << 40)
#define DEBUG_TASK (1ull << 41) #define DEBUG_TASK (1ull << 41)
#define DEBUG_MESH (1ull << 42) #define DEBUG_MESH (1ull << 42)
@@ -97,9 +100,35 @@ extern uint64_t intel_debug;
/* These flags may affect program generation */ /* These flags may affect program generation */
#define DEBUG_DISK_CACHE_MASK \ #define DEBUG_DISK_CACHE_MASK \
(DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ (DEBUG_NO_DUAL_OBJECT_GS | DEBUG_SPILL_FS | \
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \ DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)
DEBUG_NO32)
/* Bitmask of SIMD dispatch widths the compiler is allowed to generate, built
 * from the DEBUG_<stage>_SIMD<width> bits below. It is filled in from the
 * INTEL_SIMD_DEBUG environment variable in intel_debug.c.
 */
extern uint64_t intel_simd;
/* Evaluates to 1 when the given width is allowed for the given stage, else 0.
 * `type` is the stage prefix (FS, CS, TS, MS or RT) and `size` is 8, 16 or 32;
 * e.g. INTEL_SIMD(FS, 8) tests DEBUG_FS_SIMD8.
 */
#define INTEL_SIMD(type, size) (!!(intel_simd & (DEBUG_ ## type ## _SIMD ## size)))
/* VS, TCS, TES and GS stages are dispatched in one size, so they have no
 * per-width bits here.
 *
 * Each stage owns three consecutive bits ordered SIMD8, SIMD16, SIMD32.
 * brw_simd_should_compile() relies on this: it takes a stage's SIMD8 bit and
 * shifts it left by 1 and 2 to reach the SIMD16 and SIMD32 bits, so keep the
 * ordering and adjacency intact when adding stages.
 */
#define DEBUG_FS_SIMD8 (1ull << 0)
#define DEBUG_FS_SIMD16 (1ull << 1)
#define DEBUG_FS_SIMD32 (1ull << 2)
#define DEBUG_CS_SIMD8 (1ull << 3)
#define DEBUG_CS_SIMD16 (1ull << 4)
#define DEBUG_CS_SIMD32 (1ull << 5)
#define DEBUG_TS_SIMD8 (1ull << 6)
#define DEBUG_TS_SIMD16 (1ull << 7)
#define DEBUG_TS_SIMD32 (1ull << 8)
#define DEBUG_MS_SIMD8 (1ull << 9)
#define DEBUG_MS_SIMD16 (1ull << 10)
#define DEBUG_MS_SIMD32 (1ull << 11)
#define DEBUG_RT_SIMD8 (1ull << 12)
#define DEBUG_RT_SIMD16 (1ull << 13)
#define DEBUG_RT_SIMD32 (1ull << 14)
/* Covers bits 0..14, i.e. every flag defined above. Each bit in this mask is
 * folded into the value returned by brw_get_compiler_config_value(); widen
 * the mask if more bits are added.
 */
#define SIMD_DISK_CACHE_MASK ((1ull << 15) - 1)
#ifdef HAVE_ANDROID_PLATFORM #ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA" #define LOG_TAG "INTEL-MESA"