From 432e263284a779f23e69f1b1201a3e7c730d021c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20=C5=9Alusarz?= Date: Sat, 21 Jan 2023 12:49:44 +0100 Subject: [PATCH] intel/compiler: fine-grained control of dispatch widths Reviewed-by: Matt Turner Reviewed-by: Lionel Landwerlin [v2] Part-of: --- docs/envvars.rst | 37 +++++++++++ src/intel/compiler/brw_compiler.c | 15 +++++ src/intel/compiler/brw_fs.cpp | 8 +-- src/intel/compiler/brw_simd_selection.cpp | 43 +++++++++++-- src/intel/compiler/test_simd_selection.cpp | 1 + src/intel/dev/intel_debug.c | 72 ++++++++++++++++++++++ src/intel/dev/intel_debug.h | 38 ++++++++++-- 7 files changed, 200 insertions(+), 14 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index 0058f10b6db..d5c6cd4b7df 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -621,6 +621,43 @@ Intel driver environment variables overrode shader with sha1 " in stderr replacing the original assembly. +.. envvar:: INTEL_SIMD_DEBUG + + a comma-separated list of named flags that control SIMD dispatch widths: + + ``fs8`` + allow generation of SIMD8 fragment shader + ``fs16`` + allow generation of SIMD16 fragment shader + ``fs32`` + allow generation of SIMD32 fragment shader + ``cs8`` + allow generation of SIMD8 compute shader + ``cs16`` + allow generation of SIMD16 compute shader + ``cs32`` + allow generation of SIMD32 compute shader + ``ts8`` + allow generation of SIMD8 task shader + ``ts16`` + allow generation of SIMD16 task shader + ``ts32`` + allow generation of SIMD32 task shader + ``ms8`` + allow generation of SIMD8 mesh shader + ``ms16`` + allow generation of SIMD16 mesh shader + ``ms32`` + allow generation of SIMD32 mesh shader + ``rt8`` + allow generation of SIMD8 ray-tracing shader + ``rt16`` + allow generation of SIMD16 ray-tracing shader + ``rt32`` + allow generation of SIMD32 ray-tracing shader + + If no width is specified for a particular shader stage, then all + widths are allowed for that stage. 
DRI environment variables ------------------------- diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 1cfc23842ea..2daadcb1b58 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -225,14 +225,29 @@ uint64_t brw_get_compiler_config_value(const struct brw_compiler *compiler) { uint64_t config = 0; + unsigned bits = 0; + insert_u64_bit(&config, compiler->precise_trig); + bits++; uint64_t mask = DEBUG_DISK_CACHE_MASK; + bits += util_bitcount64(mask); while (mask != 0) { const uint64_t bit = 1ULL << (ffsll(mask) - 1); insert_u64_bit(&config, INTEL_DEBUG(bit)); mask &= ~bit; } + + mask = SIMD_DISK_CACHE_MASK; + bits += util_bitcount64(mask); + while (mask != 0) { + const uint64_t bit = 1ULL << (ffsll(mask) - 1); + insert_u64_bit(&config, (intel_simd & bit) != 0); + mask &= ~bit; + } + + assert(bits <= util_bitcount64(UINT64_MAX)); + return config; } diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index e1d4420b23b..33289af0db1 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7394,7 +7394,7 @@ brw_compile_fs(const struct brw_compiler *compiler, if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) { params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg); return NULL; - } else if (!INTEL_DEBUG(DEBUG_NO8)) { + } else if (INTEL_SIMD(FS, 8)) { simd8_cfg = v8->cfg; prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs; prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used); @@ -7408,7 +7408,7 @@ brw_compile_fs(const struct brw_compiler *compiler, * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1917 */ if (devinfo->ver == 8 && prog_data->dual_src_blend && - !INTEL_DEBUG(DEBUG_NO8)) { + INTEL_SIMD(FS, 8)) { assert(!params->use_rep_send); v8->limit_dispatch_width(8, "gfx8 workaround: " "using SIMD8 when dual src blending.\n"); @@ -7428,7 +7428,7 @@ brw_compile_fs(const struct brw_compiler *compiler, if 
(!has_spilled && v8->max_dispatch_width >= 16 && - (!INTEL_DEBUG(DEBUG_NO16) || params->use_rep_send)) { + (INTEL_SIMD(FS, 16) || params->use_rep_send)) { /* Try a SIMD16 compile */ v16 = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base, nir, 16, @@ -7455,7 +7455,7 @@ brw_compile_fs(const struct brw_compiler *compiler, if (!has_spilled && v8->max_dispatch_width >= 32 && !params->use_rep_send && devinfo->ver >= 6 && !simd16_failed && - !INTEL_DEBUG(DEBUG_NO32)) { + INTEL_SIMD(FS, 32)) { /* Try a SIMD32 compile */ v32 = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base, nir, 32, diff --git a/src/intel/compiler/brw_simd_selection.cpp b/src/intel/compiler/brw_simd_selection.cpp index a3b63795242..1515e538b74 100644 --- a/src/intel/compiler/brw_simd_selection.cpp +++ b/src/intel/compiler/brw_simd_selection.cpp @@ -57,6 +57,17 @@ get_cs_prog_data(brw_simd_selection_state &state) return nullptr; } +struct brw_stage_prog_data * +get_prog_data(brw_simd_selection_state &state) +{ + if (std::holds_alternative(state.prog_data)) + return &std::get(state.prog_data)->base; + else if (std::holds_alternative(state.prog_data)) + return &std::get(state.prog_data)->base; + else + return nullptr; +} + } bool @@ -66,6 +77,7 @@ brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd) assert(!state.compiled[simd]); const auto cs_prog_data = get_cs_prog_data(state); + const auto prog_data = get_prog_data(state); const unsigned width = 8u << simd; /* For shaders with variable size workgroup, in most cases we can compile @@ -138,10 +150,33 @@ brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd) return false; } - static const bool env_skip[] = { - INTEL_DEBUG(DEBUG_NO8) != 0, - INTEL_DEBUG(DEBUG_NO16) != 0, - INTEL_DEBUG(DEBUG_NO32) != 0, + uint64_t start; + switch (prog_data->stage) { + case MESA_SHADER_COMPUTE: + start = DEBUG_CS_SIMD8; + break; + case MESA_SHADER_TASK: + start = 
DEBUG_TS_SIMD8; + break; + case MESA_SHADER_MESH: + start = DEBUG_MS_SIMD8; + break; + case MESA_SHADER_RAYGEN: + case MESA_SHADER_ANY_HIT: + case MESA_SHADER_CLOSEST_HIT: + case MESA_SHADER_MISS: + case MESA_SHADER_INTERSECTION: + case MESA_SHADER_CALLABLE: + start = DEBUG_RT_SIMD8; + break; + default: + unreachable("unknown shader stage in brw_simd_should_compile"); + } + + const bool env_skip[] = { + (intel_simd & (start << 0)) == 0, + (intel_simd & (start << 1)) == 0, + (intel_simd & (start << 2)) == 0, }; static_assert(ARRAY_SIZE(env_skip) == SIMD_COUNT); diff --git a/src/intel/compiler/test_simd_selection.cpp b/src/intel/compiler/test_simd_selection.cpp index 079e2a94f7e..abd3ce070db 100644 --- a/src/intel/compiler/test_simd_selection.cpp +++ b/src/intel/compiler/test_simd_selection.cpp @@ -51,6 +51,7 @@ protected: .prog_data = prog_data, } { + brw_process_intel_debug_variable(); } ~SIMDSelectionTest() { diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index 1fafaaecaa4..7650a62956d 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -41,6 +41,10 @@ uint64_t intel_debug = 0; +#define DEBUG_NO16 (1ull << 16) +#define DEBUG_NO8 (1ull << 20) +#define DEBUG_NO32 (1ull << 39) + static const struct debug_control debug_control[] = { { "tex", DEBUG_TEXTURE}, { "blit", DEBUG_BLIT}, @@ -97,6 +101,27 @@ static const struct debug_control debug_control[] = { { NULL, 0 } }; +uint64_t intel_simd = 0; + +static const struct debug_control simd_control[] = { + { "fs8", DEBUG_FS_SIMD8 }, + { "fs16", DEBUG_FS_SIMD16 }, + { "fs32", DEBUG_FS_SIMD32 }, + { "cs8", DEBUG_CS_SIMD8 }, + { "cs16", DEBUG_CS_SIMD16 }, + { "cs32", DEBUG_CS_SIMD32 }, + { "ts8", DEBUG_TS_SIMD8 }, + { "ts16", DEBUG_TS_SIMD16 }, + { "ts32", DEBUG_TS_SIMD32 }, + { "ms8", DEBUG_MS_SIMD8 }, + { "ms16", DEBUG_MS_SIMD16 }, + { "ms32", DEBUG_MS_SIMD32 }, + { "rt8", DEBUG_RT_SIMD8 }, + { "rt16", DEBUG_RT_SIMD16 }, + { "rt32", DEBUG_RT_SIMD32 }, + { NULL, 0 } +}; + 
uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage) { @@ -122,10 +147,57 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) return flags[stage]; } +#define DEBUG_FS_SIMD (DEBUG_FS_SIMD8 | DEBUG_FS_SIMD16 | DEBUG_FS_SIMD32) +#define DEBUG_CS_SIMD (DEBUG_CS_SIMD8 | DEBUG_CS_SIMD16 | DEBUG_CS_SIMD32) +#define DEBUG_TS_SIMD (DEBUG_TS_SIMD8 | DEBUG_TS_SIMD16 | DEBUG_TS_SIMD32) +#define DEBUG_MS_SIMD (DEBUG_MS_SIMD8 | DEBUG_MS_SIMD16 | DEBUG_MS_SIMD32) +#define DEBUG_RT_SIMD (DEBUG_RT_SIMD8 | DEBUG_RT_SIMD16 | DEBUG_RT_SIMD32) + +#define DEBUG_SIMD8_ALL \ + (DEBUG_FS_SIMD8 | \ + DEBUG_CS_SIMD8 | \ + DEBUG_TS_SIMD8 | \ + DEBUG_MS_SIMD8 | \ + DEBUG_RT_SIMD8) + +#define DEBUG_SIMD16_ALL \ + (DEBUG_FS_SIMD16 | \ + DEBUG_CS_SIMD16 | \ + DEBUG_TS_SIMD16 | \ + DEBUG_MS_SIMD16 | \ + DEBUG_RT_SIMD16) + +#define DEBUG_SIMD32_ALL \ + (DEBUG_FS_SIMD32 | \ + DEBUG_CS_SIMD32 | \ + DEBUG_TS_SIMD32 | \ + DEBUG_MS_SIMD32 | \ + DEBUG_RT_SIMD32) + static void brw_process_intel_debug_variable_once(void) { intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); + intel_simd = parse_debug_string(getenv("INTEL_SIMD_DEBUG"), simd_control); + + if (!(intel_simd & DEBUG_FS_SIMD)) + intel_simd |= DEBUG_FS_SIMD; + if (!(intel_simd & DEBUG_CS_SIMD)) + intel_simd |= DEBUG_CS_SIMD; + if (!(intel_simd & DEBUG_TS_SIMD)) + intel_simd |= DEBUG_TS_SIMD; + if (!(intel_simd & DEBUG_MS_SIMD)) + intel_simd |= DEBUG_MS_SIMD; + if (!(intel_simd & DEBUG_RT_SIMD)) + intel_simd |= DEBUG_RT_SIMD; + + if (intel_debug & DEBUG_NO8) + intel_simd &= ~DEBUG_SIMD8_ALL; + if (intel_debug & DEBUG_NO16) + intel_simd &= ~DEBUG_SIMD16_ALL; + if (intel_debug & DEBUG_NO32) + intel_simd &= ~DEBUG_SIMD32_ALL; + intel_debug &= ~(DEBUG_NO8 | DEBUG_NO16 | DEBUG_NO32); } void diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 14b32b54e5f..9afde9780b4 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -61,11 +61,11 @@ extern uint64_t intel_debug; 
#define DEBUG_CLIP (1ull << 13) #define DEBUG_STALL (1ull << 14) #define DEBUG_BLORP (1ull << 15) -#define DEBUG_NO16 (1ull << 16) +/* reserved (1ull << 16) */ #define DEBUG_NO_DUAL_OBJECT_GS (1ull << 17) #define DEBUG_OPTIMIZER (1ull << 18) #define DEBUG_ANNOTATION (1ull << 19) -#define DEBUG_NO8 (1ull << 20) +/* reserved (1ull << 20) */ #define DEBUG_NO_OACONFIG (1ull << 21) #define DEBUG_SPILL_FS (1ull << 22) #define DEBUG_SPILL_VEC4 (1ull << 23) @@ -84,7 +84,7 @@ extern uint64_t intel_debug; #define DEBUG_BT (1ull << 36) #define DEBUG_PIPE_CONTROL (1ull << 37) #define DEBUG_NO_FAST_CLEAR (1ull << 38) -#define DEBUG_NO32 (1ull << 39) +/* reserved (1ull << 39) */ #define DEBUG_RT (1ull << 40) #define DEBUG_TASK (1ull << 41) #define DEBUG_MESH (1ull << 42) @@ -97,9 +97,35 @@ extern uint64_t intel_debug; /* These flags may affect program generation */ #define DEBUG_DISK_CACHE_MASK \ - (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ - DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \ - DEBUG_NO32) + (DEBUG_NO_DUAL_OBJECT_GS | DEBUG_SPILL_FS | \ + DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64) + +extern uint64_t intel_simd; + +#define INTEL_SIMD(type, size) (!!(intel_simd & (DEBUG_ ## type ## _SIMD ## size))) + +/* VS, TCS, TES and GS stages are dispatched in one size */ +#define DEBUG_FS_SIMD8 (1ull << 0) +#define DEBUG_FS_SIMD16 (1ull << 1) +#define DEBUG_FS_SIMD32 (1ull << 2) + +#define DEBUG_CS_SIMD8 (1ull << 3) +#define DEBUG_CS_SIMD16 (1ull << 4) +#define DEBUG_CS_SIMD32 (1ull << 5) + +#define DEBUG_TS_SIMD8 (1ull << 6) +#define DEBUG_TS_SIMD16 (1ull << 7) +#define DEBUG_TS_SIMD32 (1ull << 8) + +#define DEBUG_MS_SIMD8 (1ull << 9) +#define DEBUG_MS_SIMD16 (1ull << 10) +#define DEBUG_MS_SIMD32 (1ull << 11) + +#define DEBUG_RT_SIMD8 (1ull << 12) +#define DEBUG_RT_SIMD16 (1ull << 13) +#define DEBUG_RT_SIMD32 (1ull << 14) + +#define SIMD_DISK_CACHE_MASK ((1ull << 15) - 1) #ifdef HAVE_ANDROID_PLATFORM 
#define LOG_TAG "INTEL-MESA"