radv: add RADV_DEBUG=splitfma
This splits application-provided FMA in vertex/geometry/tessellation/mesh shaders.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14458>
This commit is contained in:
@@ -651,6 +651,8 @@ RADV driver environment variables
|
|||||||
dump shader statistics
|
dump shader statistics
|
||||||
``spirv``
|
``spirv``
|
||||||
dump SPIR-V
|
dump SPIR-V
|
||||||
|
``splitfma``
|
||||||
|
split application-provided fused multiply-add in geometry stages
|
||||||
``startup``
|
``startup``
|
||||||
display info at startup
|
display info at startup
|
||||||
``syncshaders``
|
``syncshaders``
|
||||||
|
@@ -65,6 +65,7 @@ enum {
|
|||||||
RADV_DEBUG_NO_NGGC = 1ull << 34,
|
RADV_DEBUG_NO_NGGC = 1ull << 34,
|
||||||
RADV_DEBUG_DUMP_PROLOGS = 1ull << 35,
|
RADV_DEBUG_DUMP_PROLOGS = 1ull << 35,
|
||||||
RADV_DEBUG_NO_DMA_BLIT = 1ull << 36,
|
RADV_DEBUG_NO_DMA_BLIT = 1ull << 36,
|
||||||
|
RADV_DEBUG_SPLIT_FMA = 1ull << 37,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@@ -850,6 +850,7 @@ static const struct debug_control radv_debug_options[] = {
|
|||||||
{"img", RADV_DEBUG_IMG},
|
{"img", RADV_DEBUG_IMG},
|
||||||
{"noumr", RADV_DEBUG_NO_UMR},
|
{"noumr", RADV_DEBUG_NO_UMR},
|
||||||
{"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
|
{"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
|
||||||
|
{"splitfma", RADV_DEBUG_SPLIT_FMA},
|
||||||
{"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
|
{"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
|
||||||
{"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
|
{"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
|
||||||
{"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
|
{"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
|
||||||
@@ -909,6 +910,7 @@ static const driOptionDescription radv_dri_options[] = {
|
|||||||
DRI_CONF_RADV_ZERO_VRAM(false)
|
DRI_CONF_RADV_ZERO_VRAM(false)
|
||||||
DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
|
DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
|
||||||
DRI_CONF_RADV_INVARIANT_GEOM(false)
|
DRI_CONF_RADV_INVARIANT_GEOM(false)
|
||||||
|
DRI_CONF_RADV_SPLIT_FMA(false)
|
||||||
DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
|
DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
|
||||||
DRI_CONF_RADV_DISABLE_DCC(false)
|
DRI_CONF_RADV_DISABLE_DCC(false)
|
||||||
DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
|
DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
|
||||||
@@ -951,6 +953,9 @@ radv_init_dri_options(struct radv_instance *instance)
|
|||||||
if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
|
if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
|
||||||
instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
|
instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
|
||||||
|
|
||||||
|
if (driQueryOptionb(&instance->dri_options, "radv_split_fma"))
|
||||||
|
instance->debug_flags |= RADV_DEBUG_SPLIT_FMA;
|
||||||
|
|
||||||
if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
|
if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
|
||||||
instance->debug_flags |= RADV_DEBUG_NO_DCC;
|
instance->debug_flags |= RADV_DEBUG_NO_DCC;
|
||||||
|
|
||||||
|
@@ -290,6 +290,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
|
|||||||
hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS;
|
hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS;
|
||||||
if (device->robust_buffer_access2) /* affects load/store vectorizer */
|
if (device->robust_buffer_access2) /* affects load/store vectorizer */
|
||||||
hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2;
|
hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2;
|
||||||
|
if (device->instance->debug_flags & RADV_DEBUG_SPLIT_FMA)
|
||||||
|
hash_flags |= RADV_HASH_SHADER_SPLIT_FMA;
|
||||||
return hash_flags;
|
return hash_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -312,7 +312,7 @@ struct radv_physical_device {
|
|||||||
dev_t render_devid;
|
dev_t render_devid;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
nir_shader_compiler_options nir_options;
|
nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct radv_instance {
|
struct radv_instance {
|
||||||
@@ -1709,7 +1709,8 @@ struct radv_event {
|
|||||||
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
|
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
|
||||||
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
|
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
|
||||||
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
|
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
|
||||||
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)
|
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)
|
||||||
|
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
|
||||||
|
|
||||||
struct radv_pipeline_key;
|
struct radv_pipeline_key;
|
||||||
|
|
||||||
|
@@ -49,10 +49,12 @@
|
|||||||
#include "ac_llvm_util.h"
|
#include "ac_llvm_util.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void
|
static void
|
||||||
radv_get_nir_options(struct radv_physical_device *device)
|
get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage stage)
|
||||||
{
|
{
|
||||||
device->nir_options = (nir_shader_compiler_options){
|
bool split_fma = (stage <= MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH) &&
|
||||||
|
device->instance->debug_flags & RADV_DEBUG_SPLIT_FMA;
|
||||||
|
device->nir_options[stage] = (nir_shader_compiler_options){
|
||||||
.vertex_id_zero_based = true,
|
.vertex_id_zero_based = true,
|
||||||
.lower_scmp = true,
|
.lower_scmp = true,
|
||||||
.lower_flrp16 = true,
|
.lower_flrp16 = true,
|
||||||
@@ -77,9 +79,9 @@ radv_get_nir_options(struct radv_physical_device *device)
|
|||||||
.lower_unpack_unorm_2x16 = true,
|
.lower_unpack_unorm_2x16 = true,
|
||||||
.lower_unpack_unorm_4x8 = true,
|
.lower_unpack_unorm_4x8 = true,
|
||||||
.lower_unpack_half_2x16 = true,
|
.lower_unpack_half_2x16 = true,
|
||||||
.lower_ffma16 = device->rad_info.chip_class < GFX9,
|
.lower_ffma16 = split_fma || device->rad_info.chip_class < GFX9,
|
||||||
.lower_ffma32 = device->rad_info.chip_class < GFX10_3,
|
.lower_ffma32 = split_fma || device->rad_info.chip_class < GFX10_3,
|
||||||
.lower_ffma64 = false,
|
.lower_ffma64 = split_fma,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_mul_2x32_64 = true,
|
.lower_mul_2x32_64 = true,
|
||||||
.lower_rotate = true,
|
.lower_rotate = true,
|
||||||
@@ -103,6 +105,13 @@ radv_get_nir_options(struct radv_physical_device *device)
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
radv_get_nir_options(struct radv_physical_device *device)
|
||||||
|
{
|
||||||
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_VULKAN_SHADER_STAGES; stage++)
|
||||||
|
get_nir_options_for_stage(device, stage);
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_meta_shader(nir_shader *nir)
|
is_meta_shader(nir_shader *nir)
|
||||||
{
|
{
|
||||||
@@ -464,7 +473,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
|
|||||||
* and just use the NIR shader. We don't want to alter meta and RT
|
* and just use the NIR shader. We don't want to alter meta and RT
|
||||||
* shaders IR directly, so clone it first. */
|
* shaders IR directly, so clone it first. */
|
||||||
nir = nir_shader_clone(NULL, module->nir);
|
nir = nir_shader_clone(NULL, module->nir);
|
||||||
nir->options = &device->physical_device->nir_options;
|
nir->options = &device->physical_device->nir_options[stage];
|
||||||
nir_validate_shader(nir, "in internal shader");
|
nir_validate_shader(nir, "in internal shader");
|
||||||
|
|
||||||
assert(exec_list_length(&nir->functions) == 1);
|
assert(exec_list_length(&nir->functions) == 1);
|
||||||
@@ -558,7 +567,8 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage,
|
nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage,
|
||||||
entrypoint_name, &spirv_options, &device->physical_device->nir_options);
|
entrypoint_name, &spirv_options,
|
||||||
|
&device->physical_device->nir_options[stage]);
|
||||||
assert(nir->info.stage == stage);
|
assert(nir->info.stage == stage);
|
||||||
nir_validate_shader(nir, "after spirv_to_nir");
|
nir_validate_shader(nir, "after spirv_to_nir");
|
||||||
|
|
||||||
|
@@ -544,6 +544,10 @@
|
|||||||
DRI_CONF_OPT_B(radv_invariant_geom, def, \
|
DRI_CONF_OPT_B(radv_invariant_geom, def, \
|
||||||
"Mark geometry-affecting outputs as invariant")
|
"Mark geometry-affecting outputs as invariant")
|
||||||
|
|
||||||
|
#define DRI_CONF_RADV_SPLIT_FMA(def) \
|
||||||
|
DRI_CONF_OPT_B(radv_split_fma, def, \
|
||||||
|
"Split application-provided fused multiply-add in geometry stages")
|
||||||
|
|
||||||
#define DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(def) \
|
#define DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(def) \
|
||||||
DRI_CONF_OPT_B(radv_disable_tc_compat_htile_general, def, \
|
DRI_CONF_OPT_B(radv_disable_tc_compat_htile_general, def, \
|
||||||
"Disable TC-compat HTILE in GENERAL layout")
|
"Disable TC-compat HTILE in GENERAL layout")
|
||||||
|
Reference in New Issue
Block a user