ac: unify denorm setting enforcement
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4196>
This commit is contained in:
@@ -58,11 +58,13 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
|||||||
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||||
|
|
||||||
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||||
|
/* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
|
||||||
conf->float_mode = G_00B028_FLOAT_MODE(value);
|
conf->float_mode = G_00B028_FLOAT_MODE(value);
|
||||||
conf->rsrc1 = value;
|
conf->rsrc1 = value;
|
||||||
break;
|
break;
|
||||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||||
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||||
|
/* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
|
||||||
conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
|
conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
|
||||||
conf->rsrc2 = value;
|
conf->rsrc2 = value;
|
||||||
break;
|
break;
|
||||||
@@ -124,4 +126,15 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
|||||||
/* sgprs spills aren't spilling */
|
/* sgprs spills aren't spilling */
|
||||||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
|
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||||
|
* cost.
|
||||||
|
*
|
||||||
|
* Don't enable denormals for 32-bit floats, because:
|
||||||
|
* - denormals disable output modifiers
|
||||||
|
* - denormals break v_mad_f32
|
||||||
|
* - GFX6 & GFX7 would be very slow
|
||||||
|
*/
|
||||||
|
conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
|
||||||
|
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||||
}
|
}
|
||||||
|
@@ -966,20 +966,6 @@ radv_shader_variant_create(struct radv_device *device,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
|
||||||
* cost.
|
|
||||||
*
|
|
||||||
* If denormals are enabled, all floating-point output modifiers are
|
|
||||||
* ignored.
|
|
||||||
*
|
|
||||||
* Don't enable denormals for 32-bit floats, because:
|
|
||||||
* - Floating-point output modifiers would be ignored by the hw.
|
|
||||||
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
|
||||||
* have to stop using those.
|
|
||||||
* - GFX6 & GFX7 would be very slow.
|
|
||||||
*/
|
|
||||||
config.float_mode |= V_00B028_FP_64_DENORMS;
|
|
||||||
|
|
||||||
if (rtld_binary.lds_size > 0) {
|
if (rtld_binary.lds_size > 0) {
|
||||||
unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
|
unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
|
||||||
config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
|
config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
|
||||||
|
@@ -130,24 +130,7 @@ bool si_compile_llvm(struct si_screen *sscreen,
|
|||||||
|
|
||||||
bool ok = ac_rtld_read_config(&rtld, conf);
|
bool ok = ac_rtld_read_config(&rtld, conf);
|
||||||
ac_rtld_close(&rtld);
|
ac_rtld_close(&rtld);
|
||||||
if (!ok)
|
return ok;
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
|
||||||
* cost.
|
|
||||||
*
|
|
||||||
* If denormals are enabled, all floating-point output modifiers are
|
|
||||||
* ignored.
|
|
||||||
*
|
|
||||||
* Don't enable denormals for 32-bit floats, because:
|
|
||||||
* - Floating-point output modifiers would be ignored by the hw.
|
|
||||||
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
|
||||||
* have to stop using those.
|
|
||||||
* - GFX6 & GFX7 would be very slow.
|
|
||||||
*/
|
|
||||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_llvm_context_init(struct si_shader_context *ctx,
|
void si_llvm_context_init(struct si_shader_context *ctx,
|
||||||
|
Reference in New Issue
Block a user