radv: Move more stuff to variant create time.

These computations are moved because they depend on the linker result.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
@@ -3886,70 +3886,12 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
|
|||||||
fprintf(stderr, "compile failed\n");
|
fprintf(stderr, "compile failed\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options->dump_shader)
|
|
||||||
fprintf(stderr, "disasm:\n%s\n", binary.disasm_string);
|
|
||||||
|
|
||||||
ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill);
|
ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill);
|
||||||
|
|
||||||
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
|
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
|
||||||
LLVMDisposeModule(llvm_module);
|
LLVMDisposeModule(llvm_module);
|
||||||
LLVMContextDispose(ctx);
|
LLVMContextDispose(ctx);
|
||||||
|
|
||||||
if (stage == MESA_SHADER_FRAGMENT) {
|
|
||||||
shader_info->num_input_vgprs = 0;
|
|
||||||
if (G_0286CC_PERSP_SAMPLE_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_PERSP_CENTER_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_PERSP_CENTROID_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_PERSP_PULL_MODEL_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 3;
|
|
||||||
if (G_0286CC_LINEAR_SAMPLE_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_LINEAR_CENTER_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_LINEAR_CENTROID_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 2;
|
|
||||||
if (G_0286CC_LINE_STIPPLE_TEX_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_POS_X_FLOAT_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_POS_Y_FLOAT_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_POS_Z_FLOAT_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_POS_W_FLOAT_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_FRONT_FACE_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_ANCILLARY_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_SAMPLE_COVERAGE_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
if (G_0286CC_POS_FIXED_PT_ENA(config.spi_ps_input_addr))
|
|
||||||
shader_info->num_input_vgprs += 1;
|
|
||||||
}
|
|
||||||
config.num_vgprs = MAX2(config.num_vgprs, shader_info->num_input_vgprs);
|
|
||||||
|
|
||||||
/* +3 for scratch wave offset and VCC */
|
|
||||||
config.num_sgprs = MAX2(config.num_sgprs,
|
|
||||||
shader_info->num_input_sgprs + 3);
|
|
||||||
|
|
||||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
|
||||||
* cost.
|
|
||||||
*
|
|
||||||
* If denormals are enabled, all floating-point output modifiers are
|
|
||||||
* ignored.
|
|
||||||
*
|
|
||||||
* Don't enable denormals for 32-bit floats, because:
|
|
||||||
* - Floating-point output modifiers would be ignored by the hw.
|
|
||||||
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
|
||||||
* have to stop using those.
|
|
||||||
* - GFX6 & GFX7 would be very slow.
|
|
||||||
*/
|
|
||||||
config.float_mode |= V_00B028_FP_64_DENORMS;
|
|
||||||
|
|
||||||
size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0;
|
size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0;
|
||||||
size_t llvm_ir_size = binary.llvm_ir_string ? strlen(binary.llvm_ir_string) : 0;
|
size_t llvm_ir_size = binary.llvm_ir_string ? strlen(binary.llvm_ir_string) : 0;
|
||||||
size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size +
|
size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size +
|
||||||
|
@@ -478,8 +478,65 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
|
|||||||
{
|
{
|
||||||
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
|
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
|
||||||
unsigned vgpr_comp_cnt = 0;
|
unsigned vgpr_comp_cnt = 0;
|
||||||
|
unsigned num_input_vgprs = info->num_input_vgprs;
|
||||||
|
|
||||||
|
if (stage == MESA_SHADER_FRAGMENT) {
|
||||||
|
num_input_vgprs = 0;
|
||||||
|
if (G_0286CC_PERSP_SAMPLE_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_PERSP_CENTER_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_PERSP_CENTROID_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_PERSP_PULL_MODEL_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 3;
|
||||||
|
if (G_0286CC_LINEAR_SAMPLE_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_LINEAR_CENTER_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_LINEAR_CENTROID_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 2;
|
||||||
|
if (G_0286CC_LINE_STIPPLE_TEX_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_POS_X_FLOAT_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_POS_Y_FLOAT_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_POS_Z_FLOAT_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_POS_W_FLOAT_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_FRONT_FACE_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_ANCILLARY_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_SAMPLE_COVERAGE_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
if (G_0286CC_POS_FIXED_PT_ENA(config_in->spi_ps_input_addr))
|
||||||
|
num_input_vgprs += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
|
||||||
|
/* +3 for scratch wave offset and VCC */
|
||||||
|
unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
|
||||||
|
|
||||||
*config_out = *config_in;
|
*config_out = *config_in;
|
||||||
|
config_out->num_vgprs = num_vgprs;
|
||||||
|
config_out->num_sgprs = num_sgprs;
|
||||||
|
|
||||||
|
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||||
|
* cost.
|
||||||
|
*
|
||||||
|
* If denormals are enabled, all floating-point output modifiers are
|
||||||
|
* ignored.
|
||||||
|
*
|
||||||
|
* Don't enable denormals for 32-bit floats, because:
|
||||||
|
* - Floating-point output modifiers would be ignored by the hw.
|
||||||
|
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
||||||
|
* have to stop using those.
|
||||||
|
* - GFX6 & GFX7 would be very slow.
|
||||||
|
*/
|
||||||
|
config_out->float_mode |= V_00B028_FP_64_DENORMS;
|
||||||
|
|
||||||
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
|
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
|
||||||
S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) |
|
S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) |
|
||||||
@@ -490,10 +547,10 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
|
|||||||
S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
|
S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
|
||||||
S_00B12C_SO_EN(!!info->info.so.num_outputs);
|
S_00B12C_SO_EN(!!info->info.so.num_outputs);
|
||||||
|
|
||||||
config_out->rsrc1 = S_00B848_VGPRS((config_in->num_vgprs - 1) / 4) |
|
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / 4) |
|
||||||
S_00B848_SGPRS((config_in->num_sgprs - 1) / 8) |
|
S_00B848_SGPRS((num_sgprs - 1) / 8) |
|
||||||
S_00B848_DX10_CLAMP(1) |
|
S_00B848_DX10_CLAMP(1) |
|
||||||
S_00B848_FLOAT_MODE(config_in->float_mode);
|
S_00B848_FLOAT_MODE(config_out->float_mode);
|
||||||
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case MESA_SHADER_TESS_EVAL:
|
case MESA_SHADER_TESS_EVAL:
|
||||||
@@ -807,6 +864,11 @@ shader_variant_compile(struct radv_device *device,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (options->dump_shader) {
|
||||||
|
fprintf(stderr, "disasm:\n%s\n", variant->disasm_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (device->keep_shader_info) {
|
if (device->keep_shader_info) {
|
||||||
if (!gs_copy_shader && !module->nir) {
|
if (!gs_copy_shader && !module->nir) {
|
||||||
variant->nir = *shaders;
|
variant->nir = *shaders;
|
||||||
|
Reference in New Issue
Block a user