ac/llvm: set target features per function instead of per target machine
This is a cleanup that allows the removal of the wave32 target machine and the wave32 pass manager. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
@@ -69,8 +69,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
|
|||||||
ctx->wave_size = wave_size;
|
ctx->wave_size = wave_size;
|
||||||
ctx->ballot_mask_bits = ballot_mask_bits;
|
ctx->ballot_mask_bits = ballot_mask_bits;
|
||||||
ctx->float_mode = float_mode;
|
ctx->float_mode = float_mode;
|
||||||
ctx->module =
|
ctx->module = ac_create_module(compiler->tm, ctx->context);
|
||||||
ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context);
|
|
||||||
ctx->builder = ac_create_builder(ctx->context, float_mode);
|
ctx->builder = ac_create_builder(ctx->context, float_mode);
|
||||||
|
|
||||||
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
|
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
|
||||||
|
@@ -188,18 +188,11 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
|||||||
const char **out_triple)
|
const char **out_triple)
|
||||||
{
|
{
|
||||||
assert(family >= CHIP_TAHITI);
|
assert(family >= CHIP_TAHITI);
|
||||||
char features[256];
|
|
||||||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
||||||
LLVMTargetRef target = ac_get_llvm_target(triple);
|
LLVMTargetRef target = ac_get_llvm_target(triple);
|
||||||
|
|
||||||
snprintf(features, sizeof(features), "+DumpCode%s%s",
|
|
||||||
family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
|
|
||||||
? ",+wavefrontsize64,-wavefrontsize32"
|
|
||||||
: "",
|
|
||||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
|
||||||
|
|
||||||
LLVMTargetMachineRef tm =
|
LLVMTargetMachineRef tm =
|
||||||
LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
|
LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level,
|
||||||
LLVMRelocDefault, LLVMCodeModelDefault);
|
LLVMRelocDefault, LLVMCodeModelDefault);
|
||||||
|
|
||||||
if (out_triple)
|
if (out_triple)
|
||||||
@@ -317,6 +310,20 @@ void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
|
|||||||
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
char features[2048];
|
||||||
|
|
||||||
|
snprintf(features, sizeof(features), "+DumpCode%s%s",
|
||||||
|
/* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */
|
||||||
|
ctx->chip_class == GFX9 ? ",-promote-alloca" : "",
|
||||||
|
/* Wave32 is the default. */
|
||||||
|
ctx->chip_class >= GFX10 && ctx->wave_size == 64 ?
|
||||||
|
",+wavefrontsize64,-wavefrontsize32" : "");
|
||||||
|
|
||||||
|
LLVMAddTargetDependentFunctionAttr(F, "target-features", features);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned ac_count_scratch_private_memory(LLVMValueRef function)
|
unsigned ac_count_scratch_private_memory(LLVMValueRef function)
|
||||||
{
|
{
|
||||||
unsigned private_mem_vgprs = 0;
|
unsigned private_mem_vgprs = 0;
|
||||||
@@ -362,14 +369,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (family >= CHIP_NAVI10) {
|
|
||||||
assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
|
|
||||||
compiler->tm_wave32 =
|
|
||||||
ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
|
|
||||||
if (!compiler->tm_wave32)
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
compiler->target_library_info = ac_create_target_library_info(triple);
|
compiler->target_library_info = ac_create_target_library_info(triple);
|
||||||
if (!compiler->target_library_info)
|
if (!compiler->target_library_info)
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -388,7 +387,6 @@ fail:
|
|||||||
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
||||||
{
|
{
|
||||||
ac_destroy_llvm_passes(compiler->passes);
|
ac_destroy_llvm_passes(compiler->passes);
|
||||||
ac_destroy_llvm_passes(compiler->passes_wave32);
|
|
||||||
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
||||||
|
|
||||||
if (compiler->passmgr)
|
if (compiler->passmgr)
|
||||||
@@ -399,6 +397,4 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
|||||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||||
if (compiler->tm)
|
if (compiler->tm)
|
||||||
LLVMDisposeTargetMachine(compiler->tm);
|
LLVMDisposeTargetMachine(compiler->tm);
|
||||||
if (compiler->tm_wave32)
|
|
||||||
LLVMDisposeTargetMachine(compiler->tm_wave32);
|
|
||||||
}
|
}
|
||||||
|
@@ -61,12 +61,10 @@ enum ac_func_attr
|
|||||||
|
|
||||||
enum ac_target_machine_options
|
enum ac_target_machine_options
|
||||||
{
|
{
|
||||||
AC_TM_SUPPORTS_SPILL = (1 << 0),
|
AC_TM_SUPPORTS_SPILL = 1 << 0,
|
||||||
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
|
AC_TM_CHECK_IR = 1 << 1,
|
||||||
AC_TM_CHECK_IR = (1 << 4),
|
AC_TM_ENABLE_GLOBAL_ISEL = 1 << 2,
|
||||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
|
AC_TM_CREATE_LOW_OPT = 1 << 3,
|
||||||
AC_TM_CREATE_LOW_OPT = (1 << 6),
|
|
||||||
AC_TM_WAVE32 = (1 << 7),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ac_float_mode
|
enum ac_float_mode
|
||||||
@@ -85,10 +83,6 @@ struct ac_llvm_compiler {
|
|||||||
LLVMTargetMachineRef tm;
|
LLVMTargetMachineRef tm;
|
||||||
struct ac_compiler_passes *passes;
|
struct ac_compiler_passes *passes;
|
||||||
|
|
||||||
/* Wave32 compiler for GFX10. */
|
|
||||||
LLVMTargetMachineRef tm_wave32;
|
|
||||||
struct ac_compiler_passes *passes_wave32;
|
|
||||||
|
|
||||||
/* Optional compiler for faster compilation with fewer optimizations.
|
/* Optional compiler for faster compilation with fewer optimizations.
|
||||||
* LLVM modules can be created with "tm" too. There is no difference.
|
* LLVM modules can be created with "tm" too. There is no difference.
|
||||||
*/
|
*/
|
||||||
@@ -115,6 +109,7 @@ void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
|
|||||||
|
|
||||||
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
|
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
|
||||||
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
|
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
|
||||||
|
void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
|
static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
|
||||||
{
|
{
|
||||||
|
@@ -47,12 +47,6 @@ class radv_llvm_per_thread_info {
|
|||||||
if (!passes)
|
if (!passes)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (llvm_info.tm_wave32) {
|
|
||||||
passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
|
|
||||||
if (!passes_wave32)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -107,6 +107,7 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil
|
|||||||
}
|
}
|
||||||
|
|
||||||
ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
|
ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
|
||||||
|
ac_llvm_set_target_features(main_function, ctx);
|
||||||
|
|
||||||
return main_function;
|
return main_function;
|
||||||
}
|
}
|
||||||
|
@@ -142,7 +142,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil
|
|||||||
|
|
||||||
enum ac_target_machine_options tm_options =
|
enum ac_target_machine_options tm_options =
|
||||||
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
|
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
|
||||||
(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
|
|
||||||
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
|
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
|
||||||
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
|
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
|
||||||
|
|
||||||
@@ -150,8 +149,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil
|
|||||||
ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
|
ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
|
||||||
compiler->passes = ac_create_llvm_passes(compiler->tm);
|
compiler->passes = ac_create_llvm_passes(compiler->tm);
|
||||||
|
|
||||||
if (compiler->tm_wave32)
|
|
||||||
compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32);
|
|
||||||
if (compiler->low_opt_tm)
|
if (compiler->low_opt_tm)
|
||||||
compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
|
compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
|
||||||
}
|
}
|
||||||
@@ -1266,9 +1263,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||||||
sscreen->pbb_persistent_states_per_bin <= 32);
|
sscreen->pbb_persistent_states_per_bin <= 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* LLVM doesn't support VGPR indexing on GFX9. */
|
|
||||||
sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class != GFX9;
|
|
||||||
|
|
||||||
(void)simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
|
(void)simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
|
||||||
sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
|
sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
|
||||||
|
|
||||||
|
@@ -548,7 +548,6 @@ struct si_screen {
|
|||||||
bool commutative_blend_add;
|
bool commutative_blend_add;
|
||||||
bool dpbb_allowed;
|
bool dpbb_allowed;
|
||||||
bool dfsm_allowed;
|
bool dfsm_allowed;
|
||||||
bool llvm_has_working_vgpr_indexing;
|
|
||||||
bool use_ngg;
|
bool use_ngg;
|
||||||
bool use_ngg_culling;
|
bool use_ngg_culling;
|
||||||
bool use_ngg_streamout;
|
bool use_ngg_streamout;
|
||||||
|
@@ -93,9 +93,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
|
|||||||
if (!si_replace_shader(count, binary)) {
|
if (!si_replace_shader(count, binary)) {
|
||||||
struct ac_compiler_passes *passes = compiler->passes;
|
struct ac_compiler_passes *passes = compiler->passes;
|
||||||
|
|
||||||
if (ac->wave_size == 32)
|
if (less_optimized && compiler->low_opt_passes)
|
||||||
passes = compiler->passes_wave32;
|
|
||||||
else if (less_optimized && compiler->low_opt_passes)
|
|
||||||
passes = compiler->low_opt_passes;
|
passes = compiler->low_opt_passes;
|
||||||
|
|
||||||
struct si_llvm_diagnostics diag = {debug};
|
struct si_llvm_diagnostics diag = {debug};
|
||||||
@@ -190,6 +188,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
|
|||||||
}
|
}
|
||||||
|
|
||||||
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
|
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
|
||||||
|
ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)
|
void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||||
|
Reference in New Issue
Block a user