ac/llvm: port functions to use ac_llvm_pointer
Reviewed-by: Mihai Preda <mhpreda@gmail.com> Reviewed-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19035>
This commit is contained in:

committed by
Marge Bot

parent
174caabab7
commit
0f00f74b20
@@ -40,6 +40,7 @@ enum ac_arg_regfile
|
||||
|
||||
enum ac_arg_type
|
||||
{
|
||||
AC_ARG_INVALID = -1,
|
||||
AC_ARG_FLOAT,
|
||||
AC_ARG_INT,
|
||||
AC_ARG_CONST_PTR, /* Pointer to i8 array */
|
||||
|
@@ -4320,10 +4320,10 @@ LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
|
||||
return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), "");
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
|
||||
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, LLVMValueRef *args,
|
||||
unsigned num_args)
|
||||
{
|
||||
LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
|
||||
LLVMValueRef ret = LLVMBuildCall2(ctx->builder, fn_type, func, args, num_args, "");
|
||||
LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
|
||||
return ret;
|
||||
}
|
||||
@@ -4538,6 +4538,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac
|
||||
base = ctx->v8i32;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -4550,7 +4551,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac
|
||||
}
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
|
||||
struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
|
||||
enum ac_llvm_calling_convention convention, const char *name,
|
||||
LLVMTypeRef ret_type, LLVMModuleRef module)
|
||||
{
|
||||
@@ -4583,14 +4584,17 @@ LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_con
|
||||
}
|
||||
}
|
||||
|
||||
ctx->main_function = main_function;
|
||||
ctx->main_function = (struct ac_llvm_pointer) {
|
||||
.value = main_function,
|
||||
.pointee_type = main_function_type
|
||||
};
|
||||
|
||||
/* Enable denormals for FP16 and FP64: */
|
||||
LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee");
|
||||
/* Disable denormals for FP32: */
|
||||
LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
|
||||
"preserve-sign,preserve-sign");
|
||||
return main_function;
|
||||
return ctx->main_function;
|
||||
}
|
||||
|
||||
void ac_build_s_endpgm(struct ac_llvm_context *ctx)
|
||||
|
@@ -83,7 +83,7 @@ struct ac_llvm_context {
|
||||
LLVMModuleRef module;
|
||||
LLVMBuilderRef builder;
|
||||
|
||||
LLVMValueRef main_function;
|
||||
struct ac_llvm_pointer main_function;
|
||||
|
||||
LLVMTypeRef voidt;
|
||||
LLVMTypeRef i1;
|
||||
@@ -561,8 +561,8 @@ LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
|
||||
|
||||
LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx);
|
||||
|
||||
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
|
||||
unsigned num_args);
|
||||
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func,
|
||||
LLVMValueRef *args, unsigned num_args);
|
||||
|
||||
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
|
||||
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope);
|
||||
@@ -595,7 +595,7 @@ LLVMTypeRef ac_arg_type_to_pointee_type(struct ac_llvm_context *ctx, enum ac_arg
|
||||
static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
|
||||
{
|
||||
assert(arg.used);
|
||||
return LLVMGetParam(ctx->main_function, arg.arg_index);
|
||||
return LLVMGetParam(ctx->main_function.value, arg.arg_index);
|
||||
}
|
||||
|
||||
static inline LLVMTypeRef ac_get_arg_pointee_type(struct ac_llvm_context *ctx, const struct ac_shader_args *args, struct ac_arg arg)
|
||||
@@ -613,9 +613,9 @@ enum ac_llvm_calling_convention
|
||||
AC_LLVM_AMDGPU_HS = 93,
|
||||
};
|
||||
|
||||
LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
|
||||
enum ac_llvm_calling_convention convention, const char *name,
|
||||
LLVMTypeRef ret_type, LLVMModuleRef module);
|
||||
struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
|
||||
enum ac_llvm_calling_convention convention, const char *name,
|
||||
LLVMTypeRef ret_type, LLVMModuleRef module);
|
||||
void ac_build_s_endpgm(struct ac_llvm_context *ctx);
|
||||
|
||||
void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,
|
||||
|
@@ -52,7 +52,7 @@ struct radv_shader_context {
|
||||
|
||||
unsigned max_workgroup_size;
|
||||
LLVMContextRef context;
|
||||
LLVMValueRef main_function;
|
||||
struct ac_llvm_pointer main_function;
|
||||
|
||||
LLVMValueRef descriptor_sets[MAX_SETS];
|
||||
|
||||
@@ -83,20 +83,20 @@ radv_shader_context_from_abi(struct ac_shader_abi *abi)
|
||||
return container_of(abi, struct radv_shader_context, abi);
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
static struct ac_llvm_pointer
|
||||
create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
|
||||
const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
|
||||
unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
|
||||
{
|
||||
LLVMValueRef main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
|
||||
struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
|
||||
|
||||
if (options->address32_hi) {
|
||||
ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-32bit-address-high-bits",
|
||||
ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits",
|
||||
options->address32_hi);
|
||||
}
|
||||
|
||||
ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
|
||||
ac_llvm_set_target_features(main_function, ctx);
|
||||
ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size);
|
||||
ac_llvm_set_target_features(main_function.value, ctx);
|
||||
|
||||
return main_function;
|
||||
}
|
||||
@@ -170,7 +170,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
|
||||
|
||||
ctx->main_function =
|
||||
create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
|
||||
get_llvm_calling_convention(ctx->main_function, stage),
|
||||
get_llvm_calling_convention(ctx->main_function.value, stage),
|
||||
ctx->max_workgroup_size, ctx->options);
|
||||
|
||||
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
|
||||
@@ -1599,7 +1599,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
|
||||
LLVMBasicBlockRef end_bb;
|
||||
LLVMValueRef switch_inst;
|
||||
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function, "end");
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function.value, "end");
|
||||
switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
|
||||
|
||||
for (unsigned stream = 0; stream < 4; stream++) {
|
||||
|
@@ -56,7 +56,7 @@ struct si_shader_context {
|
||||
LLVMBasicBlockRef merged_wrap_if_entry_block;
|
||||
int merged_wrap_if_label;
|
||||
|
||||
LLVMValueRef main_fn;
|
||||
struct ac_llvm_pointer main_fn;
|
||||
LLVMTypeRef return_type;
|
||||
|
||||
struct ac_arg const_and_shader_buffers;
|
||||
@@ -218,9 +218,11 @@ void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
|
||||
LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
|
||||
unsigned bitwidth);
|
||||
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle);
|
||||
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
|
||||
void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts,
|
||||
unsigned num_parts, unsigned main_part,
|
||||
unsigned next_shader_first_part, bool same_thread_count);
|
||||
unsigned next_shader_first_part,
|
||||
enum ac_arg_type *main_arg_types,
|
||||
bool same_thread_count);
|
||||
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
|
||||
struct nir_shader *nir, bool free_nir, bool ngg_cull_shader);
|
||||
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
||||
|
@@ -185,16 +185,16 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
|
||||
ctx->return_value = LLVMGetUndef(ctx->return_type);
|
||||
|
||||
if (ctx->screen->info.address32_hi) {
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-32bit-address-high-bits",
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-32bit-address-high-bits",
|
||||
ctx->screen->info.address32_hi);
|
||||
}
|
||||
|
||||
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
|
||||
si_shader_uses_streamout(ctx->shader))
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-gds-size", 256);
|
||||
|
||||
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
|
||||
ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
|
||||
ac_llvm_set_workgroup_size(ctx->main_fn.value, max_workgroup_size);
|
||||
ac_llvm_set_target_features(ctx->main_fn.value, &ctx->ac);
|
||||
}
|
||||
|
||||
void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
@@ -216,7 +216,7 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade
|
||||
/* Reserve register locations for VGPR inputs the PS prolog may need. */
|
||||
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
|
||||
ac_llvm_add_target_dep_function_attr(
|
||||
ctx->main_fn, "InitialPSInputAddr",
|
||||
ctx->main_fn.value, "InitialPSInputAddr",
|
||||
S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) |
|
||||
S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) |
|
||||
S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) |
|
||||
@@ -314,7 +314,7 @@ LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx)
|
||||
LLVMValueRef ptr[2], list;
|
||||
bool merged_shader = si_is_merged_shader(ctx->shader);
|
||||
|
||||
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS);
|
||||
ptr[0] = LLVMGetParam(ctx->main_fn.value, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS);
|
||||
list =
|
||||
LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
|
||||
return list;
|
||||
@@ -420,9 +420,10 @@ static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
|
||||
* Given a list of shader part functions, build a wrapper function that
|
||||
* runs them in sequence to form a monolithic shader.
|
||||
*/
|
||||
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
|
||||
void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts,
|
||||
unsigned num_parts, unsigned main_part,
|
||||
unsigned next_shader_first_part, bool same_thread_count)
|
||||
unsigned next_shader_first_part,
|
||||
enum ac_arg_type *main_arg_types, bool same_thread_count)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
/* PS epilog has one arg per color component; gfx9 merged shader
|
||||
@@ -440,8 +441,8 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
memset(&ctx->args, 0, sizeof(ctx->args));
|
||||
|
||||
for (unsigned i = 0; i < num_parts; ++i) {
|
||||
ac_add_function_attr(ctx->ac.context, parts[i], -1, AC_FUNC_ATTR_ALWAYSINLINE);
|
||||
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
|
||||
ac_add_function_attr(ctx->ac.context, parts[i].value, -1, AC_FUNC_ATTR_ALWAYSINLINE);
|
||||
LLVMSetLinkage(parts[i].value, LLVMPrivateLinkage);
|
||||
}
|
||||
|
||||
/* The parameters of the wrapper function correspond to those of the
|
||||
@@ -452,11 +453,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
num_sgprs = 0;
|
||||
num_vgprs = 0;
|
||||
|
||||
function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
|
||||
function_type = parts[0].pointee_type;
|
||||
num_first_params = LLVMCountParamTypes(function_type);
|
||||
|
||||
for (unsigned i = 0; i < num_first_params; ++i) {
|
||||
LLVMValueRef param = LLVMGetParam(parts[0], i);
|
||||
LLVMValueRef param = LLVMGetParam(parts[0].value, i);
|
||||
|
||||
if (ac_is_sgpr_param(param)) {
|
||||
assert(num_vgprs == 0);
|
||||
@@ -468,31 +469,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
|
||||
gprs = 0;
|
||||
while (gprs < num_sgprs + num_vgprs) {
|
||||
LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count);
|
||||
LLVMValueRef param = LLVMGetParam(parts[main_part].value, ctx->args.arg_count);
|
||||
LLVMTypeRef type = LLVMTypeOf(param);
|
||||
unsigned size = ac_get_type_size(type) / 4;
|
||||
|
||||
/* This is going to get casted anyways, so we don't have to
|
||||
* have the exact same type. But we do have to preserve the
|
||||
* pointer-ness so that LLVM knows about it.
|
||||
*/
|
||||
enum ac_arg_type arg_type = AC_ARG_INT;
|
||||
if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
|
||||
type = LLVMGetElementType(type);
|
||||
|
||||
if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
|
||||
if (LLVMGetVectorSize(type) == 4)
|
||||
arg_type = AC_ARG_CONST_DESC_PTR;
|
||||
else if (LLVMGetVectorSize(type) == 8)
|
||||
arg_type = AC_ARG_CONST_IMAGE_PTR;
|
||||
else
|
||||
assert(0);
|
||||
} else if (type == ctx->ac.f32) {
|
||||
arg_type = AC_ARG_CONST_FLOAT_PTR;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
enum ac_arg_type arg_type = main_arg_types[ctx->args.arg_count];
|
||||
assert(arg_type != AC_ARG_INVALID);
|
||||
|
||||
ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
|
||||
|
||||
@@ -507,7 +488,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
unsigned num_returns = 0;
|
||||
LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type;
|
||||
|
||||
last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1]));
|
||||
last_func_type = parts[num_parts - 1].pointee_type;
|
||||
return_type = LLVMGetReturnType(last_func_type);
|
||||
|
||||
switch (LLVMGetTypeKind(return_type)) {
|
||||
@@ -535,7 +516,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
num_out_sgpr = 0;
|
||||
|
||||
for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
|
||||
LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
|
||||
LLVMValueRef param = LLVMGetParam(ctx->main_fn.value, i);
|
||||
LLVMTypeRef param_type = LLVMTypeOf(param);
|
||||
LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
|
||||
unsigned size = ac_get_type_size(param_type) / 4;
|
||||
@@ -579,7 +560,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
LLVMValueRef in[AC_MAX_ARGS];
|
||||
LLVMTypeRef ret_type;
|
||||
unsigned out_idx = 0;
|
||||
unsigned num_params = LLVMCountParams(parts[part]);
|
||||
unsigned num_params = LLVMCountParams(parts[part].value);
|
||||
|
||||
/* Merged shaders are executed conditionally depending
|
||||
* on the number of enabled threads passed in the input SGPRs. */
|
||||
@@ -609,13 +590,13 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
unsigned param_size;
|
||||
LLVMValueRef arg = NULL;
|
||||
|
||||
param = LLVMGetParam(parts[part], param_idx);
|
||||
param = LLVMGetParam(parts[part].value, param_idx);
|
||||
param_type = LLVMTypeOf(param);
|
||||
param_size = ac_get_type_size(param_type) / 4;
|
||||
is_sgpr = ac_is_sgpr_param(param);
|
||||
|
||||
if (is_sgpr) {
|
||||
ac_add_function_attr(ctx->ac.context, parts[part], param_idx + 1, AC_FUNC_ATTR_INREG);
|
||||
ac_add_function_attr(ctx->ac.context, parts[part].value, param_idx + 1, AC_FUNC_ATTR_INREG);
|
||||
} else if (out_idx < num_out_sgpr) {
|
||||
/* Skip returned SGPRs the current part doesn't
|
||||
* declare on the input. */
|
||||
@@ -647,7 +628,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
||||
out_idx += param_size;
|
||||
}
|
||||
|
||||
ret = ac_build_call(&ctx->ac, parts[part], in, num_params);
|
||||
ret = ac_build_call(&ctx->ac, parts[part].pointee_type, parts[part].value, in, num_params);
|
||||
|
||||
if (!same_thread_count &&
|
||||
si_is_multi_part_shader(ctx->shader) && part + 1 == next_shader_first_part) {
|
||||
@@ -991,7 +972,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
si_llvm_init_ps_callbacks(ctx);
|
||||
|
||||
unsigned colors_read = ctx->shader->selector->info.colors_read;
|
||||
LLVMValueRef main_fn = ctx->main_fn;
|
||||
LLVMValueRef main_fn = ctx->main_fn.value;
|
||||
|
||||
LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);
|
||||
|
||||
@@ -1265,14 +1246,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
||||
ctx.so = *so;
|
||||
|
||||
LLVMValueRef ngg_cull_main_fn = NULL;
|
||||
struct ac_llvm_pointer ngg_cull_main_fn = {};
|
||||
if (sel->stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) {
|
||||
if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) {
|
||||
si_llvm_dispose(&ctx);
|
||||
return false;
|
||||
}
|
||||
ngg_cull_main_fn = ctx.main_fn;
|
||||
ctx.main_fn = NULL;
|
||||
ctx.main_fn.value = NULL;
|
||||
}
|
||||
|
||||
if (!si_llvm_translate_nir(&ctx, shader, nir, free_nir, false)) {
|
||||
@@ -1281,12 +1262,18 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
}
|
||||
|
||||
if (shader->is_monolithic && sel->stage == MESA_SHADER_VERTEX) {
|
||||
LLVMValueRef parts[4];
|
||||
struct ac_llvm_pointer parts[4];
|
||||
unsigned num_parts = 0;
|
||||
bool first_is_prolog = false;
|
||||
LLVMValueRef main_fn = ctx.main_fn;
|
||||
struct ac_llvm_pointer main_fn = ctx.main_fn;
|
||||
|
||||
if (ngg_cull_main_fn) {
|
||||
/* Preserve main arguments. */
|
||||
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
|
||||
for (int i = 0; i < ctx.args.arg_count; i++)
|
||||
main_arg_types[i] = ctx.args.args[i].type;
|
||||
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
|
||||
|
||||
if (ngg_cull_main_fn.value) {
|
||||
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true, false)) {
|
||||
union si_shader_part_key prolog_key;
|
||||
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true,
|
||||
@@ -1311,9 +1298,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
}
|
||||
parts[num_parts++] = main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false);
|
||||
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) {
|
||||
LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn;
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, main_arg_types, false);
|
||||
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn.value) {
|
||||
struct ac_llvm_pointer parts[3], prolog, main_fn = ctx.main_fn;
|
||||
|
||||
/* Preserve main arguments. */
|
||||
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
|
||||
for (int i = 0; i < ctx.args.arg_count; i++)
|
||||
main_arg_types[i] = ctx.args.args[i].type;
|
||||
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
|
||||
|
||||
/* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */
|
||||
union si_shader_part_key prolog_key;
|
||||
@@ -1330,11 +1323,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
parts[1] = prolog;
|
||||
parts[2] = main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
|
||||
si_build_wrapper_function(&ctx, parts, 3, 0, 0, main_arg_types, false);
|
||||
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_CTRL) {
|
||||
/* Preserve main arguments. */
|
||||
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX9) {
|
||||
struct si_shader_selector *ls = shader->key.ge.part.tcs.ls;
|
||||
LLVMValueRef parts[4];
|
||||
struct ac_llvm_pointer parts[4];
|
||||
bool vs_needs_prolog =
|
||||
si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false, false);
|
||||
|
||||
@@ -1366,6 +1362,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
shader->info.uses_instanceid |= ls->info.uses_instanceid;
|
||||
parts[1] = ctx.main_fn;
|
||||
|
||||
for (int i = 0; i < ctx.args.arg_count; i++)
|
||||
main_arg_types[i] = ctx.args.args[i].type;
|
||||
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
|
||||
|
||||
/* LS prolog */
|
||||
if (vs_needs_prolog) {
|
||||
union si_shader_part_key vs_prolog_key;
|
||||
@@ -1382,26 +1382,33 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
|
||||
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog,
|
||||
vs_needs_prolog, vs_needs_prolog ? 2 : 1,
|
||||
main_arg_types,
|
||||
shader->key.ge.opt.same_patch_vertices);
|
||||
} else {
|
||||
LLVMValueRef parts[2];
|
||||
struct ac_llvm_pointer parts[2];
|
||||
union si_shader_part_key epilog_key;
|
||||
|
||||
parts[0] = ctx.main_fn;
|
||||
|
||||
for (int i = 0; i < ctx.args.arg_count; i++)
|
||||
main_arg_types[i] = ctx.args.args[i].type;
|
||||
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
|
||||
|
||||
memset(&epilog_key, 0, sizeof(epilog_key));
|
||||
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
|
||||
si_llvm_build_tcs_epilog(&ctx, &epilog_key);
|
||||
parts[1] = ctx.main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, 2, 0, 0, false);
|
||||
si_build_wrapper_function(&ctx, parts, 2, 0, 0, main_arg_types, false);
|
||||
}
|
||||
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_GEOMETRY) {
|
||||
if (ctx.screen->info.gfx_level >= GFX9) {
|
||||
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
|
||||
|
||||
struct si_shader_selector *es = shader->key.ge.part.gs.es;
|
||||
LLVMValueRef es_prolog = NULL;
|
||||
LLVMValueRef es_main = NULL;
|
||||
LLVMValueRef gs_main = ctx.main_fn;
|
||||
struct ac_llvm_pointer es_prolog = {};
|
||||
struct ac_llvm_pointer es_main = {};
|
||||
struct ac_llvm_pointer gs_main = ctx.main_fn;
|
||||
|
||||
/* ES main part */
|
||||
struct si_shader shader_es = {};
|
||||
@@ -1426,6 +1433,11 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
shader->info.uses_instanceid |= es->info.uses_instanceid;
|
||||
es_main = ctx.main_fn;
|
||||
|
||||
/* Preserve main (= es_main) arguments. */
|
||||
for (int i = 0; i < ctx.args.arg_count; i++)
|
||||
main_arg_types[i] = ctx.args.args[i].type;
|
||||
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
|
||||
|
||||
/* ES prolog */
|
||||
if (es->stage == MESA_SHADER_VERTEX &&
|
||||
si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false, true)) {
|
||||
@@ -1442,16 +1454,16 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
ctx.stage = MESA_SHADER_GEOMETRY;
|
||||
|
||||
/* Prepare the array of shader parts. */
|
||||
LLVMValueRef parts[4];
|
||||
struct ac_llvm_pointer parts[4];
|
||||
unsigned num_parts = 0, main_part;
|
||||
|
||||
if (es_prolog)
|
||||
if (es_prolog.value)
|
||||
parts[num_parts++] = es_prolog;
|
||||
|
||||
parts[main_part = num_parts++] = es_main;
|
||||
parts[num_parts++] = gs_main;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, false);
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, main_arg_types, false);
|
||||
} else {
|
||||
/* Nothing to do for gfx6-8. The shader has only 1 part and it's ctx.main_fn. */
|
||||
}
|
||||
@@ -1462,7 +1474,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
si_llvm_optimize_module(&ctx);
|
||||
|
||||
/* Make sure the input is a pointer and not integer followed by inttoptr. */
|
||||
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind);
|
||||
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn.value, 0))) == LLVMPointerTypeKind);
|
||||
|
||||
/* Compile to bytecode. */
|
||||
if (!si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, &ctx.ac, debug,
|
||||
|
@@ -453,7 +453,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
LLVMBasicBlockRef end_bb;
|
||||
LLVMValueRef switch_inst;
|
||||
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn.value, "end");
|
||||
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
|
||||
|
||||
for (int stream = 0; stream < 4; stream++) {
|
||||
|
@@ -206,7 +206,7 @@ static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
|
||||
LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func];
|
||||
assert(cond);
|
||||
|
||||
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF);
|
||||
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF);
|
||||
if (LLVMTypeOf(alpha) == ctx->ac.f16)
|
||||
alpha_ref = LLVMBuildFPTrunc(ctx->ac.builder, alpha_ref, ctx->ac.f16, "");
|
||||
|
||||
@@ -500,7 +500,7 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
|
||||
|
||||
/* Set SGPRs. */
|
||||
ret = LLVMBuildInsertValue(
|
||||
builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF)),
|
||||
builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF)),
|
||||
SI_SGPR_ALPHA_REF, "");
|
||||
|
||||
/* Set VGPRs */
|
||||
@@ -609,7 +609,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
|
||||
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
|
||||
func = ctx->main_fn;
|
||||
func = ctx->main_fn.value;
|
||||
|
||||
/* Copy inputs to outputs. This should be no-op, as the registers match,
|
||||
* but it will prevent the compiler from overwriting them unintentionally.
|
||||
@@ -849,7 +849,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "ps_epilog", NULL, 0, 0);
|
||||
/* Disable elimination of unused inputs. */
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "InitialPSInputAddr", 0xffffff);
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff);
|
||||
|
||||
/* Prepare color. */
|
||||
unsigned vgpr = ctx->args.num_sgprs_used;
|
||||
@@ -861,7 +861,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||
|
||||
if (color_type != SI_TYPE_ANY32) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr + i / 2);
|
||||
color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr + i / 2);
|
||||
color[write_i][i] = LLVMBuildBitCast(ctx->ac.builder, color[write_i][i],
|
||||
ctx->ac.v2f16, "");
|
||||
color[write_i][i] = ac_llvm_extract_elem(&ctx->ac, color[write_i][i], i % 2);
|
||||
@@ -869,7 +869,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||
vgpr += 4;
|
||||
} else {
|
||||
for (i = 0; i < 4; i++)
|
||||
color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr++);
|
||||
color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr++);
|
||||
}
|
||||
|
||||
si_llvm_build_clamp_alpha_test(ctx, color[write_i], write_i);
|
||||
@@ -888,11 +888,11 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||
util_bitcount(key->ps_epilog.colors_written) * 4;
|
||||
|
||||
if (key->ps_epilog.writes_z)
|
||||
depth = LLVMGetParam(ctx->main_fn, vgpr_index++);
|
||||
depth = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
|
||||
if (key->ps_epilog.writes_stencil)
|
||||
stencil = LLVMGetParam(ctx->main_fn, vgpr_index++);
|
||||
stencil = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
|
||||
if (key->ps_epilog.writes_samplemask)
|
||||
samplemask = LLVMGetParam(ctx->main_fn, vgpr_index++);
|
||||
samplemask = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
|
||||
|
||||
ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, mrtz_alpha, false,
|
||||
&exp.args[exp.num++]);
|
||||
@@ -932,9 +932,14 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
|
||||
|
||||
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader)
|
||||
{
|
||||
LLVMValueRef parts[3];
|
||||
struct ac_llvm_pointer parts[3];
|
||||
unsigned num_parts = 0, main_index;
|
||||
LLVMValueRef main_fn = ctx->main_fn;
|
||||
struct ac_llvm_pointer main_fn = ctx->main_fn;
|
||||
/* Preserve main arguments. */
|
||||
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
|
||||
for (int i = 0; i < ctx->args.arg_count; i++)
|
||||
main_arg_types[i] = ctx->args.args[i].type;
|
||||
|
||||
|
||||
union si_shader_part_key prolog_key;
|
||||
si_get_ps_prolog_key(shader, &prolog_key, false);
|
||||
@@ -952,7 +957,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
|
||||
si_llvm_build_ps_epilog(ctx, &epilog_key);
|
||||
parts[num_parts++] = ctx->main_fn;
|
||||
|
||||
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, false);
|
||||
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false);
|
||||
}
|
||||
|
||||
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
|
||||
|
@@ -275,7 +275,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
|
||||
|
||||
LLVMValueRef value[4];
|
||||
for (unsigned i = component; i < component + num_components; i++) {
|
||||
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
|
||||
value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i);
|
||||
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
|
||||
}
|
||||
|
||||
|
@@ -76,11 +76,11 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
|
||||
if (divisor_is_one || divisor_is_fetched) {
|
||||
/* Add StartInstance. */
|
||||
index = LLVMBuildAdd(ctx->ac.builder, index,
|
||||
LLVMGetParam(ctx->main_fn, start_instance), "");
|
||||
LLVMGetParam(ctx->main_fn.value, start_instance), "");
|
||||
} else {
|
||||
/* VertexID + BaseVertex */
|
||||
index = LLVMBuildAdd(ctx->ac.builder, vertex_id,
|
||||
LLVMGetParam(ctx->main_fn, base_vertex), "");
|
||||
LLVMGetParam(ctx->main_fn.value, base_vertex), "");
|
||||
}
|
||||
|
||||
return index;
|
||||
@@ -103,8 +103,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
|
||||
if (input_index == 0) {
|
||||
/* Position: */
|
||||
LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs);
|
||||
LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1);
|
||||
LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs);
|
||||
LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 1);
|
||||
|
||||
LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
|
||||
LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
|
||||
@@ -116,7 +116,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
|
||||
out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, "");
|
||||
out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, "");
|
||||
out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2);
|
||||
out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 2);
|
||||
out[3] = ctx->ac.f32_1;
|
||||
return;
|
||||
}
|
||||
@@ -126,19 +126,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
|
||||
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i);
|
||||
out[i] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3 + i);
|
||||
}
|
||||
} else {
|
||||
assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
|
||||
LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3);
|
||||
LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4);
|
||||
LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5);
|
||||
LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6);
|
||||
LLVMValueRef x1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3);
|
||||
LLVMValueRef y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 4);
|
||||
LLVMValueRef x2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 5);
|
||||
LLVMValueRef y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 6);
|
||||
|
||||
out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
|
||||
out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
|
||||
out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7);
|
||||
out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8);
|
||||
out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 7);
|
||||
out[3] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 8);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -183,8 +183,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
input_index, ctx->instance_divisor_constbuf,
|
||||
ctx->args.start_instance.arg_index,
|
||||
ctx->args.base_vertex.arg_index);
|
||||
} else
|
||||
vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index);
|
||||
} else {
|
||||
vertex_index = LLVMGetParam(ctx->main_fn.value, ctx->vertex_index0.arg_index + input_index);
|
||||
}
|
||||
|
||||
/* Use the open-coded implementation for all loads of doubles and
|
||||
* of dword-sized data that needs fixups. We need to insert conversion
|
||||
@@ -961,7 +962,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
|
||||
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
|
||||
func = ctx->main_fn;
|
||||
func = ctx->main_fn.value;
|
||||
|
||||
for (i = 0; i < num_input_vgprs; i++) {
|
||||
input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);
|
||||
|
Reference in New Issue
Block a user