ac/llvm: port functions to use ac_llvm_pointer

Reviewed-by: Mihai Preda <mhpreda@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19035>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2022-10-04 15:15:54 +02:00
committed by Marge Bot
parent 174caabab7
commit 0f00f74b20
10 changed files with 138 additions and 113 deletions

View File

@@ -40,6 +40,7 @@ enum ac_arg_regfile
enum ac_arg_type
{
AC_ARG_INVALID = -1,
AC_ARG_FLOAT,
AC_ARG_INT,
AC_ARG_CONST_PTR, /* Pointer to i8 array */

View File

@@ -4320,10 +4320,10 @@ LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), "");
}
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, LLVMValueRef *args,
unsigned num_args)
{
LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
LLVMValueRef ret = LLVMBuildCall2(ctx->builder, fn_type, func, args, num_args, "");
LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
return ret;
}
@@ -4538,6 +4538,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac
base = ctx->v8i32;
break;
default:
assert(false);
return NULL;
}
@@ -4550,7 +4551,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac
}
}
LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
enum ac_llvm_calling_convention convention, const char *name,
LLVMTypeRef ret_type, LLVMModuleRef module)
{
@@ -4583,14 +4584,17 @@ LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_con
}
}
ctx->main_function = main_function;
ctx->main_function = (struct ac_llvm_pointer) {
.value = main_function,
.pointee_type = main_function_type
};
/* Enable denormals for FP16 and FP64: */
LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee");
/* Disable denormals for FP32: */
LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
"preserve-sign,preserve-sign");
return main_function;
return ctx->main_function;
}
void ac_build_s_endpgm(struct ac_llvm_context *ctx)

View File

@@ -83,7 +83,7 @@ struct ac_llvm_context {
LLVMModuleRef module;
LLVMBuilderRef builder;
LLVMValueRef main_function;
struct ac_llvm_pointer main_function;
LLVMTypeRef voidt;
LLVMTypeRef i1;
@@ -561,8 +561,8 @@ LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx);
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
unsigned num_args);
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func,
LLVMValueRef *args, unsigned num_args);
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope);
@@ -595,7 +595,7 @@ LLVMTypeRef ac_arg_type_to_pointee_type(struct ac_llvm_context *ctx, enum ac_arg
static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
{
assert(arg.used);
return LLVMGetParam(ctx->main_function, arg.arg_index);
return LLVMGetParam(ctx->main_function.value, arg.arg_index);
}
static inline LLVMTypeRef ac_get_arg_pointee_type(struct ac_llvm_context *ctx, const struct ac_shader_args *args, struct ac_arg arg)
@@ -613,9 +613,9 @@ enum ac_llvm_calling_convention
AC_LLVM_AMDGPU_HS = 93,
};
LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
enum ac_llvm_calling_convention convention, const char *name,
LLVMTypeRef ret_type, LLVMModuleRef module);
struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
enum ac_llvm_calling_convention convention, const char *name,
LLVMTypeRef ret_type, LLVMModuleRef module);
void ac_build_s_endpgm(struct ac_llvm_context *ctx);
void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,

View File

@@ -52,7 +52,7 @@ struct radv_shader_context {
unsigned max_workgroup_size;
LLVMContextRef context;
LLVMValueRef main_function;
struct ac_llvm_pointer main_function;
LLVMValueRef descriptor_sets[MAX_SETS];
@@ -83,20 +83,20 @@ radv_shader_context_from_abi(struct ac_shader_abi *abi)
return container_of(abi, struct radv_shader_context, abi);
}
static LLVMValueRef
static struct ac_llvm_pointer
create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
{
LLVMValueRef main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
if (options->address32_hi) {
ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-32bit-address-high-bits",
ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits",
options->address32_hi);
}
ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
ac_llvm_set_target_features(main_function, ctx);
ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size);
ac_llvm_set_target_features(main_function.value, ctx);
return main_function;
}
@@ -170,7 +170,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
ctx->main_function =
create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
get_llvm_calling_convention(ctx->main_function, stage),
get_llvm_calling_convention(ctx->main_function.value, stage),
ctx->max_workgroup_size, ctx->options);
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
@@ -1599,7 +1599,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
LLVMBasicBlockRef end_bb;
LLVMValueRef switch_inst;
end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function, "end");
end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function.value, "end");
switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
for (unsigned stream = 0; stream < 4; stream++) {

View File

@@ -56,7 +56,7 @@ struct si_shader_context {
LLVMBasicBlockRef merged_wrap_if_entry_block;
int merged_wrap_if_label;
LLVMValueRef main_fn;
struct ac_llvm_pointer main_fn;
LLVMTypeRef return_type;
struct ac_arg const_and_shader_buffers;
@@ -218,9 +218,11 @@ void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
unsigned bitwidth);
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle);
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts,
unsigned num_parts, unsigned main_part,
unsigned next_shader_first_part, bool same_thread_count);
unsigned next_shader_first_part,
enum ac_arg_type *main_arg_types,
bool same_thread_count);
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
struct nir_shader *nir, bool free_nir, bool ngg_cull_shader);
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,

View File

@@ -185,16 +185,16 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
ctx->return_value = LLVMGetUndef(ctx->return_type);
if (ctx->screen->info.address32_hi) {
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-32bit-address-high-bits",
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-32bit-address-high-bits",
ctx->screen->info.address32_hi);
}
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
si_shader_uses_streamout(ctx->shader))
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-gds-size", 256);
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
ac_llvm_set_workgroup_size(ctx->main_fn.value, max_workgroup_size);
ac_llvm_set_target_features(ctx->main_fn.value, &ctx->ac);
}
void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)
@@ -216,7 +216,7 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade
/* Reserve register locations for VGPR inputs the PS prolog may need. */
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
ac_llvm_add_target_dep_function_attr(
ctx->main_fn, "InitialPSInputAddr",
ctx->main_fn.value, "InitialPSInputAddr",
S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) |
S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) |
@@ -314,7 +314,7 @@ LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx)
LLVMValueRef ptr[2], list;
bool merged_shader = si_is_merged_shader(ctx->shader);
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS);
ptr[0] = LLVMGetParam(ctx->main_fn.value, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS);
list =
LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
return list;
@@ -420,9 +420,10 @@ static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
* Given a list of shader part functions, build a wrapper function that
* runs them in sequence to form a monolithic shader.
*/
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts,
unsigned num_parts, unsigned main_part,
unsigned next_shader_first_part, bool same_thread_count)
unsigned next_shader_first_part,
enum ac_arg_type *main_arg_types, bool same_thread_count)
{
LLVMBuilderRef builder = ctx->ac.builder;
/* PS epilog has one arg per color component; gfx9 merged shader
@@ -440,8 +441,8 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
memset(&ctx->args, 0, sizeof(ctx->args));
for (unsigned i = 0; i < num_parts; ++i) {
ac_add_function_attr(ctx->ac.context, parts[i], -1, AC_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
ac_add_function_attr(ctx->ac.context, parts[i].value, -1, AC_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i].value, LLVMPrivateLinkage);
}
/* The parameters of the wrapper function correspond to those of the
@@ -452,11 +453,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
num_sgprs = 0;
num_vgprs = 0;
function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
function_type = parts[0].pointee_type;
num_first_params = LLVMCountParamTypes(function_type);
for (unsigned i = 0; i < num_first_params; ++i) {
LLVMValueRef param = LLVMGetParam(parts[0], i);
LLVMValueRef param = LLVMGetParam(parts[0].value, i);
if (ac_is_sgpr_param(param)) {
assert(num_vgprs == 0);
@@ -468,31 +469,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
gprs = 0;
while (gprs < num_sgprs + num_vgprs) {
LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count);
LLVMValueRef param = LLVMGetParam(parts[main_part].value, ctx->args.arg_count);
LLVMTypeRef type = LLVMTypeOf(param);
unsigned size = ac_get_type_size(type) / 4;
/* This is going to get casted anyways, so we don't have to
* have the exact same type. But we do have to preserve the
* pointer-ness so that LLVM knows about it.
*/
enum ac_arg_type arg_type = AC_ARG_INT;
if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
type = LLVMGetElementType(type);
if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
if (LLVMGetVectorSize(type) == 4)
arg_type = AC_ARG_CONST_DESC_PTR;
else if (LLVMGetVectorSize(type) == 8)
arg_type = AC_ARG_CONST_IMAGE_PTR;
else
assert(0);
} else if (type == ctx->ac.f32) {
arg_type = AC_ARG_CONST_FLOAT_PTR;
} else {
assert(0);
}
}
enum ac_arg_type arg_type = main_arg_types[ctx->args.arg_count];
assert(arg_type != AC_ARG_INVALID);
ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
@@ -507,7 +488,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
unsigned num_returns = 0;
LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type;
last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1]));
last_func_type = parts[num_parts - 1].pointee_type;
return_type = LLVMGetReturnType(last_func_type);
switch (LLVMGetTypeKind(return_type)) {
@@ -535,7 +516,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
num_out_sgpr = 0;
for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
LLVMValueRef param = LLVMGetParam(ctx->main_fn.value, i);
LLVMTypeRef param_type = LLVMTypeOf(param);
LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
unsigned size = ac_get_type_size(param_type) / 4;
@@ -579,7 +560,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
LLVMValueRef in[AC_MAX_ARGS];
LLVMTypeRef ret_type;
unsigned out_idx = 0;
unsigned num_params = LLVMCountParams(parts[part]);
unsigned num_params = LLVMCountParams(parts[part].value);
/* Merged shaders are executed conditionally depending
* on the number of enabled threads passed in the input SGPRs. */
@@ -609,13 +590,13 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
unsigned param_size;
LLVMValueRef arg = NULL;
param = LLVMGetParam(parts[part], param_idx);
param = LLVMGetParam(parts[part].value, param_idx);
param_type = LLVMTypeOf(param);
param_size = ac_get_type_size(param_type) / 4;
is_sgpr = ac_is_sgpr_param(param);
if (is_sgpr) {
ac_add_function_attr(ctx->ac.context, parts[part], param_idx + 1, AC_FUNC_ATTR_INREG);
ac_add_function_attr(ctx->ac.context, parts[part].value, param_idx + 1, AC_FUNC_ATTR_INREG);
} else if (out_idx < num_out_sgpr) {
/* Skip returned SGPRs the current part doesn't
* declare on the input. */
@@ -647,7 +628,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
out_idx += param_size;
}
ret = ac_build_call(&ctx->ac, parts[part], in, num_params);
ret = ac_build_call(&ctx->ac, parts[part].pointee_type, parts[part].value, in, num_params);
if (!same_thread_count &&
si_is_multi_part_shader(ctx->shader) && part + 1 == next_shader_first_part) {
@@ -991,7 +972,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
si_llvm_init_ps_callbacks(ctx);
unsigned colors_read = ctx->shader->selector->info.colors_read;
LLVMValueRef main_fn = ctx->main_fn;
LLVMValueRef main_fn = ctx->main_fn.value;
LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);
@@ -1265,14 +1246,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
ctx.so = *so;
LLVMValueRef ngg_cull_main_fn = NULL;
struct ac_llvm_pointer ngg_cull_main_fn = {};
if (sel->stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) {
if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) {
si_llvm_dispose(&ctx);
return false;
}
ngg_cull_main_fn = ctx.main_fn;
ctx.main_fn = NULL;
ctx.main_fn.value = NULL;
}
if (!si_llvm_translate_nir(&ctx, shader, nir, free_nir, false)) {
@@ -1281,12 +1262,18 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
}
if (shader->is_monolithic && sel->stage == MESA_SHADER_VERTEX) {
LLVMValueRef parts[4];
struct ac_llvm_pointer parts[4];
unsigned num_parts = 0;
bool first_is_prolog = false;
LLVMValueRef main_fn = ctx.main_fn;
struct ac_llvm_pointer main_fn = ctx.main_fn;
if (ngg_cull_main_fn) {
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
if (ngg_cull_main_fn.value) {
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true, false)) {
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true,
@@ -1311,9 +1298,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
}
parts[num_parts++] = main_fn;
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false);
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) {
LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, main_arg_types, false);
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn.value) {
struct ac_llvm_pointer parts[3], prolog, main_fn = ctx.main_fn;
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
/* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */
union si_shader_part_key prolog_key;
@@ -1330,11 +1323,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
parts[1] = prolog;
parts[2] = main_fn;
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
si_build_wrapper_function(&ctx, parts, 3, 0, 0, main_arg_types, false);
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_CTRL) {
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
if (sscreen->info.gfx_level >= GFX9) {
struct si_shader_selector *ls = shader->key.ge.part.tcs.ls;
LLVMValueRef parts[4];
struct ac_llvm_pointer parts[4];
bool vs_needs_prolog =
si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false, false);
@@ -1366,6 +1362,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader->info.uses_instanceid |= ls->info.uses_instanceid;
parts[1] = ctx.main_fn;
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
/* LS prolog */
if (vs_needs_prolog) {
union si_shader_part_key vs_prolog_key;
@@ -1382,26 +1382,33 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog,
vs_needs_prolog, vs_needs_prolog ? 2 : 1,
main_arg_types,
shader->key.ge.opt.same_patch_vertices);
} else {
LLVMValueRef parts[2];
struct ac_llvm_pointer parts[2];
union si_shader_part_key epilog_key;
parts[0] = ctx.main_fn;
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
si_llvm_build_tcs_epilog(&ctx, &epilog_key);
parts[1] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0, 0, false);
si_build_wrapper_function(&ctx, parts, 2, 0, 0, main_arg_types, false);
}
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_GEOMETRY) {
if (ctx.screen->info.gfx_level >= GFX9) {
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
struct si_shader_selector *es = shader->key.ge.part.gs.es;
LLVMValueRef es_prolog = NULL;
LLVMValueRef es_main = NULL;
LLVMValueRef gs_main = ctx.main_fn;
struct ac_llvm_pointer es_prolog = {};
struct ac_llvm_pointer es_main = {};
struct ac_llvm_pointer gs_main = ctx.main_fn;
/* ES main part */
struct si_shader shader_es = {};
@@ -1426,6 +1433,11 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader->info.uses_instanceid |= es->info.uses_instanceid;
es_main = ctx.main_fn;
/* Preserve main (= es_main) arguments. */
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
/* ES prolog */
if (es->stage == MESA_SHADER_VERTEX &&
si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false, true)) {
@@ -1442,16 +1454,16 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
ctx.stage = MESA_SHADER_GEOMETRY;
/* Prepare the array of shader parts. */
LLVMValueRef parts[4];
struct ac_llvm_pointer parts[4];
unsigned num_parts = 0, main_part;
if (es_prolog)
if (es_prolog.value)
parts[num_parts++] = es_prolog;
parts[main_part = num_parts++] = es_main;
parts[num_parts++] = gs_main;
si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, false);
si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, main_arg_types, false);
} else {
/* Nothing to do for gfx6-8. The shader has only 1 part and it's ctx.main_fn. */
}
@@ -1462,7 +1474,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_optimize_module(&ctx);
/* Make sure the input is a pointer and not integer followed by inttoptr. */
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind);
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn.value, 0))) == LLVMPointerTypeKind);
/* Compile to bytecode. */
if (!si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, &ctx.ac, debug,

View File

@@ -453,7 +453,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
LLVMBasicBlockRef end_bb;
LLVMValueRef switch_inst;
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn.value, "end");
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
for (int stream = 0; stream < 4; stream++) {

View File

@@ -206,7 +206,7 @@ static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func];
assert(cond);
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF);
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF);
if (LLVMTypeOf(alpha) == ctx->ac.f16)
alpha_ref = LLVMBuildFPTrunc(ctx->ac.builder, alpha_ref, ctx->ac.f16, "");
@@ -500,7 +500,7 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
/* Set SGPRs. */
ret = LLVMBuildInsertValue(
builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF)),
builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF)),
SI_SGPR_ALPHA_REF, "");
/* Set VGPRs */
@@ -609,7 +609,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
/* Create the function. */
si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
func = ctx->main_fn;
func = ctx->main_fn.value;
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
@@ -849,7 +849,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
/* Create the function. */
si_llvm_create_func(ctx, "ps_epilog", NULL, 0, 0);
/* Disable elimination of unused inputs. */
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "InitialPSInputAddr", 0xffffff);
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff);
/* Prepare color. */
unsigned vgpr = ctx->args.num_sgprs_used;
@@ -861,7 +861,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
if (color_type != SI_TYPE_ANY32) {
for (i = 0; i < 4; i++) {
color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr + i / 2);
color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr + i / 2);
color[write_i][i] = LLVMBuildBitCast(ctx->ac.builder, color[write_i][i],
ctx->ac.v2f16, "");
color[write_i][i] = ac_llvm_extract_elem(&ctx->ac, color[write_i][i], i % 2);
@@ -869,7 +869,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
vgpr += 4;
} else {
for (i = 0; i < 4; i++)
color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr++);
color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr++);
}
si_llvm_build_clamp_alpha_test(ctx, color[write_i], write_i);
@@ -888,11 +888,11 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
util_bitcount(key->ps_epilog.colors_written) * 4;
if (key->ps_epilog.writes_z)
depth = LLVMGetParam(ctx->main_fn, vgpr_index++);
depth = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
if (key->ps_epilog.writes_stencil)
stencil = LLVMGetParam(ctx->main_fn, vgpr_index++);
stencil = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
if (key->ps_epilog.writes_samplemask)
samplemask = LLVMGetParam(ctx->main_fn, vgpr_index++);
samplemask = LLVMGetParam(ctx->main_fn.value, vgpr_index++);
ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, mrtz_alpha, false,
&exp.args[exp.num++]);
@@ -932,9 +932,14 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader)
{
LLVMValueRef parts[3];
struct ac_llvm_pointer parts[3];
unsigned num_parts = 0, main_index;
LLVMValueRef main_fn = ctx->main_fn;
struct ac_llvm_pointer main_fn = ctx->main_fn;
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
for (int i = 0; i < ctx->args.arg_count; i++)
main_arg_types[i] = ctx->args.args[i].type;
union si_shader_part_key prolog_key;
si_get_ps_prolog_key(shader, &prolog_key, false);
@@ -952,7 +957,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
si_llvm_build_ps_epilog(ctx, &epilog_key);
parts[num_parts++] = ctx->main_fn;
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, false);
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false);
}
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)

View File

@@ -275,7 +275,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
LLVMValueRef value[4];
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}

View File

@@ -76,11 +76,11 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
if (divisor_is_one || divisor_is_fetched) {
/* Add StartInstance. */
index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMGetParam(ctx->main_fn, start_instance), "");
LLVMGetParam(ctx->main_fn.value, start_instance), "");
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(ctx->ac.builder, vertex_id,
LLVMGetParam(ctx->main_fn, base_vertex), "");
LLVMGetParam(ctx->main_fn.value, base_vertex), "");
}
return index;
@@ -103,8 +103,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
if (input_index == 0) {
/* Position: */
LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs);
LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1);
LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs);
LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 1);
LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
@@ -116,7 +116,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, "");
out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, "");
out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2);
out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 2);
out[3] = ctx->ac.f32_1;
return;
}
@@ -126,19 +126,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
for (int i = 0; i < 4; i++) {
out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i);
out[i] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3 + i);
}
} else {
assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3);
LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4);
LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5);
LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6);
LLVMValueRef x1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3);
LLVMValueRef y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 4);
LLVMValueRef x2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 5);
LLVMValueRef y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 6);
out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, "");
out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, "");
out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7);
out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8);
out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 7);
out[3] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 8);
}
return;
}
@@ -183,8 +183,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
input_index, ctx->instance_divisor_constbuf,
ctx->args.start_instance.arg_index,
ctx->args.base_vertex.arg_index);
} else
vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index);
} else {
vertex_index = LLVMGetParam(ctx->main_fn.value, ctx->vertex_index0.arg_index + input_index);
}
/* Use the open-coded implementation for all loads of doubles and
* of dword-sized data that needs fixups. We need to insert conversion
@@ -961,7 +962,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
/* Create the function. */
si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
func = ctx->main_fn;
func = ctx->main_fn.value;
for (i = 0; i < num_input_vgprs; i++) {
input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);