ac/llvm: remove LLVM pass ac_optimize_vs_outputs
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
This commit is contained in:
@@ -2714,268 +2714,6 @@ LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Operand indices of an export intrinsic call, for use with
 * LLVMGetOperand/LLVMSetOperand on the call instruction.
 */
#define AC_EXP_TARGET           0 /* export target */
#define AC_EXP_ENABLED_CHANNELS 1 /* bitmask of enabled channels */
#define AC_EXP_OUT0             2 /* first channel value; channel N is AC_EXP_OUT0 + N */
|
||||
|
||||
/* Classification of a single channel value of an export instruction. */
enum ac_ir_type
{
   AC_IR_UNDEF, /* the channel value is undef */
   AC_IR_CONST, /* the channel value is a floating-point constant */
   AC_IR_VALUE, /* any other value */
};
|
||||
|
||||
struct ac_vs_exp_chan {
|
||||
LLVMValueRef value;
|
||||
float const_float;
|
||||
enum ac_ir_type type;
|
||||
};
|
||||
|
||||
struct ac_vs_exp_inst {
|
||||
unsigned offset;
|
||||
LLVMValueRef inst;
|
||||
struct ac_vs_exp_chan chan[4];
|
||||
};
|
||||
|
||||
struct ac_vs_exports {
|
||||
unsigned num;
|
||||
struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
/* Return true if the PARAM export has been eliminated. */
|
||||
static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, uint32_t num_outputs,
|
||||
struct ac_vs_exp_inst *exp)
|
||||
{
|
||||
unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
|
||||
bool is_zero[4] = {0}, is_one[4] = {0};
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
/* It's a constant expression. Undef outputs are eliminated too. */
|
||||
if (exp->chan[i].type == AC_IR_UNDEF) {
|
||||
is_zero[i] = true;
|
||||
is_one[i] = true;
|
||||
} else if (exp->chan[i].type == AC_IR_CONST) {
|
||||
if (exp->chan[i].const_float == 0)
|
||||
is_zero[i] = true;
|
||||
else if (exp->chan[i].const_float == 1)
|
||||
is_one[i] = true;
|
||||
else
|
||||
return false; /* other constant */
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Only certain combinations of 0 and 1 can be eliminated. */
|
||||
if (is_zero[0] && is_zero[1] && is_zero[2])
|
||||
default_val = is_zero[3] ? 0 : 1;
|
||||
else if (is_one[0] && is_one[1] && is_one[2])
|
||||
default_val = is_zero[3] ? 2 : 3;
|
||||
else
|
||||
return false;
|
||||
|
||||
/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
|
||||
LLVMInstructionEraseFromParent(exp->inst);
|
||||
|
||||
/* Change OFFSET to DEFAULT_VAL. */
|
||||
for (i = 0; i < num_outputs; i++) {
|
||||
if (vs_output_param_offset[i] == exp->offset) {
|
||||
vs_output_param_offset[i] = AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
|
||||
uint8_t *vs_output_param_offset, uint32_t num_outputs,
|
||||
struct ac_vs_exports *processed,
|
||||
struct ac_vs_exp_inst *exp)
|
||||
{
|
||||
unsigned p, copy_back_channels = 0;
|
||||
|
||||
/* See if the output is already in the list of processed outputs.
|
||||
* The LLVMValueRef comparison relies on SSA.
|
||||
*/
|
||||
for (p = 0; p < processed->num; p++) {
|
||||
bool different = false;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
|
||||
struct ac_vs_exp_chan *c2 = &exp->chan[j];
|
||||
|
||||
/* Treat undef as a match. */
|
||||
if (c2->type == AC_IR_UNDEF)
|
||||
continue;
|
||||
|
||||
/* If c1 is undef but c2 isn't, we can copy c2 to c1
|
||||
* and consider the instruction duplicated.
|
||||
*/
|
||||
if (c1->type == AC_IR_UNDEF) {
|
||||
copy_back_channels |= 1 << j;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Test whether the channels are not equal. */
|
||||
if (c1->type != c2->type ||
|
||||
(c1->type == AC_IR_CONST && c1->const_float != c2->const_float) ||
|
||||
(c1->type == AC_IR_VALUE && c1->value != c2->value)) {
|
||||
different = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!different)
|
||||
break;
|
||||
|
||||
copy_back_channels = 0;
|
||||
}
|
||||
if (p == processed->num)
|
||||
return false;
|
||||
|
||||
/* If a match was found, but the matching export has undef where the new
|
||||
* one has a normal value, copy the normal value to the undef channel.
|
||||
*/
|
||||
struct ac_vs_exp_inst *match = &processed->exp[p];
|
||||
|
||||
/* Get current enabled channels mask. */
|
||||
LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
|
||||
unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
|
||||
|
||||
while (copy_back_channels) {
|
||||
unsigned chan = u_bit_scan(©_back_channels);
|
||||
|
||||
assert(match->chan[chan].type == AC_IR_UNDEF);
|
||||
LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, exp->chan[chan].value);
|
||||
match->chan[chan] = exp->chan[chan];
|
||||
|
||||
/* Update number of enabled channels because the original mask
|
||||
* is not always 0xf.
|
||||
*/
|
||||
enabled_channels |= (1 << chan);
|
||||
LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
|
||||
LLVMConstInt(ctx->i32, enabled_channels, 0));
|
||||
}
|
||||
|
||||
/* The PARAM export is duplicated. Kill it. */
|
||||
LLVMInstructionEraseFromParent(exp->inst);
|
||||
|
||||
/* Change OFFSET to the matching export. */
|
||||
for (unsigned i = 0; i < num_outputs; i++) {
|
||||
if (vs_output_param_offset[i] == exp->offset) {
|
||||
vs_output_param_offset[i] = match->offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Eliminate constant and duplicated PARAM exports from a function and
 * renumber the remaining ones so that there are no holes.
 *
 * Every call to "llvm.SI.export" or "llvm.amdgcn.exp.f32" whose target is at
 * least V_008DFC_SQ_EXP_PARAM is parsed. Unless its bit is set in
 * skip_output_mask, the export is removed if it is constant
 * (ac_eliminate_const_output) or duplicates an earlier kept export
 * (ac_eliminate_duplicated_output). All other PARAM exports are kept.
 *
 * If anything was removed, the kept exports are renumbered consecutively
 * from 0 in processing order, vs_output_param_offset is updated to match,
 * and *num_param_exports is set to the number of kept exports. If nothing
 * was removed, neither vs_output_param_offset nor *num_param_exports is
 * written.
 *
 * \param ctx                     LLVM context
 * \param main_fn                 function whose instructions are scanned
 * \param vs_output_param_offset  per-output PARAM offsets, updated in place
 * \param num_outputs             number of entries in vs_output_param_offset;
 *                                must not exceed VARYING_SLOT_MAX
 * \param skip_output_mask        bit N set = never eliminate PARAM export N
 * \param num_param_exports       out: number of remaining PARAM exports
 */
void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn,
                            uint8_t *vs_output_param_offset, uint32_t num_outputs,
                            uint32_t skip_output_mask, uint8_t *num_param_exports)
{
   LLVMBasicBlockRef bb;
   bool removed_any = false;
   struct ac_vs_exports exports;

   exports.num = 0;

   /* Process all LLVM instructions. */
   bb = LLVMGetFirstBasicBlock(main_fn);
   while (bb) {
      LLVMValueRef inst = LLVMGetFirstInstruction(bb);

      while (inst) {
         LLVMValueRef cur = inst;
         /* Advance first: "cur" may be erased below. */
         inst = LLVMGetNextInstruction(inst);
         struct ac_vs_exp_inst exp;

         if (LLVMGetInstructionOpcode(cur) != LLVMCall)
            continue;

         LLVMValueRef callee = ac_llvm_get_called_value(cur);

         if (!ac_llvm_is_function(callee))
            continue;

         const char *name = LLVMGetValueName(callee);
         unsigned num_args = LLVMCountParams(callee);

         /* Check if this is an export instruction. */
         if ((num_args != 9 && num_args != 8) ||
             (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32")))
            continue;

         LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
         unsigned target = LLVMConstIntGetZExtValue(arg);

         /* Only PARAM exports are of interest. */
         if (target < V_008DFC_SQ_EXP_PARAM)
            continue;

         target -= V_008DFC_SQ_EXP_PARAM;

         /* Parse the instruction. */
         memset(&exp, 0, sizeof(exp));
         exp.offset = target;
         exp.inst = cur;

         for (unsigned i = 0; i < 4; i++) {
            LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);

            exp.chan[i].value = v;

            if (LLVMIsUndef(v)) {
               exp.chan[i].type = AC_IR_UNDEF;
            } else if (LLVMIsAConstantFP(v)) {
               LLVMBool loses_info;
               exp.chan[i].type = AC_IR_CONST;
               exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info);
            } else {
               exp.chan[i].type = AC_IR_VALUE;
            }
         }

         /* Eliminate constant and duplicated PARAM exports. */
         if (!((1u << target) & skip_output_mask) &&
             (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) ||
              ac_eliminate_duplicated_output(ctx, vs_output_param_offset, num_outputs, &exports,
                                             &exp))) {
            removed_any = true;
         } else {
            /* Keep the export; exports.exp has VARYING_SLOT_MAX entries. */
            assert(exports.num < VARYING_SLOT_MAX);
            exports.exp[exports.num++] = exp;
         }
      }
      bb = LLVMGetNextBasicBlock(bb);
   }

   /* Remove holes in export memory due to removed PARAM exports.
    * This is done by renumbering all PARAM exports.
    */
   if (removed_any) {
      uint8_t old_offset[VARYING_SLOT_MAX];
      unsigned out, i;

      /* Make a copy of the offsets. We need the old version while
       * we are modifying some of them.
       *
       * Only the first num_outputs entries are read below, so copy exactly
       * that many instead of always reading VARYING_SLOT_MAX bytes from the
       * caller's array.
       */
      assert(num_outputs <= VARYING_SLOT_MAX);
      memcpy(old_offset, vs_output_param_offset, num_outputs * sizeof(old_offset[0]));

      for (i = 0; i < exports.num; i++) {
         unsigned offset = exports.exp[i].offset;

         /* Update vs_output_param_offset. Multiple outputs can
          * have the same offset.
          */
         for (out = 0; out < num_outputs; out++) {
            if (old_offset[out] == offset)
               vs_output_param_offset[out] = i;
         }

         /* Change the PARAM offset in the instruction. */
         LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
                        LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0));
      }
      *num_param_exports = exports.num;
   }
}
|
||||
|
||||
void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
|
||||
{
|
||||
LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
|
||||
|
@@ -460,9 +460,6 @@ LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
|
||||
LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0);
|
||||
|
||||
/**
 * Eliminate constant and duplicated PARAM exports from main_fn and renumber
 * the remaining ones.
 *
 * vs_output_param_offset (num_outputs entries) is updated in place. PARAM
 * exports whose bit is set in skip_output_mask are never eliminated.
 * *num_param_exports is written only if at least one export was removed.
 */
void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn,
                            uint8_t *vs_output_param_offset, uint32_t num_outputs,
                            uint32_t skip_output_mask, uint8_t *num_param_exports);
|
||||
void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
|
||||
|
||||
void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
|
||||
|
Reference in New Issue
Block a user