nir: Make nir_build_alu() variants per 1-4 arg count.
This removes a bunch of generated code that packed extra NULLs to reach 4 args, and avoids executing the conditions in nir_build_alu() that then skip those NULLs. Saves another 27kb on disk. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13916>
This commit is contained in:
@@ -140,6 +140,63 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
return nir_builder_alu_instr_finish_and_insert(build, instr);
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
nir_build_alu1(nir_builder *build, nir_op op, nir_ssa_def *src0)
|
||||
{
|
||||
nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
|
||||
if (!instr)
|
||||
return NULL;
|
||||
|
||||
instr->src[0].src = nir_src_for_ssa(src0);
|
||||
|
||||
return nir_builder_alu_instr_finish_and_insert(build, instr);
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
nir_build_alu2(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1)
|
||||
{
|
||||
nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
|
||||
if (!instr)
|
||||
return NULL;
|
||||
|
||||
instr->src[0].src = nir_src_for_ssa(src0);
|
||||
instr->src[1].src = nir_src_for_ssa(src1);
|
||||
|
||||
return nir_builder_alu_instr_finish_and_insert(build, instr);
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
nir_build_alu3(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1, nir_ssa_def *src2)
|
||||
{
|
||||
nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
|
||||
if (!instr)
|
||||
return NULL;
|
||||
|
||||
instr->src[0].src = nir_src_for_ssa(src0);
|
||||
instr->src[1].src = nir_src_for_ssa(src1);
|
||||
instr->src[2].src = nir_src_for_ssa(src2);
|
||||
|
||||
return nir_builder_alu_instr_finish_and_insert(build, instr);
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
nir_build_alu4(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
|
||||
{
|
||||
nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
|
||||
if (!instr)
|
||||
return NULL;
|
||||
|
||||
instr->src[0].src = nir_src_for_ssa(src0);
|
||||
instr->src[1].src = nir_src_for_ssa(src1);
|
||||
instr->src[2].src = nir_src_for_ssa(src2);
|
||||
instr->src[3].src = nir_src_for_ssa(src3);
|
||||
|
||||
return nir_builder_alu_instr_finish_and_insert(build, instr);
|
||||
}
|
||||
|
||||
/* for the couple special cases with more than 4 src args: */
|
||||
nir_ssa_def *
|
||||
nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)
|
||||
|
@@ -109,10 +109,24 @@ nir_builder_last_instr(nir_builder *build)
|
||||
return build->cursor.instr;
|
||||
}
|
||||
|
||||
/* General nir_build_alu(): takes up to four sources; pass NULL for each unused trailing source. */
|
||||
nir_ssa_def *
|
||||
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3);
|
||||
|
||||
/* Fixed-arg-count variants: callers pass exactly the sources needed (no NULL padding), which reduces the size of the generated code. */
|
||||
nir_ssa_def *
|
||||
nir_build_alu1(nir_builder *build, nir_op op, nir_ssa_def *src0);
|
||||
nir_ssa_def *
|
||||
nir_build_alu2(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1);
|
||||
nir_ssa_def *
|
||||
nir_build_alu3(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1, nir_ssa_def *src2);
|
||||
nir_ssa_def *
|
||||
nir_build_alu4(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3);
|
||||
|
||||
nir_ssa_def *nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs);
|
||||
|
||||
nir_instr *nir_builder_last_instr(nir_builder *build);
|
||||
|
@@ -29,10 +29,7 @@ def src_decl_list(num_srcs):
|
||||
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
|
||||
|
||||
def src_list(num_srcs):
    """Return the C argument list 'src0, src1, ...' naming num_srcs sources.

    The list holds exactly num_srcs entries and is never padded with NULL:
    the template passes it to the fixed-arity nir_build_alu<N>() helpers and
    uses it as the initializer of the srcs[] array for opcodes with more
    than four inputs, so the entry count must match the opcode's input count.
    """
    return ', '.join('src' + str(i) for i in range(num_srcs))
|
||||
%>
|
||||
|
||||
% for name, opcode in sorted(opcodes.items()):
|
||||
@@ -40,7 +37,7 @@ static inline nir_ssa_def *
|
||||
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
|
||||
{
|
||||
% if opcode.num_inputs <= 4:
|
||||
return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
|
||||
return nir_build_alu${opcode.num_inputs}(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
|
||||
% else:
|
||||
nir_ssa_def *srcs[${opcode.num_inputs}] = {${src_list(opcode.num_inputs)}};
|
||||
return nir_build_alu_src_arr(build, nir_op_${name}, srcs);
|
||||
|
Reference in New Issue
Block a user