nir: Make nir_build_alu() variants per 1-4 arg count.

This avoids generating code at every call site to pad the argument list
with NULLs up to 4 args, and avoids executing the conditions in
nir_build_alu() that then skip those NULLs.

Saves another 27kb on disk.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13916>
This commit is contained in:
Emma Anholt
2021-11-22 11:45:23 -08:00
committed by Marge Bot
parent e770ec1182
commit 06fe04b4d7
3 changed files with 73 additions and 5 deletions

View File

@@ -140,6 +140,63 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
return nir_builder_alu_instr_finish_and_insert(build, instr); return nir_builder_alu_instr_finish_and_insert(build, instr);
} }
/* Single-source variant of nir_build_alu().
 *
 * Creates an ALU instruction for `op` in build->shader, wires src0 into
 * source slot 0 and hands the instruction to
 * nir_builder_alu_instr_finish_and_insert(), whose result is returned.
 * Returns NULL if nir_alu_instr_create() yields no instruction.
 */
nir_ssa_def *
nir_build_alu1(nir_builder *build, nir_op op, nir_ssa_def *src0)
{
   nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);

   if (alu == NULL) {
      return NULL;
   }

   alu->src[0].src = nir_src_for_ssa(src0);

   return nir_builder_alu_instr_finish_and_insert(build, alu);
}
/* Two-source variant of nir_build_alu().
 *
 * Creates an ALU instruction for `op` in build->shader, wires src0 and src1
 * into source slots 0 and 1 and hands the instruction to
 * nir_builder_alu_instr_finish_and_insert(), whose result is returned.
 * Returns NULL if nir_alu_instr_create() yields no instruction.
 */
nir_ssa_def *
nir_build_alu2(nir_builder *build, nir_op op, nir_ssa_def *src0,
               nir_ssa_def *src1)
{
   nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);

   if (alu == NULL) {
      return NULL;
   }

   alu->src[0].src = nir_src_for_ssa(src0);
   alu->src[1].src = nir_src_for_ssa(src1);

   return nir_builder_alu_instr_finish_and_insert(build, alu);
}
/* Three-source variant of nir_build_alu().
 *
 * Creates an ALU instruction for `op` in build->shader, wires src0..src2
 * into source slots 0..2 and hands the instruction to
 * nir_builder_alu_instr_finish_and_insert(), whose result is returned.
 * Returns NULL if nir_alu_instr_create() yields no instruction.
 */
nir_ssa_def *
nir_build_alu3(nir_builder *build, nir_op op, nir_ssa_def *src0,
               nir_ssa_def *src1, nir_ssa_def *src2)
{
   nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);

   if (alu == NULL) {
      return NULL;
   }

   alu->src[0].src = nir_src_for_ssa(src0);
   alu->src[1].src = nir_src_for_ssa(src1);
   alu->src[2].src = nir_src_for_ssa(src2);

   return nir_builder_alu_instr_finish_and_insert(build, alu);
}
/* Four-source variant of nir_build_alu().
 *
 * Creates an ALU instruction for `op` in build->shader, wires src0..src3
 * into source slots 0..3 and hands the instruction to
 * nir_builder_alu_instr_finish_and_insert(), whose result is returned.
 * Returns NULL if nir_alu_instr_create() yields no instruction.
 */
nir_ssa_def *
nir_build_alu4(nir_builder *build, nir_op op, nir_ssa_def *src0,
               nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
{
   nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);

   if (alu == NULL) {
      return NULL;
   }

   alu->src[0].src = nir_src_for_ssa(src0);
   alu->src[1].src = nir_src_for_ssa(src1);
   alu->src[2].src = nir_src_for_ssa(src2);
   alu->src[3].src = nir_src_for_ssa(src3);

   return nir_builder_alu_instr_finish_and_insert(build, alu);
}
/* for the couple special cases with more than 4 src args: */ /* for the couple special cases with more than 4 src args: */
nir_ssa_def * nir_ssa_def *
nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs) nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)

View File

@@ -109,10 +109,24 @@ nir_builder_last_instr(nir_builder *build)
return build->cursor.instr; return build->cursor.instr;
} }
/* General nir_build_alu() taking a variable arg count with NULLs for the rest. */
nir_ssa_def * nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3); nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3);
/* Fixed-arg-count variants to reduce size of codegen.
 *
 * nir_build_aluN() takes exactly N sources (src0 .. srcN-1), so callers do
 * not have to pad the argument list with NULLs as they do for the general
 * nir_build_alu().
 */
nir_ssa_def *
nir_build_alu1(nir_builder *build, nir_op op, nir_ssa_def *src0);
nir_ssa_def *
nir_build_alu2(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_def *src1);
nir_ssa_def *
nir_build_alu3(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_def *src1, nir_ssa_def *src2);
nir_ssa_def *
nir_build_alu4(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3);
nir_ssa_def *nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs); nir_ssa_def *nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs);
nir_instr *nir_builder_last_instr(nir_builder *build); nir_instr *nir_builder_last_instr(nir_builder *build);

View File

@@ -29,10 +29,7 @@ def src_decl_list(num_srcs):
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs)) return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
def src_list(num_srcs): def src_list(num_srcs):
if num_srcs <= 4: return ', '.join('src' + str(i) for i in range(num_srcs))
return ', '.join('src' + str(i) if i < num_srcs else 'NULL' for i in range(4))
else:
return ', '.join('src' + str(i) for i in range(num_srcs))
%> %>
% for name, opcode in sorted(opcodes.items()): % for name, opcode in sorted(opcodes.items()):
@@ -40,7 +37,7 @@ static inline nir_ssa_def *
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)}) nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
{ {
% if opcode.num_inputs <= 4: % if opcode.num_inputs <= 4:
return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)}); return nir_build_alu${opcode.num_inputs}(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
% else: % else:
nir_ssa_def *srcs[${opcode.num_inputs}] = {${src_list(opcode.num_inputs)}}; nir_ssa_def *srcs[${opcode.num_inputs}] = {${src_list(opcode.num_inputs)}};
return nir_build_alu_src_arr(build, nir_op_${name}, srcs); return nir_build_alu_src_arr(build, nir_op_${name}, srcs);