nir: Rename replicated-result dot-product instructions

All these instructions replicate the result of an N-component dot-product
to a vec4.  Naming them fdot_replicatedN gives the impression that they are
some sort of abstract dot-product that replicates the result to a vecN.
They also deviate from fdph_replicated... which nobody would reasonably
consider naming fdot_replicatedh.

Naming these opcodes fdotN_replicated more closely matches what they
are, and it matches the pattern of fdph_replicated.

I believe that the only reason these opcodes were named this way was
because it simplified the implementation of the binop_reduce function in
nir_opcodes.py.  I made some fairly simple changes to that function, and
I think the end result is ok.

The bulk of the changes come from the sed rename:

    sed --in-place -e 's/fdot_replicated\([234]\)/fdot\1_replicated/g' \
        $(grep -r 'fdot_replicated[234]' src/)

v2: Use a named parameter to binop_reduce instead of using
isinstance(name, str).  Suggested by Jason.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5725>
This commit is contained in:
Ian Romanick
2020-06-20 14:33:57 -07:00
committed by Marge Bot
parent 8cee9ce750
commit 67956689bb
5 changed files with 22 additions and 21 deletions

View File

@@ -3193,7 +3193,7 @@ typedef struct nir_shader_compiler_options {
/* Does the native fdot instruction replicate its result for four /* Does the native fdot instruction replicate its result for four
* components? If so, then opt_algebraic_late will turn all fdotN * components? If so, then opt_algebraic_late will turn all fdotN
* instructions into fdot_replicatedN instructions. * instructions into fdotN_replicated instructions.
*/ */
bool fdot_replicates; bool fdot_replicates;

View File

@@ -107,9 +107,9 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
static bool static bool
has_replicated_dest(nir_alu_instr *alu) has_replicated_dest(nir_alu_instr *alu)
{ {
return alu->op == nir_op_fdot_replicated2 || return alu->op == nir_op_fdot2_replicated ||
alu->op == nir_op_fdot_replicated3 || alu->op == nir_op_fdot3_replicated ||
alu->op == nir_op_fdot_replicated4 || alu->op == nir_op_fdot4_replicated ||
alu->op == nir_op_fdph_replicated; alu->op == nir_op_fdph_replicated;
} }

View File

@@ -541,7 +541,7 @@ def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
False, "", const_expr) False, "", const_expr)
def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
reduce_expr, final_expr): reduce_expr, final_expr, suffix=""):
def final(src): def final(src):
return final_expr.format(src= "(" + src + ")") return final_expr.format(src= "(" + src + ")")
def reduce_(src0, src1): def reduce_(src0, src1):
@@ -554,10 +554,10 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
return srcs[start] return srcs[start]
return reduce_(pairwise_reduce(start, size // 2), pairwise_reduce(start + size // 2, size // 2)) return reduce_(pairwise_reduce(start, size // 2), pairwise_reduce(start + size // 2, size // 2))
for size in [2, 4, 8, 16]: for size in [2, 4, 8, 16]:
opcode(name + str(size), output_size, output_type, opcode(name + str(size) + suffix, output_size, output_type,
[size, size], [src_type, src_type], False, _2src_commutative, [size, size], [src_type, src_type], False, _2src_commutative,
final(pairwise_reduce(0, size))) final(pairwise_reduce(0, size)))
opcode(name + "3", output_size, output_type, opcode(name + "3" + suffix, output_size, output_type,
[3, 3], [src_type, src_type], False, _2src_commutative, [3, 3], [src_type, src_type], False, _2src_commutative,
final(reduce_(reduce_(srcs[0], srcs[1]), srcs[2]))) final(reduce_(reduce_(srcs[0], srcs[1]), srcs[2])))
@@ -825,8 +825,9 @@ binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
"{src}") "{src}")
binop_reduce("fdot_replicated", 4, tfloat, tfloat, binop_reduce("fdot", 4, tfloat, tfloat,
"{src0} * {src1}", "{src0} + {src1}", "{src}") "{src0} * {src1}", "{src0} + {src1}", "{src}",
suffix="_replicated")
opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "", opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
"src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")

View File

@@ -2074,9 +2074,9 @@ late_optimizations = [
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), (('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), (('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), (('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
(('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)), (('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
@@ -2252,9 +2252,9 @@ distribute_src_mods = [
# Try to remove some spurious negations rather than pushing them down. # Try to remove some spurious negations rather than pushing them down.
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
(('fdot_replicated2', ('fneg', a), ('fneg', b)), ('fdot_replicated2', a, b)), (('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
(('fdot_replicated3', ('fneg', a), ('fneg', b)), ('fdot_replicated3', a, b)), (('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
(('fdot_replicated4', ('fneg', a), ('fneg', b)), ('fdot_replicated4', a, b)), (('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
(('fneg', ('fneg', a)), a), (('fneg', ('fneg', a)), a),
(('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)), (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
@@ -2269,9 +2269,9 @@ distribute_src_mods = [
(('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))), (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))),
(('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))), (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))),
(('fneg', ('fdot_replicated2(is_used_once)', a, b)), ('fdot_replicated2', ('fneg', a), b)), (('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b)),
(('fneg', ('fdot_replicated3(is_used_once)', a, b)), ('fdot_replicated3', ('fneg', a), b)), (('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b)),
(('fneg', ('fdot_replicated4(is_used_once)', a, b)), ('fdot_replicated4', ('fneg', a), b)), (('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b)),
# fdph works mostly like fdot, but to get the correct result, the negation # fdph works mostly like fdot, but to get the correct result, the negation
# must be applied to the second source. # must be applied to the second source.

View File

@@ -1857,17 +1857,17 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
inst->predicate = predicate; inst->predicate = predicate;
break; break;
case nir_op_fdot_replicated2: case nir_op_fdot2_replicated:
try_immediate_source(instr, op, true); try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
break; break;
case nir_op_fdot_replicated3: case nir_op_fdot3_replicated:
try_immediate_source(instr, op, true); try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
break; break;
case nir_op_fdot_replicated4: case nir_op_fdot4_replicated:
try_immediate_source(instr, op, true); try_immediate_source(instr, op, true);
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
break; break;