nir: Rename replicated-result dot-product instructions
All these instructions replicate the result of an N-component dot-product
to a vec4. Naming them fdot_replicatedN gives the impression that they are
some sort of abstract dot-product that replicates the result to a vecN.
They also deviate from fdph_replicated... which nobody would reasonably
consider naming fdot_replicatedh.

Naming these opcodes fdotN_replicated more closely matches what they
are, and it matches the pattern of fdph_replicated.

I believe that the only reason these opcodes were named this way was
because it simplified the implementation of the binop_reduce function in
nir_opcodes.py. I made some fairly simple changes to that function, and
I think the end result is ok.

The bulk of the changes come from the sed rename:

    sed --in-place -e 's/fdot_replicated\([234]\)/fdot\1_replicated/g' \
        $(grep -r 'fdot_replicated[234]' src/)

v2: Use a named parameter to binop_reduce instead of using
isinstance(name, str). Suggested by Jason.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5725>
This commit is contained in:
@@ -3193,7 +3193,7 @@ typedef struct nir_shader_compiler_options {
|
|||||||
|
|
||||||
/* Does the native fdot instruction replicate its result for four
|
/* Does the native fdot instruction replicate its result for four
|
||||||
* components? If so, then opt_algebraic_late will turn all fdotN
|
* components? If so, then opt_algebraic_late will turn all fdotN
|
||||||
* instructions into fdot_replicatedN instructions.
|
* instructions into fdotN_replicated instructions.
|
||||||
*/
|
*/
|
||||||
bool fdot_replicates;
|
bool fdot_replicates;
|
||||||
|
|
||||||
|
@@ -107,9 +107,9 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
|
|||||||
static bool
|
static bool
|
||||||
has_replicated_dest(nir_alu_instr *alu)
|
has_replicated_dest(nir_alu_instr *alu)
|
||||||
{
|
{
|
||||||
return alu->op == nir_op_fdot_replicated2 ||
|
return alu->op == nir_op_fdot2_replicated ||
|
||||||
alu->op == nir_op_fdot_replicated3 ||
|
alu->op == nir_op_fdot3_replicated ||
|
||||||
alu->op == nir_op_fdot_replicated4 ||
|
alu->op == nir_op_fdot4_replicated ||
|
||||||
alu->op == nir_op_fdph_replicated;
|
alu->op == nir_op_fdph_replicated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -541,7 +541,7 @@ def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
|
|||||||
False, "", const_expr)
|
False, "", const_expr)
|
||||||
|
|
||||||
def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
|
def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
|
||||||
reduce_expr, final_expr):
|
reduce_expr, final_expr, suffix=""):
|
||||||
def final(src):
|
def final(src):
|
||||||
return final_expr.format(src= "(" + src + ")")
|
return final_expr.format(src= "(" + src + ")")
|
||||||
def reduce_(src0, src1):
|
def reduce_(src0, src1):
|
||||||
@@ -554,10 +554,10 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
|
|||||||
return srcs[start]
|
return srcs[start]
|
||||||
return reduce_(pairwise_reduce(start, size // 2), pairwise_reduce(start + size // 2, size // 2))
|
return reduce_(pairwise_reduce(start, size // 2), pairwise_reduce(start + size // 2, size // 2))
|
||||||
for size in [2, 4, 8, 16]:
|
for size in [2, 4, 8, 16]:
|
||||||
opcode(name + str(size), output_size, output_type,
|
opcode(name + str(size) + suffix, output_size, output_type,
|
||||||
[size, size], [src_type, src_type], False, _2src_commutative,
|
[size, size], [src_type, src_type], False, _2src_commutative,
|
||||||
final(pairwise_reduce(0, size)))
|
final(pairwise_reduce(0, size)))
|
||||||
opcode(name + "3", output_size, output_type,
|
opcode(name + "3" + suffix, output_size, output_type,
|
||||||
[3, 3], [src_type, src_type], False, _2src_commutative,
|
[3, 3], [src_type, src_type], False, _2src_commutative,
|
||||||
final(reduce_(reduce_(srcs[0], srcs[1]), srcs[2])))
|
final(reduce_(reduce_(srcs[0], srcs[1]), srcs[2])))
|
||||||
|
|
||||||
@@ -825,8 +825,9 @@ binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
|
|||||||
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
|
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
|
||||||
"{src}")
|
"{src}")
|
||||||
|
|
||||||
binop_reduce("fdot_replicated", 4, tfloat, tfloat,
|
binop_reduce("fdot", 4, tfloat, tfloat,
|
||||||
"{src0} * {src1}", "{src0} + {src1}", "{src}")
|
"{src0} * {src1}", "{src0} + {src1}", "{src}",
|
||||||
|
suffix="_replicated")
|
||||||
|
|
||||||
opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
|
opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
|
||||||
"src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
|
"src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
|
||||||
|
@@ -2074,9 +2074,9 @@ late_optimizations = [
|
|||||||
|
|
||||||
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
|
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
|
||||||
|
|
||||||
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
|
(('fdot2', a, b), ('fdot2_replicated', a, b), 'options->fdot_replicates'),
|
||||||
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
|
(('fdot3', a, b), ('fdot3_replicated', a, b), 'options->fdot_replicates'),
|
||||||
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
|
(('fdot4', a, b), ('fdot4_replicated', a, b), 'options->fdot_replicates'),
|
||||||
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
|
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
|
||||||
|
|
||||||
(('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
|
(('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
|
||||||
@@ -2252,9 +2252,9 @@ distribute_src_mods = [
|
|||||||
# Try to remove some spurious negations rather than pushing them down.
|
# Try to remove some spurious negations rather than pushing them down.
|
||||||
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
|
(('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)),
|
||||||
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
|
(('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)),
|
||||||
(('fdot_replicated2', ('fneg', a), ('fneg', b)), ('fdot_replicated2', a, b)),
|
(('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b)),
|
||||||
(('fdot_replicated3', ('fneg', a), ('fneg', b)), ('fdot_replicated3', a, b)),
|
(('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b)),
|
||||||
(('fdot_replicated4', ('fneg', a), ('fneg', b)), ('fdot_replicated4', a, b)),
|
(('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b)),
|
||||||
(('fneg', ('fneg', a)), a),
|
(('fneg', ('fneg', a)), a),
|
||||||
|
|
||||||
(('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
|
(('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
|
||||||
@@ -2269,9 +2269,9 @@ distribute_src_mods = [
|
|||||||
(('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))),
|
(('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))),
|
||||||
(('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))),
|
(('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))),
|
||||||
|
|
||||||
(('fneg', ('fdot_replicated2(is_used_once)', a, b)), ('fdot_replicated2', ('fneg', a), b)),
|
(('fneg', ('fdot2_replicated(is_used_once)', a, b)), ('fdot2_replicated', ('fneg', a), b)),
|
||||||
(('fneg', ('fdot_replicated3(is_used_once)', a, b)), ('fdot_replicated3', ('fneg', a), b)),
|
(('fneg', ('fdot3_replicated(is_used_once)', a, b)), ('fdot3_replicated', ('fneg', a), b)),
|
||||||
(('fneg', ('fdot_replicated4(is_used_once)', a, b)), ('fdot_replicated4', ('fneg', a), b)),
|
(('fneg', ('fdot4_replicated(is_used_once)', a, b)), ('fdot4_replicated', ('fneg', a), b)),
|
||||||
|
|
||||||
# fdph works mostly like fdot, but to get the correct result, the negation
|
# fdph works mostly like fdot, but to get the correct result, the negation
|
||||||
# must be applied to the second source.
|
# must be applied to the second source.
|
||||||
|
@@ -1857,17 +1857,17 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
|||||||
inst->predicate = predicate;
|
inst->predicate = predicate;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fdot_replicated2:
|
case nir_op_fdot2_replicated:
|
||||||
try_immediate_source(instr, op, true);
|
try_immediate_source(instr, op, true);
|
||||||
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
|
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fdot_replicated3:
|
case nir_op_fdot3_replicated:
|
||||||
try_immediate_source(instr, op, true);
|
try_immediate_source(instr, op, true);
|
||||||
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
|
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fdot_replicated4:
|
case nir_op_fdot4_replicated:
|
||||||
try_immediate_source(instr, op, true);
|
try_immediate_source(instr, op, true);
|
||||||
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
|
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
|
||||||
break;
|
break;
|
||||||
|
Reference in New Issue
Block a user