pan/bi: Implement some extracts and inserts
Rather than lowering in NIR. Importantly for Valhall, this allows
nir_opt_algebraic to optimize various bitwise ops into extracts and inserts,
taking pressure off the low-throughput SFU pipe and moving it onto the
high-throughput CVT pipe. This will mitigate a cycle count regression from
switching to the precise idiv lowering.
This also generates more integer widening conversions which we can fold into
32-bit instructions later, to allow optimizing GLSL like "(a & 0xFFFF) + b".
Valhall:
total instructions in shared programs: 2674836 -> 2674840 (<.01%)
instructions in affected programs: 6473 -> 6477 (0.06%)
helped: 14
HURT: 6
helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.16% max: 1.37% x̄: 0.41% x̃: 0.49%
HURT stats (abs) min: 3.0 max: 3.0 x̄: 3.00 x̃: 3
HURT stats (rel) min: 1.19% max: 1.62% x̄: 1.35% x̃: 1.24%
95% mean confidence interval for instructions value: -0.68 1.08
95% mean confidence interval for instructions %-change: -0.30% 0.53%
Inconclusive result (value mean confidence interval includes 0).
total cycles in shared programs: 140627.42 -> 140627.36 (<.01%)
cycles in affected programs: 2.31 -> 2.25 (-2.70%)
helped: 1
HURT: 0
total cvt in shared programs: 14127.25 -> 14128.91 (0.01%)
cvt in affected programs: 153.50 -> 155.16 (1.08%)
helped: 0
HURT: 41
HURT stats (abs) min: 0.015625 max: 0.09375 x̄: 0.04 x̃: 0
HURT stats (rel) min: 0.27% max: 4.44% x̄: 1.61% x̃: 1.22%
95% mean confidence interval for cvt value: 0.03 0.05
95% mean confidence interval for cvt %-change: 1.29% 1.93%
Cvt are HURT.
total sfu in shared programs: 7555.69 -> 7549.31 (-0.08%)
sfu in affected programs: 107.31 -> 100.94 (-5.94%)
helped: 48
HURT: 0
helped stats (abs) min: 0.0625 max: 0.375 x̄: 0.13 x̃: 0
helped stats (rel) min: 1.34% max: 50.00% x̄: 13.57% x̃: 7.14%
95% mean confidence interval for sfu value: -0.15 -0.11
95% mean confidence interval for sfu %-change: -17.07% -10.06%
Sfu are helped.
total quadwords in shared programs: 1449912 -> 1449928 (<.01%)
quadwords in affected programs: 256 -> 272 (6.25%)
helped: 0
HURT: 2
Bifrost:
total instructions in shared programs: 2415370 -> 2415380 (<.01%)
instructions in affected programs: 1642 -> 1652 (0.61%)
helped: 2
HURT: 6
helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.40% max: 0.40% x̄: 0.40% x̃: 0.40%
HURT stats (abs) min: 2.0 max: 2.0 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.95% max: 1.27% x̄: 1.07% x̃: 1.00%
95% mean confidence interval for instructions value: 0.09 2.41
95% mean confidence interval for instructions %-change: 0.13% 1.29%
Instructions are HURT.
total tuples in shared programs: 1928495 -> 1928476 (<.01%)
tuples in affected programs: 3329 -> 3310 (-0.57%)
helped: 9
HURT: 2
helped stats (abs) min: 1.0 max: 6.0 x̄: 2.56 x̃: 2
helped stats (rel) min: 0.25% max: 2.33% x̄: 1.00% x̃: 0.75%
HURT stats (abs) min: 2.0 max: 2.0 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.48% max: 0.48% x̄: 0.48% x̃: 0.48%
95% mean confidence interval for tuples value: -3.46 0.00
95% mean confidence interval for tuples %-change: -1.35% -0.10%
Inconclusive result (value mean confidence interval includes 0).
total clauses in shared programs: 354978 -> 354983 (<.01%)
clauses in affected programs: 398 -> 403 (1.26%)
helped: 3
HURT: 8
helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
helped stats (rel) min: 2.33% max: 3.85% x̄: 2.83% x̃: 2.33%
HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
HURT stats (rel) min: 2.27% max: 3.70% x̄: 2.88% x̃: 2.78%
95% mean confidence interval for clauses value: -0.17 1.08
95% mean confidence interval for clauses %-change: -0.51% 3.16%
Inconclusive result (value mean confidence interval includes 0).
total cycles in shared programs: 166575.69 -> 166575.65 (<.01%)
cycles in affected programs: 6.88 -> 6.83 (-0.61%)
helped: 1
HURT: 0
total arith in shared programs: 73688.79 -> 73688 (<.01%)
arith in affected programs: 127.29 -> 126.50 (-0.62%)
helped: 9
HURT: 2
helped stats (abs) min: 0.04166700000000034 max: 0.25 x̄: 0.11 x̃: 0
helped stats (rel) min: 0.26% max: 2.45% x̄: 1.07% x̃: 0.80%
HURT stats (abs) min: 0.08333299999999966 max: 0.08333299999999966 x̄: 0.08 x̃: 0
HURT stats (rel) min: 0.55% max: 0.55% x̄: 0.55% x̃: 0.55%
95% mean confidence interval for arith value: -0.14 0.00
95% mean confidence interval for arith %-change: -1.44% -0.11%
Inconclusive result (value mean confidence interval includes 0).
total quadwords in shared programs: 1674514 -> 1674480 (<.01%)
quadwords in affected programs: 9086 -> 9052 (-0.37%)
helped: 23
HURT: 2
helped stats (abs) min: 1.0 max: 6.0 x̄: 1.65 x̃: 1
helped stats (rel) min: 0.15% max: 2.79% x̄: 0.63% x̃: 0.33%
HURT stats (abs) min: 2.0 max: 2.0 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.53% max: 0.53% x̄: 0.53% x̃: 0.53%
95% mean confidence interval for quadwords value: -2.08 -0.64
95% mean confidence interval for quadwords %-change: -0.86% -0.21%
Quadwords are helped.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17857>
This commit is contained in:

committed by
Marge Bot

parent
469e8c8e22
commit
eab1d36643
@@ -2625,6 +2625,56 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
||||
bi_mux_i32_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
|
||||
break;
|
||||
|
||||
case nir_op_extract_u8:
|
||||
case nir_op_extract_i8: {
|
||||
assert(comps == 1 && "should be scalarized");
|
||||
assert((src_sz == 16 || src_sz == 32) && "should be lowered");
|
||||
unsigned byte = nir_src_as_uint(instr->src[1].src);
|
||||
|
||||
if (s0.swizzle == BI_SWIZZLE_H11) {
|
||||
assert(byte < 2);
|
||||
byte += 2;
|
||||
} else if (s0.swizzle != BI_SWIZZLE_H01) {
|
||||
assert(s0.swizzle == BI_SWIZZLE_H00);
|
||||
}
|
||||
|
||||
assert(byte < 4);
|
||||
|
||||
s0.swizzle = BI_SWIZZLE_H01;
|
||||
|
||||
if (instr->op == nir_op_extract_i8)
|
||||
bi_s8_to_s32_to(b, dst, bi_byte(s0, byte));
|
||||
else
|
||||
bi_u8_to_u32_to(b, dst, bi_byte(s0, byte));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_extract_u16:
|
||||
case nir_op_extract_i16: {
|
||||
assert(comps == 1 && "should be scalarized");
|
||||
assert(src_sz == 32 && "should be lowered");
|
||||
unsigned half = nir_src_as_uint(instr->src[1].src);
|
||||
assert(half == 0 || half == 1);
|
||||
|
||||
if (instr->op == nir_op_extract_i16)
|
||||
bi_s16_to_s32_to(b, dst, bi_half(s0, half));
|
||||
else
|
||||
bi_u16_to_u32_to(b, dst, bi_half(s0, half));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_insert_u16: {
|
||||
assert(comps == 1 && "should be scalarized");
|
||||
unsigned half = nir_src_as_uint(instr->src[1].src);
|
||||
assert(half == 0 || half == 1);
|
||||
|
||||
if (half == 0)
|
||||
bi_u16_to_u32_to(b, dst, bi_half(s0, 0));
|
||||
else
|
||||
bi_mkvec_v2i16_to(b, dst, bi_imm_u16(0), bi_half(s0, 0));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_ishl:
|
||||
bi_lshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0));
|
||||
break;
|
||||
@@ -4319,6 +4369,11 @@ bi_vectorize_filter(const nir_instr *instr, const void *data)
|
||||
case nir_op_ushr:
|
||||
case nir_op_f2i16:
|
||||
case nir_op_f2u16:
|
||||
case nir_op_extract_u8:
|
||||
case nir_op_extract_i8:
|
||||
case nir_op_extract_u16:
|
||||
case nir_op_extract_i16:
|
||||
case nir_op_insert_u16:
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
|
@@ -52,10 +52,7 @@ static const nir_shader_compiler_options bifrost_nir_options = {
|
||||
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
|
||||
.lower_pack_half_2x16 = true,
|
||||
|
Reference in New Issue
Block a user