nir_opt_algebraic: don't use i32csel without native integer support

Otherwise nir_lower_int_to_float (or specifically nir_gather_ssa_types) will fail to recognize we already have float constants and converts them again. Example from spec/glsl-1.10/execution/vs-loop-array-index-unroll.shader_test with r300 driver (after enabling has_fused_comp_and_csel). impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) vec4 32 ssa_1 = intrinsic load_input (ssa_0) (base=0, component=0, dest_type=float32, io location=VERT_ATTRIB_POS slots=1) /* gl_Vertex */ vec3 32 ssa_2 = load_const (0x00000000, 0x3e800000, 0x3f800000) = (0.000000, 0.250000, 1.000000) vec3 32 ssa_3 = load_const (0x00000000, 0x3f000000, 0x3f800000) = (0.000000, 0.500000, 1.000000) vec3 32 ssa_4 = load_const (0x00000000, 0x3f400000, 0x3f800000) = (0.000000, 0.750000, 1.000000) vec2 32 ssa_5 = load_const (0x00000000, 0x3f800000) = (0.000000, 1.000000) vec1 32 ssa_6 = load_const (0x3f800000 = 1.000000) vec1 32 ssa_7 = intrinsic load_ubo_vec4 (ssa_0, ssa_0) (access=0, base=0, component=0) vec4 32 ssa_8 = load_const (0x00000000, 0x00000001, 0x00000002, 0x00000003) = (0.000000, 0.000000, 0.000000, 0.000000) vec4 1 ssa_9 = ilt ssa_8, ssa_7.xxxx vec3 32 ssa_10 = bcsel ssa_9.www, ssa_5.xyy, ssa_4 vec3 32 ssa_11 = bcsel ssa_9.zzz, ssa_10, ssa_3 vec3 32 ssa_12 = bcsel ssa_9.yyy, ssa_11, ssa_2 vec3 32 ssa_15 = i32csel_gt ssa_7.xxx, ssa_12, ssa_6.xxx vec4 32 ssa_14 = fsat ssa_15.xyxz intrinsic store_output (ssa_14, ssa_0) (base=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_COL0 slots=1, xfb(), xfb2()) /* gl_FrontColor */ intrinsic store_output (ssa_1, ssa_0) (base=0, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), xfb2()) /* gl_Position */ /* succs: block_1 */ block block_1: } and after nir_lower_int_to_float impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) vec4 32 ssa_1 = intrinsic load_input (ssa_0) (base=0, component=0, dest_type=float32, io location=VERT_ATTRIB_POS slots=1) /* gl_Vertex */ vec3 32 ssa_2 = load_const (0x00000000, 0x4e7a0000, 0x4e7e0000) = (0.000000, 1048576000.000000, 1065353216.000000) vec3 32 ssa_3 = load_const (0x00000000, 0x4e7c0000, 0x4e7e0000) = (0.000000, 1056964608.000000, 1065353216.000000) vec3 32 ssa_4 = load_const (0x00000000, 0x4e7d0000, 0x4e7e0000) = (0.000000, 1061158912.000000, 1065353216.000000) vec2 32 ssa_5 = load_const (0x00000000, 0x4e7e0000) = (0.000000, 1065353216.000000) vec1 32 ssa_6 = load_const (0x4e7e0000 = 1065353216.000000) vec1 32 ssa_7 = intrinsic load_ubo_vec4 (ssa_0, ssa_0) (access=0, base=0, component=0) vec4 32 ssa_8 = load_const (0x00000000, 0x3f800000, 0x40000000, 0x40400000) = (0.000000, 1.000000, 2.000000, 3.000000) vec4 1 ssa_9 = flt ssa_8, ssa_7.xxxx vec3 32 ssa_10 = bcsel ssa_9.www, ssa_5.xyy, ssa_4 vec3 32 ssa_11 = bcsel ssa_9.zzz, ssa_10, ssa_3 vec3 32 ssa_12 = bcsel ssa_9.yyy, ssa_11, ssa_2 vec3 32 ssa_13 = fcsel_gt ssa_7.xxx, ssa_12, ssa_6.xxx vec4 32 ssa_14 = fsat ssa_13.xyxz intrinsic store_output (ssa_14, ssa_0) (base=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_COL0 slots=1, xfb(), xfb2()) /* gl_FrontColor */ intrinsic store_output (ssa_1, ssa_0) (base=0, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), xfb2()) /* gl_Position */ /* succs: block_1 */ block block_1: } Reviewed-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23704>
2023-06-17 13:50:05 +02:00
parent f9a4b8e640
commit b4ca45911d
1 changed files with 4 additions and 4 deletions
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3189,10 +3189,10 @@ late_optimizations += [
  (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
  (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),

-  (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
-  (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
-  (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
-  (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
+  (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel && !options->no_integers"),
+  (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel && !options->no_integers"),

  (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
  (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),