glsl: Drop the div-to-mul-rcp lowering for floats.

NIR has fdiv, and every NIR backend must already set lower_fdiv
appropriately, since various passes (format conversions, tgsi_to_nir,
nir_fast_normalize(), etc.) might generate an fdiv.

This causes softpipe and llvmpipe to now do actual divides, since
lower_fdiv is not set there.  Note that llvmpipe's rcp implementation is a
divide of 1.0 by x, so we now simply do div(x, y) instead of
mul(x, div(1.0, y)).

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16823>
This commit is contained in:
Emma Anholt
2022-05-31 13:48:17 -07:00
committed by Marge Bot
parent 594d3982f7
commit 464b32c030
9 changed files with 37 additions and 76 deletions

View File

@@ -35,7 +35,6 @@ struct gl_shader_program;
/* Operations for lower_instructions() */ /* Operations for lower_instructions() */
#define SUB_TO_ADD_NEG 0x01 #define SUB_TO_ADD_NEG 0x01
#define FDIV_TO_MUL_RCP 0x02
#define INT_DIV_TO_MUL_RCP 0x40 #define INT_DIV_TO_MUL_RCP 0x40
#define LDEXP_TO_ARITH 0x80 #define LDEXP_TO_ARITH 0x80
#define CARRY_TO_ARITH 0x100 #define CARRY_TO_ARITH 0x100
@@ -49,8 +48,6 @@ struct gl_shader_program;
#define FIND_LSB_TO_FLOAT_CAST 0x20000 #define FIND_LSB_TO_FLOAT_CAST 0x20000
#define FIND_MSB_TO_FLOAT_CAST 0x40000 #define FIND_MSB_TO_FLOAT_CAST 0x40000
#define IMUL_HIGH_TO_MUL 0x80000 #define IMUL_HIGH_TO_MUL 0x80000
#define DDIV_TO_MUL_RCP 0x100000
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
#define SQRT_TO_ABS_SQRT 0x200000 #define SQRT_TO_ABS_SQRT 0x200000
/* Operations for lower_64bit_integer_instructions() */ /* Operations for lower_64bit_integer_instructions() */

View File

@@ -31,7 +31,6 @@
* *
* Currently supported transformations: * Currently supported transformations:
* - SUB_TO_ADD_NEG * - SUB_TO_ADD_NEG
* - DIV_TO_MUL_RCP
* - INT_DIV_TO_MUL_RCP * - INT_DIV_TO_MUL_RCP
* - LDEXP_TO_ARITH * - LDEXP_TO_ARITH
* - CARRY_TO_ARITH * - CARRY_TO_ARITH
@@ -48,21 +47,10 @@
* want to recognize add(op0, neg(op1)) or the other way around to * want to recognize add(op0, neg(op1)) or the other way around to
* produce a subtract anyway. * produce a subtract anyway.
* *
* FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP: * INT_DIV_TO_MUL_RCP:
* --------------------------------------------------------- * ---------------------------------------------------------
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)). * Breaks an ir_binop_div expression down to f2i(i2f(op0) * (rcp(i2f(op1)))).
* * Used for !NativeIntegers HW.
* Many GPUs don't have a divide instruction (945 and 965 included),
* but they do have an RCP instruction to compute an approximate
* reciprocal. By breaking the operation down, constant reciprocals
* can get constant folded.
*
* FDIV_TO_MUL_RCP lowers single-precision and half-precision
* floating point division;
* DDIV_TO_MUL_RCP only lowers double-precision floating point division.
* DIV_TO_MUL_RCP is a convenience macro that sets both flags.
* INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
* point so that RCP is possible.
* *
* LDEXP_TO_ARITH: * LDEXP_TO_ARITH:
* ------------- * -------------
@@ -112,7 +100,6 @@ private:
unsigned lower; /** Bitfield of which operations to lower */ unsigned lower; /** Bitfield of which operations to lower */
void sub_to_add_neg(ir_expression *); void sub_to_add_neg(ir_expression *);
void div_to_mul_rcp(ir_expression *);
void int_div_to_mul_rcp(ir_expression *); void int_div_to_mul_rcp(ir_expression *);
void ldexp_to_arith(ir_expression *); void ldexp_to_arith(ir_expression *);
void dldexp_to_arith(ir_expression *); void dldexp_to_arith(ir_expression *);
@@ -170,25 +157,6 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
this->progress = true; this->progress = true;
} }
/**
 * Rewrites a division expression in place as a multiplication by the
 * reciprocal of its second operand: op0 / op1 -> op0 * rcp(op1).
 *
 * \param ir  Expression to rewrite.  Its second operand's type must satisfy
 *            is_float_16_32_64() (asserted below).  The expression object
 *            itself is mutated: its operation becomes ir_binop_mul and
 *            operands[1] is replaced by the new reciprocal expression.
 *
 * Sets this->progress to true.
 */
void
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
{
/* Only floating-point division is handled by this helper. */
assert(ir->operands[1]->type->is_float_16_32_64());
/* New expression for the 1.0 / op1 */
ir_rvalue *expr;
/* The reciprocal keeps op1's type; `ir` is passed as the placement
 * argument to new (presumably the allocation context -- TODO confirm).
 */
expr = new(ir) ir_expression(ir_unop_rcp,
ir->operands[1]->type,
ir->operands[1]);
/* op0 / op1 -> op0 * (1.0 / op1) */
ir->operation = ir_binop_mul;
/* NOTE(review): presumably refreshes the operand count after the
 * operation was changed -- confirm against ir_expression.
 */
ir->init_num_operands();
ir->operands[1] = expr;
this->progress = true;
}
void void
lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
{ {
@@ -1550,9 +1518,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
case ir_binop_div: case ir_binop_div:
if (ir->operands[1]->type->is_integer_32() && lowering(INT_DIV_TO_MUL_RCP)) if (ir->operands[1]->type->is_integer_32() && lowering(INT_DIV_TO_MUL_RCP))
int_div_to_mul_rcp(ir); int_div_to_mul_rcp(ir);
else if ((ir->operands[1]->type->is_float_16_32() && lowering(FDIV_TO_MUL_RCP)) ||
(ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
div_to_mul_rcp(ir);
break; break;
case ir_binop_ldexp: case ir_binop_ldexp:

View File

@@ -12,7 +12,7 @@ traces:
- path: gputest/plot3d.trace - path: gputest/plot3d.trace
expectations: expectations:
- device: i915-g33 - device: i915-g33
checksum: 6a3f62f1c6cc57c91188f93f37ebf9b4 checksum: 456d98a570563af3bc61d91fd1742868
- path: gputest/triangle.trace - path: gputest/triangle.trace
expectations: expectations:
# Weird white bar behind Tux's head. # Weird white bar behind Tux's head.

View File

@@ -5,7 +5,7 @@ traces:
- path: 0ad/0ad.trace - path: 0ad/0ad.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: b29c740db174350d9be0beaaccd40453 checksum: 73171898acd606e5729b45ecf7e8def7
- path: bgfx/01-cubes.rdc - path: bgfx/01-cubes.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -17,7 +17,7 @@ traces:
- path: bgfx/03-raymarch.rdc - path: bgfx/03-raymarch.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 71c0a0fc8a3e2760014efda8c07c623e checksum: 90e324ce490d0b25526925c139e4663d
- path: bgfx/04-mesh.rdc - path: bgfx/04-mesh.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -29,7 +29,7 @@ traces:
- path: bgfx/06-bump.rdc - path: bgfx/06-bump.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 49b428a9c1a8e72f1ef5f3e91bc278db checksum: 9944c2a342f2ae67f5e6d5b61f6d0e5b
- path: bgfx/07-callback.rdc - path: bgfx/07-callback.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -37,7 +37,7 @@ traces:
- path: bgfx/09-hdr.rdc - path: bgfx/09-hdr.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: f0e52dff670caa2aad0080a8aa59ad06 checksum: 016dab082c1facafdd5512bf7b2f79db
- path: bgfx/10-font.rdc - path: bgfx/10-font.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -65,11 +65,11 @@ traces:
- path: bgfx/16-shadowmaps.rdc - path: bgfx/16-shadowmaps.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 87562fb15f341a214765e47adc910cc0 checksum: 6c31b4af0f0b55586d858681206ea87c
- path: bgfx/18-ibl.rdc - path: bgfx/18-ibl.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 47490275249793f778fc5d14899bf836 checksum: f785c003caf9ea9f84212ffa5aa08815
- path: bgfx/19-oit.rdc - path: bgfx/19-oit.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -77,7 +77,7 @@ traces:
- path: bgfx/20-nanosvg.rdc - path: bgfx/20-nanosvg.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 6b32c5f18a421412c7bbae3c65b5e0f6 checksum: 366d6325855c35a3f77718405546a00f
- path: bgfx/23-vectordisplay.rdc - path: bgfx/23-vectordisplay.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -97,7 +97,7 @@ traces:
- path: bgfx/31-rsm.rdc - path: bgfx/31-rsm.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: ef42f05c98862167a9eac6c733021e91 checksum: b59d323511488d5c098ebfa9b434c2dc
- path: bgfx/32-particles.rdc - path: bgfx/32-particles.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -105,7 +105,7 @@ traces:
- path: bgfx/33-pom.rdc - path: bgfx/33-pom.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: c2d54a830ada2ff97c7e532b22d858b4 checksum: 4d7c66e327a9e9fe3e7a2d0e7bbe152d
- path: bgfx/34-mvs.rdc - path: bgfx/34-mvs.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -117,7 +117,7 @@ traces:
- path: bgfx/36-sky.rdc - path: bgfx/36-sky.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 06b1a3eb0e4793930502d808939b2386 checksum: f16e91e0f71beda46ad0ff6a5a1ad3fc
- path: bgfx/37-gpudrivenrendering.rdc - path: bgfx/37-gpudrivenrendering.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -129,7 +129,7 @@ traces:
- path: bgfx/39-assao.rdc - path: bgfx/39-assao.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 5d9c6dd6399db34ac81951cd7152ec1c checksum: e10c52b802f42f0ec7dd3a8465883e2e
- path: bgfx/40-svt.rdc - path: bgfx/40-svt.rdc
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -141,7 +141,7 @@ traces:
- path: glmark2/jellyfish.trace - path: glmark2/jellyfish.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 0bba174c99746be068c4960cb6a9dabb checksum: da850394d7f99a895322222cc95170af
- path: glxgears/glxgears-2.trace - path: glxgears/glxgears-2.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -149,11 +149,11 @@ traces:
- path: gputest/furmark.trace - path: gputest/furmark.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: e2fea90560ce0f65efba5d38610dc7ef checksum: 58a6a276abc0e28fcb2a8acea3342712
- path: gputest/pixmark-piano.trace - path: gputest/pixmark-piano.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: b580ae01560380461a103975cab77393 checksum: 11e2a97c14c74e771483ca0d90f9bde3
- path: gputest/triangle.trace - path: gputest/triangle.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
@@ -185,7 +185,7 @@ traces:
- path: pathfinder/canvas_moire.trace - path: pathfinder/canvas_moire.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe
checksum: 2cb5be6a6f62e417f1a89c89180e5728 checksum: 25ba8f18274126670311bd3ffe058f74
- path: pathfinder/canvas_text_v2.trace - path: pathfinder/canvas_text_v2.trace
expectations: expectations:
- device: gl-vmware-llvmpipe - device: gl-vmware-llvmpipe

View File

@@ -22,7 +22,7 @@ traces:
- path: gputest/pixmark-piano.trace - path: gputest/pixmark-piano.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: a9b8aeeaf0baacfc2ab913e0dddffd3b checksum: f2338b10aebf05d7c10d805852d2e774
- path: gputest/triangle.trace - path: gputest/triangle.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl

View File

@@ -9,7 +9,7 @@ traces:
- path: glmark2/jellyfish.trace - path: glmark2/jellyfish.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 48a2ad1162bf92301cedbe53edf52a6b checksum: 2112a9a5519f39483735509f2ccc61af
- path: glxgears/glxgears-2.trace - path: glxgears/glxgears-2.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -17,11 +17,11 @@ traces:
- path: gputest/furmark.trace - path: gputest/furmark.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 6e498eb959f7da847eb6d2cbbfaea5d5 checksum: 57ddd36b117adc9216c65c10d914a37e
- path: gputest/pixmark-piano.trace - path: gputest/pixmark-piano.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 33770ade1aed893528c597f63af892f2 checksum: 0d875bda7edc01698342b157c6f51500
- path: gputest/triangle.trace - path: gputest/triangle.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -33,19 +33,19 @@ traces:
- path: 0ad/0ad.trace - path: 0ad/0ad.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 5e8f945ba7316a70a5195c1c23d35809 checksum: 350e0cf64d124ba98d90106f61775eb4
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false.trace - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 0a44720bfe9cce13c52299a3125b2aae checksum: f80431e56327354b4c88cc45c7e6633a
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=subdata:interleave=false.trace - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=subdata:interleave=false.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: c109416afbc0db4ba466d5a453b8a6dc checksum: 81e12bfa4ae3b7e63b01edbed71a5941
- path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=true.trace - path: glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=true.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 1c0551f10a5dc9358f3fb9bb9f059883 checksum: 08e6d00fe3f4414ebfadc9e5f3c3bf0e
- path: glmark2/bump:bump-render=height.trace - path: glmark2/bump:bump-render=height.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -174,11 +174,11 @@ traces:
- path: gputest/pixmark-volplosion.trace - path: gputest/pixmark-volplosion.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 0c7aab484c251b0f90745ced620bed82 checksum: aef0b32ce99a3b25d35304ca08032833
- path: gputest/plot3d.trace - path: gputest/plot3d.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 2915192067704d738cdc4c2eaa88a2b1 checksum: 817a36e53edccdf946061315596e9cdd
# Times out # Times out
# - path: gputest/tessmark.trace # - path: gputest/tessmark.trace
# expectations: # expectations:
@@ -203,15 +203,15 @@ traces:
- path: humus/RaytracedShadows.trace - path: humus/RaytracedShadows.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 298e49b697e9141294ecbc6283729d86 checksum: df074a376fd3e7abc4dffdd191db8f4b
- path: humus/VolumetricFogging2.trace - path: humus/VolumetricFogging2.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 382891c83f2afe4fcbdd2bfd241c1339 checksum: 2eb71553403ad8e0171abc9dc25e5bc1
- path: itoral-gl-terrain-demo/demo.trace - path: itoral-gl-terrain-demo/demo.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: fe6124227b7f8e4e96ffbbd48c713c42 checksum: 716d4fe36a6212b161285fed8a423ee8
- path: neverball/neverball.trace - path: neverball/neverball.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -219,7 +219,7 @@ traces:
- path: pathfinder/canvas_moire.trace - path: pathfinder/canvas_moire.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: 2cb5be6a6f62e417f1a89c89180e5728 checksum: 25ba8f18274126670311bd3ffe058f74
- path: pathfinder/canvas_text_v2.trace - path: pathfinder/canvas_text_v2.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -246,7 +246,7 @@ traces:
- path: supertuxkart/supertuxkart-mansion-egl-gles.trace - path: supertuxkart/supertuxkart-mansion-egl-gles.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: b93f2d23cc42072eff674829094cbe74 checksum: 092e8ca38e58aaa83df2a9f0b7b8aee5
- path: xonotic/xonotic-keybench-high.trace - path: xonotic/xonotic-keybench-high.trace
expectations: expectations:
- device: gl-virgl - device: gl-virgl
@@ -285,7 +285,7 @@ traces:
- path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc - path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc
expectations: expectations:
- device: gl-virgl - device: gl-virgl
checksum: fca1fa683ab56352bd96d0777835ae65 checksum: 232eb48d6689c0117e3cc1660af7f32d
# ../src/mesa/main/arrayobj.c:800:_mesa_update_vao_derived_arrays: Assertion `attrib->_EffRelativeOffset < binding->Stride' failed. # ../src/mesa/main/arrayobj.c:800:_mesa_update_vao_derived_arrays: Assertion `attrib->_EffRelativeOffset < binding->Stride' failed.
#- path: ror/ror-default.trace #- path: ror/ror-default.trace
# expectations: # expectations:

View File

@@ -1897,7 +1897,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break; break;
case nir_op_fdiv: case nir_op_fdiv:
unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler"); unreachable("not reached: should be lowered by lower_fdiv in the compiler");
case nir_op_fmod: case nir_op_fmod:
unreachable("not reached: should be lowered by lower_fmod in the compiler"); unreachable("not reached: should be lowered by lower_fmod in the compiler");

View File

@@ -103,7 +103,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
shader, ctx->Extensions.KHR_blend_equation_advanced_coherent); shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
lower_instructions(ir, lower_instructions(ir,
FDIV_TO_MUL_RCP |
(have_ldexp ? 0 : LDEXP_TO_ARITH) | (have_ldexp ? 0 : LDEXP_TO_ARITH) |
(have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
CARRY_TO_ARITH | CARRY_TO_ARITH |

View File

@@ -158,7 +158,7 @@ traces:
- path: glmark2/refract.trace - path: glmark2/refract.trace
expectations: expectations:
- device: gl-panfrost-t860 - device: gl-panfrost-t860
checksum: 6557deca1a47a7a77723658ea579ac63 checksum: 427c2ec78338288526a98cedae291703
- path: glmark2/shading:shading=blinn-phong-inf.trace - path: glmark2/shading:shading=blinn-phong-inf.trace
expectations: expectations:
- device: gl-panfrost-t860 - device: gl-panfrost-t860