radv: lower 64-bit dfloor on GFX6 for fixing precision issues

GFX6 doesn't support v_floor_f64, and the precision of v_fract_f64,
which is used to implement 64-bit floor, is lower than what Vulkan
requires.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5609>
This commit is contained in:
Samuel Pitoiset
2020-06-24 09:15:47 +02:00
committed by Marge Bot
parent c84f11e7b6
commit a102896cff
3 changed files with 10 additions and 5 deletions

View File

@@ -1,7 +1,3 @@
dEQP-VK.glsl.builtin.precision_double.floor.compute.scalar
dEQP-VK.glsl.builtin.precision_double.floor.compute.vec2
dEQP-VK.glsl.builtin.precision_double.floor.compute.vec3
dEQP-VK.glsl.builtin.precision_double.floor.compute.vec4
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp
dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp
dEQP-VK.pipeline.depth.format.d16_unorm.compare_ops.never_zerodepthbounds_depthdisabled_stencilenabled dEQP-VK.pipeline.depth.format.d16_unorm.compare_ops.never_zerodepthbounds_depthdisabled_stencilenabled

View File

@@ -979,7 +979,8 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
if (ctx->options->chip_class >= GFX7) if (ctx->options->chip_class >= GFX7)
return bld.vop1(aco_opcode::v_floor_f64, Definition(dst), val); return bld.vop1(aco_opcode::v_floor_f64, Definition(dst), val);
/* GFX6 doesn't support V_FLOOR_F64, lower it. */ /* GFX6 doesn't support V_FLOOR_F64, lower it (note that it's actually
* lowered at NIR level for precision reasons). */
Temp src0 = as_vgpr(ctx, val); Temp src0 = as_vgpr(ctx, val);
Temp mask = bld.copy(bld.def(s1), Operand(3u)); /* isnan */ Temp mask = bld.copy(bld.def(s1), Operand(3u)); /* isnan */

View File

@@ -478,6 +478,14 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir_lower_doubles_options lower_doubles = nir_lower_doubles_options lower_doubles =
nir->options->lower_doubles_options; nir->options->lower_doubles_options;
if (device->physical_device->rad_info.chip_class == GFX6) {
/* GFX6 doesn't support v_floor_f64 and the precision
* of v_fract_f64 which is used to implement 64-bit
* floor is less than what Vulkan requires.
*/
lower_doubles |= nir_lower_dfloor;
}
NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles); NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
} }