i965/compiler: handle conversion to smaller type in the lowering pass for that
This rolls back the revert of this same patch introduced in
commit 7b9c15628a.
It also squashes the following patch to prevent a piglit regression caused
by this change:
intel/compiler: Fix lower_conversions for 8-bit types.
Author: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
For 8-bit types the execution type is word. A byte raw MOV has 16-bit
execution type and 8-bit destination and it shouldn't be considered
a conversion case. So there is no need to change alignment and enter
lower_conversions for these instructions.
Fixes a regression in the piglit test "glsl-fs-shader-stencil-export"
that is introduced with this patch from the Vulkan shaderInt16 series:
'i965/compiler: handle conversion to smaller type in the lowering
pass for that'. The problem is caused because there is already a case
in the driver that injects Byte instructions like this:
mov(8) g127<1>UB g2<32,8,4>UB
And the aforementioned pass was not accounting for the special
handling of the execution size of Byte instructions. This patch
fixes this.
v2: (Jason Ekstrand)
- Simplify is_byte_raw_mov, include a reference to the PRM, and do not
consider B <-> UB conversions as raw movs.
v3: (Matt Turner)
- Indentation style fixes.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106393
Tested-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
@@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* From the SKL PRM Vol 2a, "Move":
|
||||||
|
*
|
||||||
|
* "A mov with the same source and destination type, no source modifier,
|
||||||
|
* and no saturation is a raw move. A packed byte destination region (B
|
||||||
|
* or UB type with HorzStride == 1 and ExecSize > 1) can only be written
|
||||||
|
* using raw move."
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
is_byte_raw_mov (const fs_inst *inst)
|
||||||
|
{
|
||||||
|
return type_sz(inst->dst.type) == 1 &&
|
||||||
|
inst->opcode == BRW_OPCODE_MOV &&
|
||||||
|
inst->src[0].type == inst->dst.type &&
|
||||||
|
!inst->saturate &&
|
||||||
|
!inst->src[0].negate &&
|
||||||
|
!inst->src[0].abs;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::lower_conversions()
|
fs_visitor::lower_conversions()
|
||||||
{
|
{
|
||||||
@@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
|
|||||||
bool saturate = inst->saturate;
|
bool saturate = inst->saturate;
|
||||||
|
|
||||||
if (supports_type_conversion(inst)) {
|
if (supports_type_conversion(inst)) {
|
||||||
if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
|
if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
|
||||||
|
!is_byte_raw_mov(inst)) {
|
||||||
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
|
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
|
||||||
* Single Precision Float":
|
* Single Precision Float":
|
||||||
*
|
*
|
||||||
@@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
|
|||||||
* So we need to allocate a temporary that's two registers, and then do
|
* So we need to allocate a temporary that's two registers, and then do
|
||||||
* a strided MOV to get the lower DWord of every Qword that has the
|
* a strided MOV to get the lower DWord of every Qword that has the
|
||||||
* result.
|
* result.
|
||||||
|
*
|
||||||
|
* This restriction applies, in general, whenever we convert to
|
||||||
|
* a type with a smaller bit-size.
|
||||||
*/
|
*/
|
||||||
fs_reg temp = ibld.vgrf(get_exec_type(inst));
|
fs_reg temp = ibld.vgrf(get_exec_type(inst));
|
||||||
fs_reg strided_temp = subscript(temp, dst.type, 0);
|
fs_reg strided_temp = subscript(temp, dst.type, 0);
|
||||||
|
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
case nir_op_f2f16_undef:
|
case nir_op_f2f16_undef:
|
||||||
case nir_op_i2i16:
|
inst = bld.MOV(result, op[0]);
|
||||||
case nir_op_u2u16: {
|
|
||||||
/* TODO: Fixing aligment rules for conversions from 32-bits to
|
|
||||||
* 16-bit types should be moved to lower_conversions
|
|
||||||
*/
|
|
||||||
fs_reg tmp = bld.vgrf(op[0].type, 1);
|
|
||||||
tmp = subscript(tmp, result.type, 0);
|
|
||||||
inst = bld.MOV(tmp, op[0]);
|
|
||||||
inst->saturate = instr->dest.saturate;
|
|
||||||
inst = bld.MOV(result, tmp);
|
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
case nir_op_f2f64:
|
case nir_op_f2f64:
|
||||||
case nir_op_f2i64:
|
case nir_op_f2i64:
|
||||||
@@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
|||||||
case nir_op_f2u16:
|
case nir_op_f2u16:
|
||||||
case nir_op_i2i32:
|
case nir_op_i2i32:
|
||||||
case nir_op_u2u32:
|
case nir_op_u2u32:
|
||||||
|
case nir_op_i2i16:
|
||||||
|
case nir_op_u2u16:
|
||||||
case nir_op_i2f16:
|
case nir_op_i2f16:
|
||||||
case nir_op_u2f16:
|
case nir_op_u2f16:
|
||||||
inst = bld.MOV(result, op[0]);
|
inst = bld.MOV(result, op[0]);
|
||||||
|
Reference in New Issue
Block a user