intel/compiler: handle extended math restrictions for half-float
Extended math with half-float operands is only supported since gen9, and there it is limited to SIMD8. On gen8 we lower it to 32-bit.

v2: squashed together the following patches (Jason):
 - intel/compiler: allow extended math functions with HF operands
 - intel/compiler: lower 16-bit extended math to 32-bit prior to gen9
 - intel/compiler: extended Math is limited to SIMD8 on half-float

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> (allow extended math functions with HF operands, extended Math is limited to SIMD8 on half-float)
This commit is contained in:

committed by
Juan A. Suarez Romero

parent
114f4e6c29
commit
4588f4a604
@@ -1916,8 +1916,10 @@ void gen6_math(struct brw_codegen *p,
|
|||||||
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
|
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
|
||||||
(devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
|
(devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
|
||||||
} else {
|
} else {
|
||||||
assert(src0.type == BRW_REGISTER_TYPE_F);
|
assert(src0.type == BRW_REGISTER_TYPE_F ||
|
||||||
assert(src1.type == BRW_REGISTER_TYPE_F);
|
(src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
|
||||||
|
assert(src1.type == BRW_REGISTER_TYPE_F ||
|
||||||
|
(src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Source modifiers are ignored for extended math instructions on Gen6. */
|
/* Source modifiers are ignored for extended math instructions on Gen6. */
|
||||||
|
@@ -5936,18 +5936,27 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||||||
case SHADER_OPCODE_EXP2:
|
case SHADER_OPCODE_EXP2:
|
||||||
case SHADER_OPCODE_LOG2:
|
case SHADER_OPCODE_LOG2:
|
||||||
case SHADER_OPCODE_SIN:
|
case SHADER_OPCODE_SIN:
|
||||||
case SHADER_OPCODE_COS:
|
case SHADER_OPCODE_COS: {
|
||||||
/* Unary extended math instructions are limited to SIMD8 on Gen4 and
|
/* Unary extended math instructions are limited to SIMD8 on Gen4 and
|
||||||
* Gen6.
|
* Gen6. Extended Math Function is limited to SIMD8 with half-float.
|
||||||
*/
|
*/
|
||||||
return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
|
if (devinfo->gen == 6 || (devinfo->gen == 4 && !devinfo->is_g4x))
|
||||||
devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16, inst->exec_size) :
|
return MIN2(8, inst->exec_size);
|
||||||
MIN2(8, inst->exec_size));
|
if (inst->dst.type == BRW_REGISTER_TYPE_HF)
|
||||||
|
return MIN2(8, inst->exec_size);
|
||||||
|
return MIN2(16, inst->exec_size);
|
||||||
|
}
|
||||||
|
|
||||||
case SHADER_OPCODE_POW:
|
case SHADER_OPCODE_POW: {
|
||||||
/* SIMD16 is only allowed on Gen7+. */
|
/* SIMD16 is only allowed on Gen7+. Extended Math Function is limited
|
||||||
return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
|
* to SIMD8 with half-float
|
||||||
MIN2(8, inst->exec_size));
|
*/
|
||||||
|
if (devinfo->gen < 7)
|
||||||
|
return MIN2(8, inst->exec_size);
|
||||||
|
if (inst->dst.type == BRW_REGISTER_TYPE_HF)
|
||||||
|
return MIN2(8, inst->exec_size);
|
||||||
|
return MIN2(16, inst->exec_size);
|
||||||
|
}
|
||||||
|
|
||||||
case SHADER_OPCODE_INT_QUOTIENT:
|
case SHADER_OPCODE_INT_QUOTIENT:
|
||||||
case SHADER_OPCODE_INT_REMAINDER:
|
case SHADER_OPCODE_INT_REMAINDER:
|
||||||
|
@@ -631,6 +631,8 @@ lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
|
|||||||
if (alu->dest.dest.ssa.bit_size != 16)
|
if (alu->dest.dest.ssa.bit_size != 16)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
const struct brw_compiler *compiler = (const struct brw_compiler *) data;
|
||||||
|
|
||||||
switch (alu->op) {
|
switch (alu->op) {
|
||||||
case nir_op_idiv:
|
case nir_op_idiv:
|
||||||
case nir_op_imod:
|
case nir_op_imod:
|
||||||
@@ -643,6 +645,15 @@ lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
|
|||||||
case nir_op_fround_even:
|
case nir_op_fround_even:
|
||||||
case nir_op_ftrunc:
|
case nir_op_ftrunc:
|
||||||
return 32;
|
return 32;
|
||||||
|
case nir_op_frcp:
|
||||||
|
case nir_op_frsq:
|
||||||
|
case nir_op_fsqrt:
|
||||||
|
case nir_op_fpow:
|
||||||
|
case nir_op_fexp2:
|
||||||
|
case nir_op_flog2:
|
||||||
|
case nir_op_fsin:
|
||||||
|
case nir_op_fcos:
|
||||||
|
return compiler->devinfo->gen < 9 ? 32 : 0;
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -719,7 +730,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||||||
OPT(nir_opt_large_constants, NULL, 32);
|
OPT(nir_opt_large_constants, NULL, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPT(nir_lower_bit_size, lower_bit_size_callback, NULL);
|
OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
|
||||||
|
|
||||||
if (is_scalar) {
|
if (is_scalar) {
|
||||||
OPT(nir_lower_load_const_to_scalar);
|
OPT(nir_lower_load_const_to_scalar);
|
||||||
|
Reference in New Issue
Block a user