intel/compiler: validate region restrictions for half-float conversions
v2: - Consider implicit conversions in 2-src instructions too (Curro) - For restrictions that involve destination stride requirements only validate them for Align1, since Align16 always requires packed data. - Skip general rule for the dst/execution type size ratio for mixed float instructions on CHV and SKL+, these have their own set of rules that we'll be validated separately. v3 (Curro): - Do not check src1 type in single-source instructions. - Check restriction on src1. - Remove invalid test. Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:

committed by
Juan A. Suarez Romero

parent
6ff52f0628
commit
7376d57a9c
@@ -469,6 +469,66 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether an instruction is an explicit or implicit conversion
|
||||||
|
* to/from half-float.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
is_half_float_conversion(const struct gen_device_info *devinfo,
|
||||||
|
const brw_inst *inst)
|
||||||
|
{
|
||||||
|
enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
|
||||||
|
|
||||||
|
unsigned num_sources = num_sources_from_inst(devinfo, inst);
|
||||||
|
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||||
|
|
||||||
|
if (dst_type != src0_type &&
|
||||||
|
(dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
|
||||||
|
return true;
|
||||||
|
} else if (num_sources > 1) {
|
||||||
|
enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
|
||||||
|
return dst_type != src1_type &&
|
||||||
|
(dst_type == BRW_REGISTER_TYPE_HF ||
|
||||||
|
src1_type == BRW_REGISTER_TYPE_HF);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns whether an instruction is using mixed float operation mode
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
|
||||||
|
{
|
||||||
|
if (devinfo->gen < 8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (inst_is_send(devinfo, inst))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned opcode = brw_inst_opcode(devinfo, inst);
|
||||||
|
const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
|
||||||
|
if (desc->ndst == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* FIXME: support 3-src instructions */
|
||||||
|
unsigned num_sources = num_sources_from_inst(devinfo, inst);
|
||||||
|
assert(num_sources < 3);
|
||||||
|
|
||||||
|
enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
|
||||||
|
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||||
|
|
||||||
|
if (num_sources == 1)
|
||||||
|
return types_are_mixed_float(src0_type, dst_type);
|
||||||
|
|
||||||
|
enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
|
||||||
|
|
||||||
|
return types_are_mixed_float(src0_type, src1_type) ||
|
||||||
|
types_are_mixed_float(src0_type, dst_type) ||
|
||||||
|
types_are_mixed_float(src1_type, dst_type);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks restrictions listed in "General Restrictions Based on Operand Types"
|
* Checks restrictions listed in "General Restrictions Based on Operand Types"
|
||||||
* in the "Register Region Restrictions" section.
|
* in the "Register Region Restrictions" section.
|
||||||
@@ -539,7 +599,100 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
|
|||||||
exec_type_size == 8 && dst_type_size == 4)
|
exec_type_size == 8 && dst_type_size == 4)
|
||||||
dst_type_size = 8;
|
dst_type_size = 8;
|
||||||
|
|
||||||
if (exec_type_size > dst_type_size) {
|
if (is_half_float_conversion(devinfo, inst)) {
|
||||||
|
/**
|
||||||
|
* A helper to validate used in the validation of the following restriction
|
||||||
|
* from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
|
||||||
|
*
|
||||||
|
* "There is no direct conversion from HF to DF or DF to HF.
|
||||||
|
* There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
|
||||||
|
*
|
||||||
|
* Even if these restrictions are listed for the MOV instruction, we
|
||||||
|
* validate this more generally, since there is the possibility
|
||||||
|
* of implicit conversions from other instructions, such us implicit
|
||||||
|
* conversion from integer to HF with the ADD instruction in SKL+.
|
||||||
|
*/
|
||||||
|
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||||
|
enum brw_reg_type src1_type = num_sources > 1 ?
|
||||||
|
brw_inst_src1_type(devinfo, inst) : 0;
|
||||||
|
ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
|
||||||
|
(type_sz(src0_type) == 8 ||
|
||||||
|
(num_sources > 1 && type_sz(src1_type) == 8)),
|
||||||
|
"There are no direct conversions between 64-bit types and HF");
|
||||||
|
|
||||||
|
ERROR_IF(type_sz(dst_type) == 8 &&
|
||||||
|
(src0_type == BRW_REGISTER_TYPE_HF ||
|
||||||
|
(num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
|
||||||
|
"There are no direct conversions between 64-bit types and HF");
|
||||||
|
|
||||||
|
/* From the BDW+ PRM:
|
||||||
|
*
|
||||||
|
* "Conversion between Integer and HF (Half Float) must be
|
||||||
|
* DWord-aligned and strided by a DWord on the destination."
|
||||||
|
*
|
||||||
|
* Also, the above restrictions seems to be expanded on CHV and SKL+ by:
|
||||||
|
*
|
||||||
|
* "There is a relaxed alignment rule for word destinations. When
|
||||||
|
* the destination type is word (UW, W, HF), destination data types
|
||||||
|
* can be aligned to either the lowest word or the second lowest
|
||||||
|
* word of the execution channel. This means the destination data
|
||||||
|
* words can be either all in the even word locations or all in the
|
||||||
|
* odd word locations."
|
||||||
|
*
|
||||||
|
* We do not implement the second rule as is though, since empirical
|
||||||
|
* testing shows inconsistencies:
|
||||||
|
* - It suggests that packed 16-bit is not allowed, which is not true.
|
||||||
|
* - It suggests that conversions from Q/DF to W (which need to be
|
||||||
|
* 64-bit aligned on the destination) are not possible, which is
|
||||||
|
* not true.
|
||||||
|
*
|
||||||
|
* So from this rule we only validate the implication that conversions
|
||||||
|
* from F to HF need to be DWord strided (except in Align1 mixed
|
||||||
|
* float mode where packed fp16 destination is allowed so long as the
|
||||||
|
* destination is oword-aligned).
|
||||||
|
*
|
||||||
|
* Finally, we only validate this for Align1 because Align16 always
|
||||||
|
* requires packed destinations, so these restrictions can't possibly
|
||||||
|
* apply to Align16 mode.
|
||||||
|
*/
|
||||||
|
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
|
||||||
|
if ((dst_type == BRW_REGISTER_TYPE_HF &&
|
||||||
|
(brw_reg_type_is_integer(src0_type) ||
|
||||||
|
(num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
|
||||||
|
(brw_reg_type_is_integer(dst_type) &&
|
||||||
|
(src0_type == BRW_REGISTER_TYPE_HF ||
|
||||||
|
(num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
|
||||||
|
ERROR_IF(dst_stride * dst_type_size != 4,
|
||||||
|
"Conversions between integer and half-float must be "
|
||||||
|
"strided by a DWord on the destination");
|
||||||
|
|
||||||
|
unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||||
|
ERROR_IF(subreg % 4 != 0,
|
||||||
|
"Conversions between integer and half-float must be "
|
||||||
|
"aligned to a DWord on the destination");
|
||||||
|
} else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
|
||||||
|
dst_type == BRW_REGISTER_TYPE_HF) {
|
||||||
|
unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||||
|
ERROR_IF(dst_stride != 2 &&
|
||||||
|
!(is_mixed_float(devinfo, inst) &&
|
||||||
|
dst_stride == 1 && subreg % 16 == 0),
|
||||||
|
"Conversions to HF must have either all words in even "
|
||||||
|
"word locations or all words in odd word locations or "
|
||||||
|
"be mixed-float with Oword-aligned packed destination");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* There are special regioning rules for mixed-float mode in CHV and SKL that
|
||||||
|
* override the general rule for the ratio of sizes of the destination type
|
||||||
|
* and the execution type. We will add validation for those in a later patch.
|
||||||
|
*/
|
||||||
|
bool validate_dst_size_and_exec_size_ratio =
|
||||||
|
!is_mixed_float(devinfo, inst) ||
|
||||||
|
!(devinfo->is_cherryview || devinfo->gen >= 9);
|
||||||
|
|
||||||
|
if (validate_dst_size_and_exec_size_ratio &&
|
||||||
|
exec_type_size > dst_type_size) {
|
||||||
if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
|
if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
|
||||||
ERROR_IF(dst_stride * dst_type_size != exec_type_size,
|
ERROR_IF(dst_stride * dst_type_size != exec_type_size,
|
||||||
"Destination stride must be equal to the ratio of the sizes "
|
"Destination stride must be equal to the ratio of the sizes "
|
||||||
|
@@ -848,6 +848,122 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(validation_test, half_float_conversion)
|
||||||
|
{
|
||||||
|
static const struct {
|
||||||
|
enum brw_reg_type dst_type;
|
||||||
|
enum brw_reg_type src_type;
|
||||||
|
unsigned dst_stride;
|
||||||
|
unsigned dst_subnr;
|
||||||
|
bool expected_result_bdw;
|
||||||
|
bool expected_result_chv_gen9;
|
||||||
|
} inst[] = {
|
||||||
|
#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
|
||||||
|
{ \
|
||||||
|
BRW_REGISTER_TYPE_##dst_type, \
|
||||||
|
BRW_REGISTER_TYPE_##src_type, \
|
||||||
|
BRW_HORIZONTAL_STRIDE_##dst_stride, \
|
||||||
|
dst_subnr, \
|
||||||
|
expected_result, \
|
||||||
|
expected_result, \
|
||||||
|
}
|
||||||
|
#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
|
||||||
|
expected_result_bdw, expected_result_chv_gen9) \
|
||||||
|
{ \
|
||||||
|
BRW_REGISTER_TYPE_##dst_type, \
|
||||||
|
BRW_REGISTER_TYPE_##src_type, \
|
||||||
|
BRW_HORIZONTAL_STRIDE_##dst_stride, \
|
||||||
|
dst_subnr, \
|
||||||
|
expected_result_bdw, \
|
||||||
|
expected_result_chv_gen9, \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MOV to half-float destination */
|
||||||
|
INST_C(HF, B, 1, 0, false),
|
||||||
|
INST_C(HF, W, 1, 0, false),
|
||||||
|
INST_C(HF, HF, 1, 0, true),
|
||||||
|
INST_C(HF, HF, 1, 2, true),
|
||||||
|
INST_C(HF, D, 1, 0, false),
|
||||||
|
INST_S(HF, F, 1, 0, false, true),
|
||||||
|
INST_C(HF, Q, 1, 0, false),
|
||||||
|
INST_C(HF, B, 2, 0, true),
|
||||||
|
INST_C(HF, B, 2, 2, false),
|
||||||
|
INST_C(HF, W, 2, 0, true),
|
||||||
|
INST_C(HF, W, 2, 2, false),
|
||||||
|
INST_C(HF, HF, 2, 0, true),
|
||||||
|
INST_C(HF, HF, 2, 2, true),
|
||||||
|
INST_C(HF, D, 2, 0, true),
|
||||||
|
INST_C(HF, D, 2, 2, false),
|
||||||
|
INST_C(HF, F, 2, 0, true),
|
||||||
|
INST_S(HF, F, 2, 2, false, true),
|
||||||
|
INST_C(HF, Q, 2, 0, false),
|
||||||
|
INST_C(HF, DF, 2, 0, false),
|
||||||
|
INST_C(HF, B, 4, 0, false),
|
||||||
|
INST_C(HF, W, 4, 0, false),
|
||||||
|
INST_C(HF, HF, 4, 0, true),
|
||||||
|
INST_C(HF, HF, 4, 2, true),
|
||||||
|
INST_C(HF, D, 4, 0, false),
|
||||||
|
INST_C(HF, F, 4, 0, false),
|
||||||
|
INST_C(HF, Q, 4, 0, false),
|
||||||
|
INST_C(HF, DF, 4, 0, false),
|
||||||
|
|
||||||
|
/* MOV from half-float source */
|
||||||
|
INST_C( B, HF, 1, 0, false),
|
||||||
|
INST_C( W, HF, 1, 0, false),
|
||||||
|
INST_C( D, HF, 1, 0, true),
|
||||||
|
INST_C( D, HF, 1, 4, true),
|
||||||
|
INST_C( F, HF, 1, 0, true),
|
||||||
|
INST_C( F, HF, 1, 4, true),
|
||||||
|
INST_C( Q, HF, 1, 0, false),
|
||||||
|
INST_C(DF, HF, 1, 0, false),
|
||||||
|
INST_C( B, HF, 2, 0, false),
|
||||||
|
INST_C( W, HF, 2, 0, true),
|
||||||
|
INST_C( W, HF, 2, 2, false),
|
||||||
|
INST_C( D, HF, 2, 0, false),
|
||||||
|
INST_C( F, HF, 2, 0, true),
|
||||||
|
INST_C( B, HF, 4, 0, true),
|
||||||
|
INST_C( B, HF, 4, 1, false),
|
||||||
|
INST_C( W, HF, 4, 0, false),
|
||||||
|
|
||||||
|
#undef INST_C
|
||||||
|
#undef INST_S
|
||||||
|
};
|
||||||
|
|
||||||
|
if (devinfo.gen < 8)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
|
||||||
|
if (!devinfo.has_64bit_types &&
|
||||||
|
(type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
|
||||||
|
|
||||||
|
brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
|
||||||
|
|
||||||
|
brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
|
||||||
|
brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
|
||||||
|
|
||||||
|
if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
|
||||||
|
brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
|
||||||
|
brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
|
||||||
|
brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
|
||||||
|
} else {
|
||||||
|
brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
|
||||||
|
brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
|
||||||
|
brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devinfo.is_cherryview || devinfo.gen >= 9)
|
||||||
|
EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
|
||||||
|
else
|
||||||
|
EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
|
||||||
|
|
||||||
|
clear_instructions(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(validation_test, vector_immediate_destination_alignment)
|
TEST_P(validation_test, vector_immediate_destination_alignment)
|
||||||
{
|
{
|
||||||
static const struct {
|
static const struct {
|
||||||
|
Reference in New Issue
Block a user