intel/compiler: validate region restrictions for mixed float mode
v2: - Adapted unit tests to make them consistent with the changes done to the validation of half-float conversions. v3 (Curro): - Check all the accumulators - Constify declarations - Do not check src1 type in single-source instructions. - Check for all instructions that read accumulator (either implicitly or explicitly) - Check restrictions in src1 too. - Merge conditional block - Add invalid test case. v4 (Curro): - Assert on 3-src instructions, as they are not validated. - Get rid of types_are_mixed_float(), as we know instruction is mixed float at that point. - Remove conditions from not verified case. - Fix brackets on conditional. Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:

committed by
Juan A. Suarez Romero

parent
58d6417e59
commit
8ed6d74c92
@@ -170,6 +170,20 @@ src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
|
|||||||
brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
|
brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
|
||||||
|
{
|
||||||
|
return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||||
|
(brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
|
||||||
|
{
|
||||||
|
return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||||
|
(brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
|
src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
|
||||||
{
|
{
|
||||||
@@ -275,6 +289,24 @@ sources_not_null(const struct gen_device_info *devinfo,
|
|||||||
return error_msg;
|
return error_msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
|
||||||
|
{
|
||||||
|
/* Check instructions that use implicit accumulator sources */
|
||||||
|
switch (brw_inst_opcode(devinfo, inst)) {
|
||||||
|
case BRW_OPCODE_MAC:
|
||||||
|
case BRW_OPCODE_MACH:
|
||||||
|
case BRW_OPCODE_SADA2:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FIXME: support 3-src instructions */
|
||||||
|
unsigned num_sources = num_sources_from_inst(devinfo, inst);
|
||||||
|
assert(num_sources < 3);
|
||||||
|
|
||||||
|
return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
|
||||||
|
}
|
||||||
|
|
||||||
static struct string
|
static struct string
|
||||||
send_restrictions(const struct gen_device_info *devinfo,
|
send_restrictions(const struct gen_device_info *devinfo,
|
||||||
const brw_inst *inst)
|
const brw_inst *inst)
|
||||||
@@ -938,6 +970,223 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
|
|||||||
return error_msg;
|
return error_msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct string
|
||||||
|
special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
|
||||||
|
const brw_inst *inst)
|
||||||
|
{
|
||||||
|
struct string error_msg = { .str = NULL, .len = 0 };
|
||||||
|
|
||||||
|
const unsigned opcode = brw_inst_opcode(devinfo, inst);
|
||||||
|
const unsigned num_sources = num_sources_from_inst(devinfo, inst);
|
||||||
|
if (num_sources >= 3)
|
||||||
|
return error_msg;
|
||||||
|
|
||||||
|
if (!is_mixed_float(devinfo, inst))
|
||||||
|
return error_msg;
|
||||||
|
|
||||||
|
unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
|
||||||
|
bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
|
||||||
|
|
||||||
|
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||||
|
enum brw_reg_type src1_type = num_sources > 1 ?
|
||||||
|
brw_inst_src1_type(devinfo, inst) : 0;
|
||||||
|
enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
|
||||||
|
|
||||||
|
unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
|
||||||
|
bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "Indirect addressing on source is not supported when source and
|
||||||
|
* destination data types are mixed float."
|
||||||
|
*/
|
||||||
|
ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
|
||||||
|
(num_sources > 1 &&
|
||||||
|
brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
|
||||||
|
"Indirect addressing on source is not supported when source and "
|
||||||
|
"destination data types are mixed float");
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "No SIMD16 in mixed mode when destination is f32. Instruction
|
||||||
|
* execution size must be no more than 8."
|
||||||
|
*/
|
||||||
|
ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
|
||||||
|
"Mixed float mode with 32-bit float destination is limited "
|
||||||
|
"to SIMD8");
|
||||||
|
|
||||||
|
if (is_align16) {
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "In Align16 mode, when half float and float data types are mixed
|
||||||
|
* between source operands OR between source and destination operands,
|
||||||
|
* the register content are assumed to be packed."
|
||||||
|
*
|
||||||
|
* Since Align16 doesn't have a concept of horizontal stride (or width),
|
||||||
|
* it means that vertical stride must always be 4, since 0 and 2 would
|
||||||
|
* lead to replicated data, and any other value is disallowed in Align16.
|
||||||
|
*/
|
||||||
|
ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||||
|
"Align16 mixed float mode assumes packed data (vstride must be 4");
|
||||||
|
|
||||||
|
ERROR_IF(num_sources >= 2 &&
|
||||||
|
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||||
|
"Align16 mixed float mode assumes packed data (vstride must be 4");
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "For Align16 mixed mode, both input and output packed f16 data
|
||||||
|
* must be oword aligned, no oword crossing in packed f16."
|
||||||
|
*
|
||||||
|
* The previous rule requires that Align16 operands are always packed,
|
||||||
|
* and since there is only one bit for Align16 subnr, which represents
|
||||||
|
* offsets 0B and 16B, this rule is always enforced and we don't need to
|
||||||
|
* validate it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "No SIMD16 in mixed mode when destination is packed f16 for both
|
||||||
|
* Align1 and Align16."
|
||||||
|
*
|
||||||
|
* And:
|
||||||
|
*
|
||||||
|
* "In Align16 mode, when half float and float data types are mixed
|
||||||
|
* between source operands OR between source and destination operands,
|
||||||
|
* the register content are assumed to be packed."
|
||||||
|
*
|
||||||
|
* Which implies that SIMD16 is not available in Align16. This is further
|
||||||
|
* confirmed by:
|
||||||
|
*
|
||||||
|
* "For Align16 mixed mode, both input and output packed f16 data
|
||||||
|
* must be oword aligned, no oword crossing in packed f16"
|
||||||
|
*
|
||||||
|
* Since oword-aligned packed f16 data would cross oword boundaries when
|
||||||
|
* the execution size is larger than 8.
|
||||||
|
*/
|
||||||
|
ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "No accumulator read access for Align16 mixed float."
|
||||||
|
*/
|
||||||
|
ERROR_IF(inst_uses_src_acc(devinfo, inst),
|
||||||
|
"No accumulator read access for Align16 mixed float");
|
||||||
|
} else {
|
||||||
|
assert(!is_align16);
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "No SIMD16 in mixed mode when destination is packed f16 for both
|
||||||
|
* Align1 and Align16."
|
||||||
|
*/
|
||||||
|
ERROR_IF(exec_size > 8 && dst_is_packed &&
|
||||||
|
dst_type == BRW_REGISTER_TYPE_HF,
|
||||||
|
"Align1 mixed float mode is limited to SIMD8 when destination "
|
||||||
|
"is packed half-float");
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "Math operations for mixed mode:
|
||||||
|
* - In Align1, f16 inputs need to be strided"
|
||||||
|
*/
|
||||||
|
if (opcode == BRW_OPCODE_MATH) {
|
||||||
|
if (src0_type == BRW_REGISTER_TYPE_HF) {
|
||||||
|
ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
|
||||||
|
"Align1 mixed mode math needs strided half-float inputs");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
|
||||||
|
ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
|
||||||
|
"Align1 mixed mode math needs strided half-float inputs");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "In Align1, destination stride can be smaller than execution
|
||||||
|
* type. When destination is stride of 1, 16 bit packed data is
|
||||||
|
* updated on the destination. However, output packed f16 data
|
||||||
|
* must be oword aligned, no oword crossing in packed f16."
|
||||||
|
*
|
||||||
|
* The requirement of not crossing oword boundaries for 16-bit oword
|
||||||
|
* aligned data means that execution size is limited to 8.
|
||||||
|
*/
|
||||||
|
unsigned subreg;
|
||||||
|
if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
|
||||||
|
subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||||
|
else
|
||||||
|
subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
|
||||||
|
ERROR_IF(subreg % 16 != 0,
|
||||||
|
"Align1 mixed mode packed half-float output must be "
|
||||||
|
"oword aligned");
|
||||||
|
ERROR_IF(exec_size > 8,
|
||||||
|
"Align1 mixed mode packed half-float output must not "
|
||||||
|
"cross oword boundaries (max exec size is 8)");
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "When source is float or half float from accumulator register and
|
||||||
|
* destination is half float with a stride of 1, the source must
|
||||||
|
* register aligned. i.e., source must have offset zero."
|
||||||
|
*
|
||||||
|
* Align16 mixed float mode doesn't allow accumulator access on sources,
|
||||||
|
* so we only need to check this for Align1.
|
||||||
|
*/
|
||||||
|
if (src0_is_acc(devinfo, inst) &&
|
||||||
|
(src0_type == BRW_REGISTER_TYPE_F ||
|
||||||
|
src0_type == BRW_REGISTER_TYPE_HF)) {
|
||||||
|
ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
|
||||||
|
"Mixed float mode requires register-aligned accumulator "
|
||||||
|
"source reads when destination is packed half-float");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_sources > 1 &&
|
||||||
|
src1_is_acc(devinfo, inst) &&
|
||||||
|
(src1_type == BRW_REGISTER_TYPE_F ||
|
||||||
|
src1_type == BRW_REGISTER_TYPE_HF)) {
|
||||||
|
ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
|
||||||
|
"Mixed float mode requires register-aligned accumulator "
|
||||||
|
"source reads when destination is packed half-float");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||||
|
* Float Operations:
|
||||||
|
*
|
||||||
|
* "No swizzle is allowed when an accumulator is used as an implicit
|
||||||
|
* source or an explicit source in an instruction. i.e. when
|
||||||
|
* destination is half float with an implicit accumulator source,
|
||||||
|
* destination stride needs to be 2."
|
||||||
|
*
|
||||||
|
* FIXME: it is not quite clear what the first sentence actually means
|
||||||
|
* or its link to the implication described after it, so we only
|
||||||
|
* validate the explicit implication, which is clearly described.
|
||||||
|
*/
|
||||||
|
if (dst_type == BRW_REGISTER_TYPE_HF &&
|
||||||
|
inst_uses_src_acc(devinfo, inst)) {
|
||||||
|
ERROR_IF(dst_stride != 2,
|
||||||
|
"Mixed float mode with implicit/explicit accumulator "
|
||||||
|
"source and half-float destination requires a stride "
|
||||||
|
"of 2 on the destination");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return error_msg;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an \p access_mask for an \p exec_size, \p element_size, and a region
|
* Creates an \p access_mask for an \p exec_size, \p element_size, and a region
|
||||||
*
|
*
|
||||||
@@ -1576,6 +1825,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
|
|||||||
CHECK(send_restrictions);
|
CHECK(send_restrictions);
|
||||||
CHECK(general_restrictions_based_on_operand_types);
|
CHECK(general_restrictions_based_on_operand_types);
|
||||||
CHECK(general_restrictions_on_region_parameters);
|
CHECK(general_restrictions_on_region_parameters);
|
||||||
|
CHECK(special_restrictions_for_mixed_float_mode);
|
||||||
CHECK(region_alignment_rules);
|
CHECK(region_alignment_rules);
|
||||||
CHECK(vector_immediate_restrictions);
|
CHECK(vector_immediate_restrictions);
|
||||||
CHECK(special_requirements_for_handling_double_precision_data_types);
|
CHECK(special_requirements_for_handling_double_precision_data_types);
|
||||||
|
@@ -1019,6 +1019,636 @@ TEST_P(validation_test, half_float_conversion)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Builds mixed-float ADD instructions and toggles the destination and
 * source 0 address modes; each row's expected_result is compared against
 * what validate() returns. Nothing is checked on gen < 8.
 */
TEST_P(validation_test, mixed_float_source_indirect_addressing)
{
   if (devinfo.gen < 8)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool dst_indirect;
      bool src0_indirect;
      bool expected_result;
   } cases[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, dst_indirect, src0_indirect, expected_result)    \
      {                                                                   \
         BRW_REGISTER_TYPE_##dst_type,                                    \
         BRW_REGISTER_TYPE_##src0_type,                                   \
         BRW_REGISTER_TYPE_##src1_type,                                   \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         dst_indirect,                                                    \
         src0_indirect,                                                   \
         expected_result,                                                 \
      }

      /* Mixed-float source/destination: every row with an indirect src0 is
       * expected to be rejected, whatever the destination address mode is.
       */
      INST(HF,  F,  F, 2, false, false, true),
      INST(HF,  F,  F, 2, true,  false, true),
      INST(HF,  F,  F, 2, false, true,  false),
      INST(HF,  F,  F, 2, true,  true,  false),
      INST( F, HF,  F, 1, false, false, true),
      INST( F, HF,  F, 1, true,  false, true),
      INST( F, HF,  F, 1, false, true,  false),
      INST( F, HF,  F, 1, true,  true,  false),

      INST(HF, HF,  F, 2, false, false, true),
      INST(HF, HF,  F, 2, true,  false, true),
      INST(HF, HF,  F, 2, false, true,  false),
      INST(HF, HF,  F, 2, true,  true,  false),
      INST( F,  F, HF, 1, false, false, true),
      INST( F,  F, HF, 1, true,  false, true),
      INST( F,  F, HF, 1, false, true,  false),
      INST( F,  F, HF, 1, true,  true,  false),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      brw_ADD(p, retype(g0, cases[c].dst_type),
                 retype(g0, cases[c].src0_type),
                 retype(g0, cases[c].src1_type));

      brw_inst_set_dst_address_mode(&devinfo, last_inst, cases[c].dst_indirect);
      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);
      brw_inst_set_src0_address_mode(&devinfo, last_inst, cases[c].src0_indirect);

      EXPECT_EQ(cases[c].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds mixed-float ADD instructions with SIMD8/SIMD16 execution sizes and
 * different destination strides; each row's expected_result is compared
 * against what validate() returns. Nothing is checked on gen < 8.
 */
TEST_P(validation_test, mixed_float_align1_simd16)
{
   if (devinfo.gen < 8)
      return;

   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool expected_result;
   } cases[] = {
#define INST(exec_size, dst_type, src0_type, src1_type,                   \
             dst_stride, expected_result)                                 \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_REGISTER_TYPE_##dst_type,                                    \
         BRW_REGISTER_TYPE_##src0_type,                                   \
         BRW_REGISTER_TYPE_##src1_type,                                   \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         expected_result,                                                 \
      }

      /* Packed f16 destination: SIMD16 is not allowed in mixed mode */
      INST( 8, HF,  F, HF, 2, true),
      INST(16, HF, HF,  F, 2, true),
      INST(16, HF, HF,  F, 1, false),
      INST(16, HF,  F, HF, 1, false),

      /* f32 destination: SIMD16 is not allowed in mixed mode */
      INST( 8,  F, HF,  F, 1, true),
      INST( 8,  F,  F, HF, 1, true),
      INST(16,  F, HF,  F, 1, false),
      INST(16,  F,  F, HF, 1, false),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      brw_ADD(p, retype(g0, cases[c].dst_type),
                 retype(g0, cases[c].src0_type),
                 retype(g0, cases[c].src1_type));

      brw_inst_set_exec_size(&devinfo, last_inst, cases[c].exec_size);

      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);

      EXPECT_EQ(cases[c].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds mixed-float ADD instructions whose source 0 is either acc0 or g0,
 * with varying destination stride and source 0 sub-register number. The
 * expected outcome is chosen per platform: the chv_skl column when
 * devinfo.is_cherryview is set or gen >= 9, the bdw column otherwise.
 * Nothing is checked on gen < 8.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
{
   if (devinfo.gen < 8)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      unsigned subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
   } cases[] = {
#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
             expected_result_bdw, expected_result_chv_skl)                  \
      {                                                                     \
         BRW_REGISTER_TYPE_##dst_type,                                      \
         BRW_REGISTER_TYPE_##src0_type,                                     \
         BRW_REGISTER_TYPE_##src1_type,                                     \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
         read_acc,                                                          \
         subnr,                                                             \
         expected_result_bdw,                                               \
         expected_result_chv_skl,                                           \
      }

      /* Strided (non-packed) destination */
      INST(HF, HF,  F, 2, true,  0, true, true),
      INST(HF, HF,  F, 2, true,  2, true, true),
      INST(HF, HF,  F, 2, true,  4, true, true),
      INST(HF, HF,  F, 2, true,  8, true, true),
      INST(HF, HF,  F, 2, true, 16, true, true),

      /* Packed destination, source 0 is not the accumulator */
      INST(HF, HF,  F, 1, false,  0, false, true),
      INST(HF, HF,  F, 1, false,  2, false, true),
      INST(HF, HF,  F, 1, false,  4, false, true),
      INST(HF, HF,  F, 1, false,  8, false, true),
      INST(HF, HF,  F, 1, false, 16, false, true),

      /* Packed destination, source 0 is the accumulator */
      INST(HF, HF,  F, 1, true,  0, false, false),
      INST(HF, HF,  F, 1, true,  2, false, false),
      INST(HF, HF,  F, 1, true,  4, false, false),
      INST(HF, HF,  F, 1, true,  8, false, false),
      INST(HF, HF,  F, 1, true, 16, false, false),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      brw_ADD(p, retype(g0, cases[c].dst_type),
                 retype(cases[c].read_acc ? acc0 : g0, cases[c].src0_type),
                 retype(g0, cases[c].src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);

      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, cases[c].subnr);

      /* Pick the expectation column that matches the device under test */
      const bool expected = (devinfo.is_cherryview || devinfo.gen >= 9) ?
                            cases[c].expected_result_chv_skl :
                            cases[c].expected_result_bdw;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds mixed-float MAC and ADD instructions (ADD optionally reading acc0
 * as source 0) with different destination strides. The expected outcome is
 * chosen per platform: the chv_skl column when devinfo.is_cherryview is set
 * or gen >= 9, the bdw column otherwise. Nothing is checked on gen < 8.
 */
TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
{
   if (devinfo.gen < 8)
      return;

   static const struct {
      unsigned exec_size;
      unsigned opcode;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
   } cases[] = {
#define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
             dst_stride, read_acc, expected_result_bdw,                   \
             expected_result_chv_skl)                                     \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_OPCODE_##opcode,                                             \
         BRW_REGISTER_TYPE_##dst_type,                                    \
         BRW_REGISTER_TYPE_##src0_type,                                   \
         BRW_REGISTER_TYPE_##src1_type,                                   \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         read_acc,                                                        \
         expected_result_bdw,                                             \
         expected_result_chv_skl,                                         \
      }

      /* fp16 destination with an implicit accumulator source (MAC):
       * hstride must be 2
       */
      INST(8, MAC, HF, HF,  F, 1, false, false, false),
      INST(8, MAC, HF, HF,  F, 2, false, true,  true),
      INST(8, MAC, HF,  F, HF, 1, false, false, false),
      INST(8, MAC, HF,  F, HF, 2, false, true,  true),

      /* fp16 destination with an explicit accumulator source:
       * hstride must be 2
       */
      INST(8, ADD, HF, HF,  F, 1, true,  false, false),
      INST(8, ADD, HF, HF,  F, 2, true,  true,  true),
      INST(8, ADD, HF,  F, HF, 1, true,  false, false),
      INST(8, ADD, HF,  F, HF, 2, true,  true,  true),

      /* Destination is not fp16: the restriction does not apply */
      INST(8, MAC,  F, HF,  F, 1, false, true, true),
      INST(8, MAC,  F, HF,  F, 2, false, true, true),

      /* No implicit/explicit accumulator: the restriction does not apply */
      INST(8, ADD, HF, HF,  F, 1, false, false, true),
      INST(8, ADD, HF, HF,  F, 2, false, true,  true),
      INST(8, ADD, HF,  F, HF, 1, false, false, true),
      INST(8, ADD, HF,  F, HF, 2, false, true,  true),
      INST(8, ADD,  F, HF,  F, 1, false, true,  true),
      INST(8, ADD,  F, HF,  F, 2, false, true,  true),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      if (cases[c].opcode == BRW_OPCODE_MAC) {
         /* MAC always uses GRF operands here; read_acc is not consulted */
         brw_MAC(p, retype(g0, cases[c].dst_type),
                    retype(g0, cases[c].src0_type),
                    retype(g0, cases[c].src1_type));
      } else {
         assert(cases[c].opcode == BRW_OPCODE_ADD);
         brw_ADD(p, retype(g0, cases[c].dst_type),
                    retype(cases[c].read_acc ? acc0 : g0, cases[c].src0_type),
                    retype(g0, cases[c].src1_type));
      }

      brw_inst_set_exec_size(&devinfo, last_inst, cases[c].exec_size);

      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);

      /* Pick the expectation column that matches the device under test */
      const bool expected = (devinfo.is_cherryview || devinfo.gen >= 9) ?
                            cases[c].expected_result_chv_skl :
                            cases[c].expected_result_bdw;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds mixed-float POW math instructions with different horizontal strides
 * on the destination and both sources; each row's expected_result is
 * compared against what validate() returns. Nothing is checked on gen < 9.
 */
TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
{
   /* No half-float math in gen8 */
   if (devinfo.gen < 9)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned src0_stride;
      unsigned src1_stride;
      bool expected_result;
   } cases[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, src0_stride, src1_stride, expected_result)       \
      {                                                                   \
         BRW_REGISTER_TYPE_##dst_type,                                    \
         BRW_REGISTER_TYPE_##src0_type,                                   \
         BRW_REGISTER_TYPE_##src1_type,                                   \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
         BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
         expected_result,                                                 \
      }

      INST(HF, HF,  F, 2, 2, 1, true),
      INST(HF,  F, HF, 2, 1, 2, true),
      INST(HF,  F, HF, 1, 1, 2, true),
      INST(HF,  F, HF, 2, 1, 1, false),
      INST(HF, HF,  F, 2, 1, 1, false),
      INST(HF, HF,  F, 1, 1, 1, false),
      INST(HF, HF,  F, 2, 1, 1, false),
      INST( F, HF,  F, 1, 1, 1, false),
      INST( F,  F, HF, 1, 1, 2, true),
      INST( F, HF, HF, 1, 2, 1, false),
      INST( F, HF, HF, 1, 2, 2, true),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      gen6_math(p, retype(g0, cases[c].dst_type),
                   BRW_MATH_FUNCTION_POW,
                   retype(g0, cases[c].src0_type),
                   retype(g0, cases[c].src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);

      /* Both sources use a <4;4,N> region; only N comes from the table */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, last_inst, cases[c].src0_stride);

      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, last_inst, cases[c].src1_stride);

      EXPECT_EQ(cases[c].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds mixed-float ADD instructions with different execution sizes,
 * destination strides and destination sub-register numbers. The expected
 * outcome is chosen per platform: the chv_skl column when
 * devinfo.is_cherryview is set or gen >= 9, the bdw column otherwise.
 * Nothing is checked on gen < 8.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
{
   if (devinfo.gen < 8)
      return;

   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
   } cases[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
             expected_result_bdw, expected_result_chv_skl)                     \
      {                                                                        \
         BRW_EXECUTE_##exec_size,                                              \
         BRW_REGISTER_TYPE_##dst_type,                                         \
         BRW_REGISTER_TYPE_##src0_type,                                        \
         BRW_REGISTER_TYPE_##src1_type,                                        \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
         dst_subnr,                                                            \
         expected_result_bdw,                                                  \
         expected_result_chv_skl                                               \
      }

      /* A SIMD8 packed fp16 destination stays within one oword as long as
       * the region is oword-aligned
       */
      INST( 8, HF, HF, F, 1,  0, false, true),
      INST( 8, HF, HF, F, 1,  2, false, false),
      INST( 8, HF, HF, F, 1,  4, false, false),
      INST( 8, HF, HF, F, 1,  8, false, false),
      INST( 8, HF, HF, F, 1, 16, false, true),

      /* A SIMD16 packed fp16 destination always crosses oword boundaries */
      INST(16, HF, HF, F, 1,  0, false, false),
      INST(16, HF, HF, F, 1,  2, false, false),
      INST(16, HF, HF, F, 1,  4, false, false),
      INST(16, HF, HF, F, 1,  8, false, false),
      INST(16, HF, HF, F, 1, 16, false, false),

      /* A destination that is not packed (or not fp16) may cross oword
       * boundaries
       */
      INST( 8, HF, HF, F, 2, 0, true, true),
      INST( 8,  F, HF, F, 1, 0, true, true),

#undef INST
   };

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      brw_ADD(p, retype(g0, cases[c].dst_type),
                 retype(g0, cases[c].src0_type),
                 retype(g0, cases[c].src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, cases[c].dst_stride);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, cases[c].dst_subnr);

      /* Both sources use a fixed <4;4,1> region */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_inst_set_exec_size(&devinfo, last_inst, cases[c].exec_size);

      /* Pick the expectation column that matches the device under test */
      const bool expected = (devinfo.is_cherryview || devinfo.gen >= 9) ?
                            cases[c].expected_result_chv_skl :
                            cases[c].expected_result_bdw;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Builds Align16 mixed-float ADD instructions with different vertical
 * strides on both sources; each row's expected_result is compared against
 * what validate() returns. Only runs for 8 <= gen < 11.
 */
TEST_P(validation_test, mixed_float_align16_packed_data)
{
   if (!(devinfo.gen >= 8 && devinfo.gen < 11))
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } cases[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             src0_vstride, src1_vstride, expected_result)                 \
      {                                                                   \
         BRW_REGISTER_TYPE_##dst_type,                                    \
         BRW_REGISTER_TYPE_##src0_type,                                   \
         BRW_REGISTER_TYPE_##src1_type,                                   \
         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
         expected_result,                                                 \
      }

      /* Only F destinations are exercised: F->HF conversions are restricted
       * to DWord-aligned destinations, while Align16 also requires the
       * destination horizontal stride to be 1.
       */
      INST(F,  F, HF, 4, 4, true),
      INST(F,  F, HF, 2, 4, false),
      INST(F,  F, HF, 4, 2, false),
      INST(F,  F, HF, 0, 4, false),
      INST(F,  F, HF, 4, 0, false),
      INST(F, HF,  F, 4, 4, true),
      INST(F, HF,  F, 4, 2, false),
      INST(F, HF,  F, 2, 4, false),
      INST(F, HF,  F, 0, 4, false),
      INST(F, HF,  F, 4, 0, false),

#undef INST
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned c = 0; c < num_cases; c++) {
      brw_ADD(p, retype(g0, cases[c].dst_type),
                 retype(g0, cases[c].src0_type),
                 retype(g0, cases[c].src1_type));

      brw_inst_set_src0_vstride(&devinfo, last_inst, cases[c].src0_vstride);
      brw_inst_set_src1_vstride(&devinfo, last_inst, cases[c].src1_vstride);

      EXPECT_EQ(cases[c].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Align16 mixed float mode: emit an ADD with an F destination and one F plus
 * one HF source at execution sizes 8 and 16, and compare the validator's
 * verdict with the expectation stored in each table row. The rows expect the
 * SIMD8 forms to be accepted and the SIMD16 forms to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)     \
      {                                                                      \
         BRW_EXECUTE_##exec_size,                                            \
         BRW_REGISTER_TYPE_##dst_type,                                       \
         BRW_REGISTER_TYPE_##src0_type,                                      \
         BRW_REGISTER_TYPE_##src1_type,                                      \
         expected_result,                                                    \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       *
       * Each (exec_size, src0, src1) combination is listed exactly once;
       * repeating a row would only re-run an identical instruction.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),

#undef INST
   };

   /* The cases below are only exercised on gen8 through gen10. */
   if (devinfo.gen < 8 || devinfo.gen >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      /* Override the execution size under test. */
      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      /* Both sources use vertical stride 4 in every row, so the execution
       * size is the only parameter that varies between rows.
       */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Align16 mixed float mode: emit an ADD with an F destination and one F plus
 * one HF source, optionally reading src0 from acc0 instead of g0, and compare
 * the validator's verdict with the expectation stored in each table row. The
 * rows expect rejection whenever src0 is the accumulator.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   /* The cases below are only exercised on gen8 through gen10. */
   if (devinfo.gen < 8 || devinfo.gen >= 11)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } inst[] = {
   /* We only test with F destination because there is a restriction
    * by which F->HF conversions need to be DWord aligned but Align16 also
    * requires that destination horizontal stride is 1. The row macro
    * therefore hard-codes the destination type.
    */
#define ROW(src0, src1, from_acc, ok)                                        \
      {                                                                      \
         BRW_REGISTER_TYPE_F,                                                \
         BRW_REGISTER_TYPE_##src0,                                           \
         BRW_REGISTER_TYPE_##src1,                                           \
         from_acc,                                                           \
         ok,                                                                 \
      }

      /*  src0  src1  read_acc  expected */
      ROW( F,   HF,   false,    true),
      ROW( F,   HF,   true,     false),
      ROW(HF,    F,   false,    true),
      ROW(HF,    F,   true,     false),

#undef ROW
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   const unsigned num_cases = sizeof(inst) / sizeof(inst[0]);
   for (unsigned i = 0; i < num_cases; i++) {
      /* Only src0 is ever switched to the accumulator; src1 stays in g0. */
      const struct brw_reg src0_base = inst[i].read_acc ? acc0 : g0;

      const struct brw_reg dst = retype(g0, inst[i].dst_type);
      const struct brw_reg src0 = retype(src0_base, inst[i].src0_type);
      const struct brw_reg src1 = retype(g0, inst[i].src1_type);

      brw_ADD(p, dst, src0, src1);

      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
|
/* Align16 mixed float mode, math instructions: emit a POW with an F
 * destination and at least one HF source, override the vertical stride of
 * both sources, and compare the validator's verdict with the expectation
 * stored in each table row.
 */
TEST_P(validation_test, mixed_float_align16_math_packed_format)
{
   /* Align16 Math for mixed float mode is not supported in gen8 */
   if (devinfo.gen < 9 || devinfo.gen >= 11)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } inst[] = {
   /* We only test with F destination because there is a restriction
    * by which F->HF conversions need to be DWord aligned but Align16 also
    * requires that destination horizontal stride is 1. The row macro
    * therefore hard-codes the destination type.
    */
#define ROW(src0, src1, vs0, vs1, ok)                                        \
      {                                                                      \
         BRW_REGISTER_TYPE_F,                                                \
         BRW_REGISTER_TYPE_##src0,                                           \
         BRW_REGISTER_TYPE_##src1,                                           \
         BRW_VERTICAL_STRIDE_##vs0,                                          \
         BRW_VERTICAL_STRIDE_##vs1,                                          \
         ok,                                                                 \
      }

      /*  src0  src1  vs0  vs1  expected */
      ROW(HF,    F,   4,   0,   false),
      ROW(HF,   HF,   4,   4,   true),
      ROW( F,   HF,   4,   0,   false),
      ROW( F,   HF,   2,   4,   false),
      ROW( F,   HF,   4,   2,   false),
      ROW(HF,   HF,   0,   4,   false),

#undef ROW
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   const unsigned num_cases = sizeof(inst) / sizeof(inst[0]);
   for (unsigned i = 0; i < num_cases; i++) {
      const struct brw_reg dst = retype(g0, inst[i].dst_type);
      const struct brw_reg src0 = retype(g0, inst[i].src0_type);
      const struct brw_reg src1 = retype(g0, inst[i].src1_type);

      gen6_math(p, dst, BRW_MATH_FUNCTION_POW, src0, src1);

      /* Patch the strides under test into the instruction just emitted. */
      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
|
||||||
|
|
||||||
TEST_P(validation_test, vector_immediate_destination_alignment)
|
TEST_P(validation_test, vector_immediate_destination_alignment)
|
||||||
{
|
{
|
||||||
static const struct {
|
static const struct {
|
||||||
|
Reference in New Issue
Block a user