intel/fs/xe2: Fix up subdword integer region restriction with strided byte src and packed byte dst.
This fixes a corner case of the LNL sub-dword integer restrictions
that wasn't being detected by has_subdword_integer_region_restriction(),
specifically:
> if(Src.Type==Byte && Dst.Type==Byte && Dst.Stride==1 && W!=2) {
> // ...
> if(Src.Stride == 2) && (Src.UniformStride) && (Dst.SubReg%32 == Src.SubReg/2 ) { Allowed }
> // ...
> }
All the other restrictions that require agreement between the SubReg
number of source and destination only affect sources with a stride
greater than a dword, which is why
has_subdword_integer_region_restriction() was returning false except
when "byte_stride(srcs[i]) >= 4" evaluated to true, but as implied by
the pseudocode above, in the particular case of a packed byte
destination, the restriction applies for source strides as narrow as
2B.
The form of the equation that relates the subreg numbers is consistent
with the existing calculations in brw_fs_lower_regioning (see
required_src_byte_offset()), we just need to enable lowering for this
corner case, and change lower_dst_region() to call lower_instruction()
recursively, since some of the cases where we break this restriction
are copy instructions introduced by brw_fs_lower_regioning() itself
trying to lower other instructions with byte destinations.
This fixes some Vulkan CTS test-cases that were hitting these
restrictions with byte data types.
Fixes: 217d412360
("intel/fs/gfx20+: Implement sub-dword integer regioning restrictions.")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32261>
This commit is contained in:

committed by
Dylan Baker

parent
7dc34f1147
commit
f35c690b12
@@ -974,7 +974,7 @@
|
||||
"description": "intel/fs/xe2: Fix up subdword integer region restriction with strided byte src and packed byte dst.",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 3,
|
||||
"main_sha": null,
|
||||
"because_sha": "217d412360762803bc9941ba5171ca0be2c5332b",
|
||||
"notes": null
|
||||
|
@@ -58,9 +58,8 @@ namespace {
|
||||
return MAX2(brw_type_size_bytes(inst->dst.type),
|
||||
byte_stride(inst->dst));
|
||||
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst) &&
|
||||
brw_type_size_bytes(inst->src[i].type) < 4 &&
|
||||
byte_stride(inst->src[i]) >= 4) {
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst,
|
||||
&inst->src[i], 1)) {
|
||||
/* Use a stride of 32bits if possible, since that will guarantee that
|
||||
* the copy emitted to lower this region won't be affected by the
|
||||
* sub-dword integer region restrictions. This may not be possible
|
||||
@@ -85,9 +84,8 @@ namespace {
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst)) {
|
||||
return reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst) &&
|
||||
brw_type_size_bytes(inst->src[i].type) < 4 &&
|
||||
byte_stride(inst->src[i]) >= 4) {
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst,
|
||||
&inst->src[i], 1)) {
|
||||
const unsigned dst_byte_stride =
|
||||
MAX2(byte_stride(inst->dst), brw_type_size_bytes(inst->dst.type));
|
||||
const unsigned src_byte_stride = required_src_byte_stride(devinfo, inst, i);
|
||||
@@ -647,9 +645,16 @@ namespace {
|
||||
subscript(inst->dst, raw_type, j));
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < n; j++)
|
||||
ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j),
|
||||
subscript(tmp, raw_type, j));
|
||||
for (unsigned j = 0; j < n; j++) {
|
||||
fs_inst *jnst = ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j),
|
||||
subscript(tmp, raw_type, j));
|
||||
if (has_subdword_integer_region_restriction(v->devinfo, jnst)) {
|
||||
/* The copy isn't guaranteed to comply with all subdword integer
|
||||
* regioning restrictions in some cases. Lower it recursively.
|
||||
*/
|
||||
lower_instruction(v, block, jnst);
|
||||
}
|
||||
}
|
||||
|
||||
/* If the destination was an accumulator, after lowering it will be a
|
||||
* GRF. Clear writes_accumulator for the instruction.
|
||||
|
@@ -445,8 +445,12 @@ has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
||||
brw_type_size_bytes(inst->dst.type)) < 4) {
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
if (brw_type_is_int(srcs[i].type) &&
|
||||
brw_type_size_bytes(srcs[i].type) < 4 &&
|
||||
byte_stride(srcs[i]) >= 4)
|
||||
((brw_type_size_bytes(srcs[i].type) < 4 &&
|
||||
byte_stride(srcs[i]) >= 4) ||
|
||||
(MAX2(byte_stride(inst->dst),
|
||||
brw_type_size_bytes(inst->dst.type)) == 1 &&
|
||||
brw_type_size_bytes(srcs[i].type) == 1 &&
|
||||
byte_stride(srcs[i]) >= 2)))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user