intel/fs: make scan/reduce work with SIMD32 when it fits 2 registers
When dealing with uint16_t and uint8_t on SIMD32, we can do all the operations using just 2 registers, so we don't hit the recursion at the beginning of emit_scan(). Because of that, we need to actually compute scan/reduce for channels 31:16.

v2: Still missed instructions (Jason).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
This commit is contained in:

committed by
Jason Ekstrand

parent
7f07046dbc
commit
d9ddf5076d
@@ -514,6 +514,16 @@ namespace brw {
|
||||
right = horiz_offset(tmp, 8 + 4);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
}
|
||||
|
||||
if (dispatch_width() > 16) {
|
||||
left = component(tmp, 16 + 3);
|
||||
right = horiz_offset(tmp, 16 + 4);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
|
||||
left = component(tmp, 24 + 3);
|
||||
right = horiz_offset(tmp, 24 + 4);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
}
|
||||
}
|
||||
|
||||
if (cluster_size > 8 && dispatch_width() > 8) {
|
||||
@@ -521,6 +531,19 @@ namespace brw {
|
||||
src_reg left = component(tmp, 7);
|
||||
dst_reg right = horiz_offset(tmp, 8);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
|
||||
if (dispatch_width() > 16) {
|
||||
left = component(tmp, 16 + 7);
|
||||
right = horiz_offset(tmp, 16 + 8);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
}
|
||||
}
|
||||
|
||||
if (cluster_size > 16 && dispatch_width() > 16) {
|
||||
const fs_builder ubld = exec_all().group(16, 0);
|
||||
src_reg left = component(tmp, 15);
|
||||
dst_reg right = horiz_offset(tmp, 16);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user