intel/fs: Refactor our shuffle emit code
This adds an emit_scan_step helper which gives us a place to do something a bit more interesting than emitting a single op.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7329>
This commit is contained in:

committed by
Marge Bot

parent
44571c6a68
commit
a6500236e3
@@ -436,6 +436,18 @@ namespace brw {
|
|||||||
return src_reg(dst);
|
return src_reg(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
emit_scan_step(enum opcode opcode, brw_conditional_mod mod,
|
||||||
|
const dst_reg &tmp,
|
||||||
|
unsigned left_offset, unsigned left_stride,
|
||||||
|
unsigned right_offset, unsigned right_stride) const
|
||||||
|
{
|
||||||
|
dst_reg left, right;
|
||||||
|
left = horiz_stride(horiz_offset(tmp, left_offset), left_stride);
|
||||||
|
right = horiz_stride(horiz_offset(tmp, right_offset), right_stride);
|
||||||
|
set_condmod(mod, emit(opcode, right, left, right));
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_scan(enum opcode opcode, const dst_reg &tmp,
|
emit_scan(enum opcode opcode, const dst_reg &tmp,
|
||||||
unsigned cluster_size, brw_conditional_mod mod) const
|
unsigned cluster_size, brw_conditional_mod mod) const
|
||||||
@@ -453,30 +465,23 @@ namespace brw {
|
|||||||
ubld.emit_scan(opcode, left, cluster_size, mod);
|
ubld.emit_scan(opcode, left, cluster_size, mod);
|
||||||
ubld.emit_scan(opcode, right, cluster_size, mod);
|
ubld.emit_scan(opcode, right, cluster_size, mod);
|
||||||
if (cluster_size > half_width) {
|
if (cluster_size > half_width) {
|
||||||
src_reg left_comp = component(left, half_width - 1);
|
ubld.emit_scan_step(opcode, mod, tmp,
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left_comp, right));
|
half_width - 1, 0, half_width, 1);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cluster_size > 1) {
|
if (cluster_size > 1) {
|
||||||
const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
|
const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
|
||||||
const dst_reg left = horiz_stride(tmp, 2);
|
ubld.emit_scan_step(opcode, mod, tmp, 0, 2, 1, 2);
|
||||||
const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cluster_size > 2) {
|
if (cluster_size > 2) {
|
||||||
if (type_sz(tmp.type) <= 4) {
|
if (type_sz(tmp.type) <= 4) {
|
||||||
const fs_builder ubld =
|
const fs_builder ubld =
|
||||||
exec_all().group(dispatch_width() / 4, 0);
|
exec_all().group(dispatch_width() / 4, 0);
|
||||||
src_reg left = horiz_stride(horiz_offset(tmp, 1), 4);
|
ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 2, 4);
|
||||||
|
ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 3, 4);
|
||||||
dst_reg right = horiz_stride(horiz_offset(tmp, 2), 4);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
|
|
||||||
right = horiz_stride(horiz_offset(tmp, 3), 4);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
} else {
|
} else {
|
||||||
/* For 64-bit types, we have to do things differently because
|
/* For 64-bit types, we have to do things differently because
|
||||||
* the code above would land us with destination strides that
|
* the code above would land us with destination strides that
|
||||||
@@ -485,12 +490,8 @@ namespace brw {
|
|||||||
* instructions.
|
* instructions.
|
||||||
*/
|
*/
|
||||||
const fs_builder ubld = exec_all().group(2, 0);
|
const fs_builder ubld = exec_all().group(2, 0);
|
||||||
|
for (unsigned i = 0; i < dispatch_width(); i += 4)
|
||||||
for (unsigned i = 0; i < dispatch_width(); i += 4) {
|
ubld.emit_scan_step(opcode, mod, tmp, i + 1, 0, i + 2, 1);
|
||||||
src_reg left = component(tmp, i + 1);
|
|
||||||
dst_reg right = horiz_offset(tmp, i + 2);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -498,24 +499,14 @@ namespace brw {
|
|||||||
i < MIN2(cluster_size, dispatch_width());
|
i < MIN2(cluster_size, dispatch_width());
|
||||||
i *= 2) {
|
i *= 2) {
|
||||||
const fs_builder ubld = exec_all().group(i, 0);
|
const fs_builder ubld = exec_all().group(i, 0);
|
||||||
src_reg left = component(tmp, i - 1);
|
ubld.emit_scan_step(opcode, mod, tmp, i - 1, 0, i, 1);
|
||||||
dst_reg right = horiz_offset(tmp, i);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
|
|
||||||
if (dispatch_width() > i * 2) {
|
if (dispatch_width() > i * 2)
|
||||||
left = component(tmp, i * 3 - 1);
|
ubld.emit_scan_step(opcode, mod, tmp, i * 3 - 1, 0, i * 3, 1);
|
||||||
right = horiz_offset(tmp, i * 3);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dispatch_width() > i * 4) {
|
if (dispatch_width() > i * 4) {
|
||||||
left = component(tmp, i * 5 - 1);
|
ubld.emit_scan_step(opcode, mod, tmp, i * 5 - 1, 0, i * 5, 1);
|
||||||
right = horiz_offset(tmp, i * 5);
|
ubld.emit_scan_step(opcode, mod, tmp, i * 7 - 1, 0, i * 7, 1);
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
|
|
||||||
left = component(tmp, i * 7 - 1);
|
|
||||||
right = horiz_offset(tmp, i * 7);
|
|
||||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user