intel/compiler: tileY friendly LID order for CS
Compute shaders that access tileY resources (textures) benefit from Y-local accesses. The easiest way to implement this is to walk local IDs in Y-major fashion instead of X-major fashion. Y-major local IDs reduce partial writes and increase cache locality for tileY accesses, since the cachelines of tileY resources progress in the Y direction. Improves performance on TGL: Borderlands3.dxvk-g2 +1.5%. Y-major order can introduce a performance drop for compute shaders that use a mixture of buffers and images. This should be fixed in the next commit. Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10733>
This commit is contained in:
@@ -111,14 +111,28 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
|
||||
nir_ssa_def *id_x, *id_y, *id_z;
|
||||
switch (state->nir->info.cs.derivative_group) {
|
||||
case DERIVATIVE_GROUP_NONE:
|
||||
/* If not using derivatives, just set the local invocation
|
||||
* index linearly, and calculate local invocation ID from that.
|
||||
*/
|
||||
id_x = nir_umod(b, linear, size_x);
|
||||
id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y);
|
||||
if (nir->info.num_images == 0 &&
|
||||
nir->info.num_textures == 0) {
|
||||
/* X-major lid order. Optimal for linear accesses only,
|
||||
* which are usually buffers. X,Y ordering will look like:
|
||||
* (0,0) (1,0) (2,0) ... (size_x-1,0) (0,1) (1,1) ...
|
||||
*/
|
||||
id_x = nir_umod(b, linear, size_x);
|
||||
id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y);
|
||||
local_index = linear;
|
||||
} else {
|
||||
/* Y-major lid order. Optimal for tileY accesses only,
|
||||
* which are usually images. X,Y ordering will look like:
|
||||
* (0,0) (0,1) (0,2) ... (0,size_y-1) (1,0) (1,1) ...
|
||||
*/
|
||||
id_y = nir_umod(b, linear, size_y);
|
||||
id_x = nir_umod(b, nir_udiv(b, linear, size_y), size_x);
|
||||
local_index = nir_iadd(b, nir_iadd(b, id_x,
|
||||
nir_imul(b, id_y, size_x)),
|
||||
nir_imul(b, id_z, size_xy));
|
||||
}
|
||||
id_z = nir_udiv(b, linear, size_xy);
|
||||
local_id = nir_vec3(b, id_x, id_y, id_z);
|
||||
local_index = linear;
|
||||
break;
|
||||
case DERIVATIVE_GROUP_LINEAR:
|
||||
/* For linear, just set the local invocation index linearly,
|
||||
|
Reference in New Issue
Block a user