anv: simplify buffer address+size loads from descriptor buffer

Only found a couple of titles that are helped by this:

 PERCENTAGE DELTAS Shaders   Instrs    Cycles
 cyberpunk_2077    10388     -0.00%    -0.00%
 -----------------------------------------------
 All affected      1         -2.24%    -0.39%
 -----------------------------------------------
 Total             10388     -0.00%    -0.00%

 PERCENTAGE DELTAS    Shaders   Instrs    Cycles
 red_dead_redemption2 5949      -0.10%    -0.00%
 --------------------------------------------------
 All affected         111       -0.74%    -0.14%
 --------------------------------------------------
 Total                5949      -0.10%    -0.00%

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23318>
This commit is contained in:
Lionel Landwerlin
2023-05-30 14:33:33 +03:00
committed by Marge Bot
parent f1f58c3bea
commit 50c29e1ffa
2 changed files with 9 additions and 70 deletions

View File

@@ -1375,6 +1375,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
device->compiler->use_bindless_sampler_offset = !device->indirect_descriptors;
isl_device_init(&device->isl_dev, &device->info);
device->isl_dev.buffer_length_in_aux_addr = true;
result = anv_physical_device_init_uuids(device);
if (result != VK_SUCCESS)

View File

@@ -348,7 +348,12 @@ build_load_descriptor_mem(nir_builder *b,
* like anv_address_range_descriptor where all the fields match perfectly the
* vec4 address format we need to generate for A64 messages. Instead we need
* to build the vec4 from parsing the RENDER_SURFACE_STATE structure. Easy
* enough for the surface address, lot less fun for the size.
* enough for the surface address, a lot less fun for the size, where you have to
* combine 3 fields scattered over multiple dwords, add one to the total and
* do a check against the surface type to deal with the null descriptors.
*
* Fortunately we can reuse the Auxiliary surface address field to stash our
* buffer size and just load a vec4.
*/
static nir_ssa_def *
build_load_render_surface_state_address(nir_builder *b,
@@ -358,80 +363,13 @@ build_load_render_surface_state_address(nir_builder *b,
{
const struct intel_device_info *devinfo = &state->pdevice->info;
assert(((RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) +
RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo) - 1) -
RENDER_SURFACE_STATE_Width_start(devinfo)) / 8 <= 32);
nir_ssa_def *surface_addr =
build_load_descriptor_mem(b, desc_addr,
RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
DIV_ROUND_UP(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo), 32),
32, state);
4, 32, state);
nir_ssa_def *addr_ldw = nir_channel(b, surface_addr, 0);
nir_ssa_def *addr_udw = nir_channel(b, surface_addr, 1);
/* Take all the RENDER_SURFACE_STATE fields from the beginning of the
* structure up to the Depth field.
*/
const uint32_t type_sizes_dwords =
DIV_ROUND_UP(RENDER_SURFACE_STATE_Depth_start(devinfo) +
RENDER_SURFACE_STATE_Depth_bits(devinfo), 32);
nir_ssa_def *type_sizes =
build_load_descriptor_mem(b, desc_addr, 0, type_sizes_dwords, 32, state);
const unsigned width_start = RENDER_SURFACE_STATE_Width_start(devinfo);
/* SKL PRMs, Volume 2d: Command Reference: Structures, RENDER_SURFACE_STATE
*
* Width: "bits [6:0] of the number of entries in the buffer - 1"
* Height: "bits [20:7] of the number of entries in the buffer - 1"
* Depth: "bits [31:21] of the number of entries in the buffer - 1"
*/
const unsigned width_bits = 7;
nir_ssa_def *width =
nir_iand_imm(b,
nir_ishr_imm(b,
nir_channel(b, type_sizes, width_start / 32),
width_start % 32),
(1u << width_bits) - 1);
const unsigned height_start = RENDER_SURFACE_STATE_Height_start(devinfo);
const unsigned height_bits = RENDER_SURFACE_STATE_Height_bits(devinfo);
nir_ssa_def *height =
nir_iand_imm(b,
nir_ishr_imm(b,
nir_channel(b, type_sizes, height_start / 32),
height_start % 32),
(1u << height_bits) - 1);
const unsigned depth_start = RENDER_SURFACE_STATE_Depth_start(devinfo);
const unsigned depth_bits = RENDER_SURFACE_STATE_Depth_bits(devinfo);
nir_ssa_def *depth =
nir_iand_imm(b,
nir_ishr_imm(b,
nir_channel(b, type_sizes, depth_start / 32),
depth_start % 32),
(1u << depth_bits) - 1);
nir_ssa_def *length = width;
length = nir_ior(b, length, nir_ishl_imm(b, height, width_bits));
length = nir_ior(b, length, nir_ishl_imm(b, depth, width_bits + height_bits));
length = nir_iadd_imm(b, length, 1);
/* Check the surface type, if it's SURFTYPE_NULL, set the length of the
* buffer to 0.
*/
const unsigned type_start = RENDER_SURFACE_STATE_SurfaceType_start(devinfo);
const unsigned type_dw = type_start / 32;
nir_ssa_def *type =
nir_iand_imm(b,
nir_ishr_imm(b,
nir_channel(b, type_sizes, type_dw),
type_start % 32),
(1u << RENDER_SURFACE_STATE_SurfaceType_bits(devinfo)) - 1);
length = nir_bcsel(b,
nir_ieq_imm(b, type, 7 /* SURFTYPE_NULL */),
nir_imm_int(b, 0), length);
nir_ssa_def *length = nir_channel(b, surface_addr, 3);
return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
}