From cccf0609a6364ffdb68ffa61c2f50dc0730caffc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 27 Apr 2024 21:18:30 -0400 Subject: [PATCH] asahi: simplify image atomic lowering Do more calculation in the preamble so we can do less pointer chasing and keep everything within our 64-bit budget. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/genxml/cmdbuf.xml | 42 +++++-------------- src/asahi/lib/agx_nir_lower_texture.c | 10 +---- src/asahi/lib/shaders/geometry.cl | 6 --- src/asahi/lib/shaders/libagx.h | 6 +++ src/asahi/lib/shaders/texture.cl | 45 ++++++++++++-------- src/gallium/drivers/asahi/agx_state.c | 60 +++++++++------------------ 6 files changed, 66 insertions(+), 103 deletions(-) diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml index d4101182be1..3394e098cad 100644 --- a/src/asahi/genxml/cmdbuf.xml +++ b/src/asahi/genxml/cmdbuf.xml @@ -204,36 +204,6 @@ - - - - - - - - - - - - - - - - - - - - @@ -279,8 +249,16 @@ - - + + + + + + + diff --git a/src/asahi/lib/agx_nir_lower_texture.c b/src/asahi/lib/agx_nir_lower_texture.c index 514d712a8ff..8eef8406d1b 100644 --- a/src/asahi/lib/agx_nir_lower_texture.c +++ b/src/asahi/lib/agx_nir_lower_texture.c @@ -490,19 +490,13 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr, (dim == GLSL_SAMPLER_DIM_CUBE) || (dim == GLSL_SAMPLER_DIM_3D); - /* The last 8 bytes of the 24-byte PBE descriptor points to the - * software-defined atomic descriptor. Grab the address. - */ - nir_def *meta_meta_ptr = nir_iadd_imm(b, desc_address, 16); - nir_def *meta_ptr = nir_load_global_constant(b, meta_meta_ptr, 8, 1, 64); - if (dim == GLSL_SAMPLER_DIM_BUF && return_index) { return nir_channel(b, coord, 0); } else if (dim == GLSL_SAMPLER_DIM_BUF) { - return libagx_buffer_texel_address(b, meta_ptr, coord, blocksize_B); + return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B); } else { return libagx_image_texel_address( - b, meta_ptr, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B, + b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B, nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D), nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered), nir_imm_bool(b, return_index)); diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index 0e79db6a91a..c66708b04e1 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -6,12 +6,6 @@ #include "geometry.h" -static uint -align(uint x, uint y) -{ - return (x + y - 1) & ~(y - 1); -} - /* Compatible with util/u_math.h */ static inline uint util_logbase2_ceil(uint n) diff --git a/src/asahi/lib/shaders/libagx.h b/src/asahi/lib/shaders/libagx.h index 040d6ad57d4..1b25b8f0dae 100644 --- a/src/asahi/lib/shaders/libagx.h +++ b/src/asahi/lib/shaders/libagx.h @@ -44,6 +44,12 @@ uint ballot(bool cond); #define AGX_STATIC_ASSERT(_COND) \ typedef char static_assertion_##__line__[(_COND) ? 1 : -1] +static inline uint +align(uint x, uint y) +{ + return (x + y - 1) & ~(y - 1); +} + #endif #endif diff --git a/src/asahi/lib/shaders/texture.cl b/src/asahi/lib/shaders/texture.cl index ec32e3c2eeb..2940c5e04e6 100644 --- a/src/asahi/lib/shaders/texture.cl +++ b/src/asahi/lib/shaders/texture.cl @@ -113,7 +113,7 @@ libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr, static uint32_t calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px, - uint16_t tile_h_px, uint32_t width_tl) + uint16_t tile_h_px, uint32_t aligned_width_px) { /* Modulo by the tile width/height to get the offsets within the tile */ ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1); @@ -131,7 +131,7 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px, * tile height = * align_down(y, tile height) * width_tl * tile width */ - uint32_t tile_row_start_px = tile_px.y * width_tl * tile_w_px; + uint32_t tile_row_start_px = tile_px.y * aligned_width_px; /* tile column start (px) = * (x // tile width) * (# of pix/tile) = @@ -145,12 +145,12 @@ calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px, } uint64_t -libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr, +libagx_image_texel_address(constant const struct agx_pbe_packed *ptr, uint4 coord, uint sample_idx, uint bytes_per_sample_B, bool is_1d, bool is_msaa, bool is_layered, bool return_index) { - agx_unpack(NULL, ptr, ATOMIC_SOFTWARE, d); + agx_unpack(NULL, ptr, PBE, d); /* We do not allow atomics on linear 2D or linear 2D arrays, as there are no * known use cases. So we're twiddled in this path, unless we're handling a @@ -162,30 +162,41 @@ libagx_image_texel_address(constant const struct agx_atomic_software_packed *ptr if (is_1d) { total_px = coord.x; } else { - total_px = - calculate_twiddled_coordinates(convert_ushort2(coord.xy), d.tile_width, - d.tile_height, d.tiles_per_row); + uint aligned_width_px; + if (is_msaa) { + aligned_width_px = d.aligned_width_msaa_sw; + } else { + uint width_px = max(d.width >> d.level, 1u); + aligned_width_px = align(width_px, d.tile_width_sw); + } + + total_px = calculate_twiddled_coordinates( + convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw, + aligned_width_px); } - if (is_layered) - total_px += coord[is_1d ? 1 : 2] * d.layer_stride_pixels; + uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0; - uint sample_count = is_msaa ? d.sample_count : 1; - uint total_sa = (total_px * d.sample_count) + sample_idx; + if (is_layered) { + total_px += coord[is_1d ? 1 : 2] * + ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2); + } + + uint total_sa = (total_px << samples_log2) + sample_idx; if (return_index) return total_sa; else - return d.base + (uint64_t)(total_sa * bytes_per_sample_B); + return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) + + (uint64_t)(total_sa * bytes_per_sample_B); } uint64_t -libagx_buffer_texel_address( - constant const struct agx_pbe_buffer_software_packed *ptr, uint4 coord, - uint bytes_per_pixel_B) +libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr, + uint4 coord, uint bytes_per_pixel_B) { - agx_unpack(NULL, ptr, PBE_BUFFER_SOFTWARE, d); - return d.base + (uint64_t)(coord.x * bytes_per_pixel_B); + agx_unpack(NULL, ptr, PBE, d); + return d.buffer + (uint64_t)(coord.x * bytes_per_pixel_B); } /* Buffer texture lowerings */ diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 1783ed84f5f..c0de9d7dbb8 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1174,41 +1174,6 @@ sampler_view_for_surface(struct pipe_surface *surf) }; } -static void -agx_pack_image_atomic_data(void *packed, struct pipe_image_view *view) -{ - struct agx_resource *tex = agx_resource(view->resource); - - if (tex->base.target == PIPE_BUFFER) { - agx_pack(packed, PBE_BUFFER_SOFTWARE, cfg) { - cfg.base = tex->bo->ptr.gpu + view->u.buf.offset; - } - } else if (tex->layout.writeable_image) { - unsigned level = view->u.tex.level; - unsigned blocksize_B = util_format_get_blocksize(tex->layout.format); - - agx_pack(packed, ATOMIC_SOFTWARE, cfg) { - cfg.base = - tex->bo->ptr.gpu + - ail_get_layer_level_B(&tex->layout, view->u.tex.first_layer, level); - - cfg.sample_count = MAX2(util_res_sample_count(view->resource), 1); - - if (tex->layout.tiling == AIL_TILING_TWIDDLED) { - struct ail_tile tile_size = tex->layout.tilesize_el[level]; - cfg.tile_width = tile_size.width_el; - cfg.tile_height = tile_size.height_el; - - unsigned width_el = u_minify(tex->base.width0, level); - cfg.tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el); - - cfg.layer_stride_pixels = DIV_ROUND_UP( - tex->layout.layer_stride_B, blocksize_B * cfg.sample_count); - } - } - } -} - static bool target_is_array(enum pipe_texture_target target) { @@ -1355,12 +1320,27 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out, /* When the descriptor isn't extended architecturally, we can use the last * 8 bytes as a sideband. We use it to provide metadata for image atomics. */ - if (!cfg.extended) { - struct agx_ptr desc = - agx_pool_alloc_aligned(&batch->pool, AGX_ATOMIC_SOFTWARE_LENGTH, 8); + if (!cfg.extended && tex->layout.writeable_image && + tex->base.target != PIPE_BUFFER) { - agx_pack_image_atomic_data(desc.cpu, view); - cfg.software_defined = desc.gpu; + if (util_res_sample_count(&tex->base) > 1) { + cfg.aligned_width_msaa_sw = + align(u_minify(view->resource->width0, level), + tex->layout.tilesize_el[level].width_el); + } else { + cfg.level_offset_sw = + ail_get_level_offset_B(&tex->layout, cfg.level); + } + + cfg.sample_count_log2_sw = util_logbase2(tex->base.nr_samples); + + if (tex->layout.tiling == AIL_TILING_TWIDDLED) { + struct ail_tile tile_size = tex->layout.tilesize_el[level]; + cfg.tile_width_sw = tile_size.width_el; + cfg.tile_height_sw = tile_size.height_el; + + cfg.layer_stride_sw = tex->layout.layer_stride_B; + } } }; }