diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.c b/src/gallium/drivers/panfrost/pan_afbc_cso.c
index 62597b31dcc..7ea74252b98 100644
--- a/src/gallium/drivers/panfrost/pan_afbc_cso.c
+++ b/src/gallium/drivers/panfrost/pan_afbc_cso.c
@@ -50,6 +50,39 @@ read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx)
                           AFBC_HEADER_BYTES_PER_TILE / 4, 32);
 }
 
+/* Store the 4-component header `hdr` at entry `idx` of the header array
+ * starting at `buf` (entries are AFBC_HEADER_BYTES_PER_TILE bytes apart) */
+static void
+write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
+{
+   nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
+   nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
+}
+
+/* Convert the linear index `idx` (row-major, `dst_stride` entries per row)
+ * into the index of the same (x, y) entry in a layout that stores 8x8 tiles
+ * in Z-order, with `src_stride` entries per row */
+static nir_def *
+get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
+                 nir_def *dst_stride)
+{
+   nir_def *x = nir_umod(b, idx, dst_stride);
+   nir_def *y = nir_udiv(b, idx, dst_stride);
+
+   nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
+   offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
+
+   x = nir_iand_imm(b, x, 0x7);
+   x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
+   x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
+   y = nir_iand_imm(b, y, 0x7);
+   y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
+   y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
+   nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
+
+   return nir_iadd(b, offset, tile_idx);
+}
+
 static nir_def *
 get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
                     nir_def *uncompressed_size)
@@ -99,6 +132,78 @@ get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
                       : size;
 }
 
+/* Load entry `idx` of the pan_afbc_block_info array at `metadata` and return
+ * its `offset` field widened to 64 bits. If `out_size` is non-NULL, the
+ * entry's `size` field is returned through it. */
+static nir_def *
+get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
+                  nir_def **out_size)
+{
+   nir_def *metadata_offset =
+      nir_u2u64(b, nir_imul_imm(b, idx, sizeof(struct pan_afbc_block_info)));
+   nir_def *range_ptr = nir_iadd(b, metadata, metadata_offset);
+   nir_def *entry = nir_load_global(b, range_ptr, 4,
+                                    sizeof(struct pan_afbc_block_info) / 4, 32);
+   nir_def *offset =
+      nir_channel(b, entry, offsetof(struct pan_afbc_block_info, offset) / 4);
+
+   if (out_size)
+      *out_size =
+         nir_channel(b, entry, offsetof(struct pan_afbc_block_info, size) / 4);
+
+   return nir_u2u64(b, offset);
+}
+
+/* Upper bound, in bytes, on a single global load/store in copy_superblock() */
+#define MAX_LINE_SIZE 16
+
+/* Copy one superblock from `src` to its packed location in `dst`: rewrite the
+ * header so it points at the new body offset, then copy the body `align`
+ * bytes per loop iteration until `size` bytes are covered */
+static void
+copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
+                nir_def *src, nir_def *src_idx, nir_def *metadata,
+                nir_def *meta_idx, unsigned align)
+{
+   nir_def *hdr = read_afbc_header(b, src, src_idx);
+   nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
+   nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);
+
+   nir_def *size;
+   nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
+   nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
+   nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);
+
+   /* Replace the `base_body_ptr` field if not zero (solid color) */
+   nir_def *hdr2 =
+      nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
+   hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
+   write_afbc_header(b, dst, dst_idx, hdr);
+
+   nir_variable *offset_var =
+      nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
+   nir_store_var(b, offset_var, nir_imm_int(b, 0), 1);
+   nir_loop *loop = nir_push_loop(b);
+   {
+      nir_def *offset = nir_load_var(b, offset_var);
+      nir_if *loop_check = nir_push_if(b, nir_uge(b, offset, size));
+      nir_jump(b, nir_jump_break);
+      nir_push_else(b, loop_check);
+      unsigned line_sz = align <= MAX_LINE_SIZE ? align : MAX_LINE_SIZE;
+      for (unsigned i = 0; i < align / line_sz; ++i) {
+         nir_def *src_line = nir_iadd(b, src_bodyptr, nir_u2u64(b, offset));
+         nir_def *dst_line = nir_iadd(b, dst_bodyptr, nir_u2u64(b, offset));
+         nir_store_global(
+            b, dst_line, line_sz,
+            nir_load_global(b, src_line, line_sz, line_sz / 4, 32), ~0);
+         offset = nir_iadd_imm(b, offset, line_sz);
+      }
+      nir_store_var(b, offset_var, offset, 0x1);
+      nir_pop_if(b, loop_check);
+   }
+   nir_pop_loop(b, loop);
+}
+
 #define panfrost_afbc_size_get_info_field(b, field)                            \
    panfrost_afbc_get_info_field(size, b, field)
 
@@ -135,6 +240,40 @@ panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp,
    return b.shader;
 }
 
+#define panfrost_afbc_pack_get_info_field(b, field)                            \
+   panfrost_afbc_get_info_field(pack, b, field)
+
+/* Build the compute shader that packs one superblock per invocation. `align`
+ * is the copy granularity in bytes; `tiled` selects Morton-order indexing of
+ * the source headers and metadata */
+static nir_shader *
+panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align,
+                                 bool tiled)
+{
+   nir_builder b = nir_builder_init_simple_shader(
+      MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(),
+      "panfrost_afbc_pack");
+
+   panfrost_afbc_add_info_ubo(pack, b);
+
+   nir_def *coord = nir_load_global_invocation_id(&b, 32);
+   nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
+   nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
+   nir_def *dst_idx = nir_channel(&b, coord, 0);
+   nir_def *src_idx =
+      tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
+   nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
+   nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
+   nir_def *header_size =
+      nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
+   nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);
+
+   copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
+                   src_idx, align);
+
+   return b.shader;
+}
+
 struct pan_afbc_shader_data *
 panfrost_afbc_get_shaders(struct panfrost_context *ctx,
                           struct panfrost_resource *rsrc, unsigned align)
@@ -171,6 +310,7 @@ panfrost_afbc_get_shaders(struct panfrost_context *ctx,
    }
 
    COMPILE_SHADER(size, key.bpp, key.align);
+   COMPILE_SHADER(pack, key.align, key.tiled);
 
 #undef COMPILE_SHADER
 
diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.h b/src/gallium/drivers/panfrost/pan_afbc_cso.h
index 86e09c57735..4b9f324ac2d 100644
--- a/src/gallium/drivers/panfrost/pan_afbc_cso.h
+++ b/src/gallium/drivers/panfrost/pan_afbc_cso.h
@@ -42,6 +42,7 @@ struct pan_afbc_shader_key {
 struct pan_afbc_shader_data {
    struct pan_afbc_shader_key key;
    void *size_cso;
+   void *pack_cso;
 };
 
 struct pan_afbc_shaders {
@@ -59,6 +60,17 @@ struct panfrost_afbc_size_info {
    mali_ptr metadata;
 } PACKED;
 
+/* Constants handed to the AFBC pack shader (filled in panfrost_afbc_pack()) */
+struct panfrost_afbc_pack_info {
+   mali_ptr src;
+   mali_ptr dst;
+   mali_ptr metadata;
+   uint32_t header_size;
+   uint32_t src_stride;
+   uint32_t dst_stride;
+   uint32_t padding[3]; // FIXME
+} PACKED;
+
 void panfrost_afbc_context_init(struct panfrost_context *ctx);
 void panfrost_afbc_context_destroy(struct panfrost_context *ctx);
 
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index c2b1a69d4ca..a5c4ed2c6e8 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -3953,6 +3953,34 @@ panfrost_afbc_size(struct panfrost_batch *batch, struct panfrost_resource *src,
    LAUNCH_AFBC_SHADER(size, batch, src, consts, slice->afbc.nr_blocks);
 }
 
+/* Launch the pack shader for one mip level: copies the superblocks of `src`
+ * slice `level` into `dst` at the layout described by `dst_slice`, using the
+ * per-block offsets stored in `metadata` at `metadata_offset` */
+static void
+panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src,
+                   struct panfrost_bo *dst,
+                   struct pan_image_slice_layout *dst_slice,
+                   struct panfrost_bo *metadata, unsigned metadata_offset,
+                   unsigned level)
+{
+   struct pan_image_slice_layout *src_slice = &src->image.layout.slices[level];
+   struct panfrost_afbc_pack_info consts = {
+      .src = src->image.data.bo->ptr.gpu + src->image.data.offset +
+             src_slice->offset,
+      .dst = dst->ptr.gpu + dst_slice->offset,
+      .metadata = metadata->ptr.gpu + metadata_offset,
+      .header_size = dst_slice->afbc.header_size,
+      .src_stride = src_slice->afbc.stride,
+      .dst_stride = dst_slice->afbc.stride,
+   };
+
+   panfrost_batch_write_rsrc(batch, src, PIPE_SHADER_COMPUTE);
+   panfrost_batch_write_bo(batch, dst, PIPE_SHADER_COMPUTE);
+   panfrost_batch_add_bo(batch, metadata, PIPE_SHADER_COMPUTE);
+
+   LAUNCH_AFBC_SHADER(pack, batch, src, consts, dst_slice->afbc.nr_blocks);
+}
+
 static void *
 panfrost_create_rasterizer_state(struct pipe_context *pctx,
                                  const struct pipe_rasterizer_state *cso)
@@ -4570,6 +4598,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
    screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
    screen->vtbl.compile_shader = GENX(pan_shader_compile);
    screen->vtbl.afbc_size = panfrost_afbc_size;
+   screen->vtbl.afbc_pack = panfrost_afbc_pack;
 
    GENX(pan_blitter_init)
    (dev, &screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base);
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index b628670835f..cb493a4a462 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -907,6 +907,16 @@ panfrost_load_tiled_images(struct panfrost_transfer *transfer,
    }
 }
 
+/* Get scan-order index from (x, y) position when blocks are
+ * arranged in z-order in 8x8 tiles */
+static unsigned
+get_morton_index(unsigned x, unsigned y, unsigned stride)
+{
+   unsigned i = ((x << 0) & 1) | ((y << 1) & 2) | ((x << 1) & 4) |
+                ((y << 2) & 8) | ((x << 2) & 16) | ((y << 3) & 32);
+   return (((y & ~7) * stride) + ((x & ~7) << 3)) + i;
+}
+
 static void
 panfrost_store_tiled_images(struct panfrost_transfer *transfer,
                             struct panfrost_resource *rsrc)
@@ -1303,6 +1313,118 @@ panfrost_get_afbc_superblock_sizes(struct panfrost_context *ctx,
    return bo;
 }
 
+/* Repack all mip levels of an AFBC resource into a newly allocated BO without
+ * gaps between superblock bodies, then switch the resource over to that BO.
+ * Does nothing unless every level holds valid data. */
+void
+panfrost_pack_afbc(struct panfrost_context *ctx,
+                   struct panfrost_resource *prsrc)
+{
+   struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+   struct panfrost_device *dev = pan_device(ctx->base.screen);
+   struct panfrost_bo *metadata_bo;
+   unsigned metadata_offsets[PIPE_MAX_TEXTURE_LEVELS];
+
+   uint64_t src_modifier = prsrc->image.layout.modifier;
+   uint64_t dst_modifier =
+      src_modifier & ~(AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE);
+   bool is_tiled = src_modifier & AFBC_FORMAT_MOD_TILED;
+   unsigned last_level = prsrc->base.last_level;
+   struct pan_image_slice_layout slice_infos[PIPE_MAX_TEXTURE_LEVELS] = {0};
+   unsigned total_size = 0;
+
+   /* It doesn't make sense to pack everything if we need to unpack right
+    * away to upload data to another level */
+   for (unsigned i = 0; i <= last_level; i++) {
+      if (!BITSET_TEST(prsrc->valid.data, i))
+         return;
+   }
+
+   metadata_bo = panfrost_get_afbc_superblock_sizes(ctx, prsrc, 0, last_level,
+                                                    metadata_offsets);
+   panfrost_bo_wait(metadata_bo, INT64_MAX, false);
+
+   for (unsigned level = 0; level <= last_level; ++level) {
+      struct pan_image_slice_layout *src_slice =
+         &prsrc->image.layout.slices[level];
+      struct pan_image_slice_layout *dst_slice = &slice_infos[level];
+
+      unsigned width = u_minify(prsrc->base.width0, level);
+      unsigned height = u_minify(prsrc->base.height0, level);
+      unsigned src_stride =
+         pan_afbc_stride_blocks(src_modifier, src_slice->row_stride);
+      unsigned dst_stride =
+         DIV_ROUND_UP(width, panfrost_afbc_superblock_width(dst_modifier));
+      unsigned dst_height =
+         DIV_ROUND_UP(height, panfrost_afbc_superblock_height(dst_modifier));
+
+      uint32_t offset = 0;
+      struct pan_afbc_block_info *meta =
+         metadata_bo->ptr.cpu + metadata_offsets[level];
+
+      /* Prefix-sum the per-block sizes into packed body offsets, visiting
+       * blocks in destination (row-major) order */
+      for (unsigned y = 0, i = 0; y < dst_height; ++y) {
+         for (unsigned x = 0; x < dst_stride; ++x, ++i) {
+            unsigned idx = is_tiled ? get_morton_index(x, y, src_stride) : i;
+            uint32_t size = meta[idx].size;
+            meta[idx].offset = offset; /* write the start offset */
+            offset += size;
+         }
+      }
+
+      total_size = ALIGN_POT(total_size, pan_slice_align(dst_modifier));
+      {
+         dst_slice->afbc.stride = dst_stride;
+         dst_slice->afbc.nr_blocks = dst_stride * dst_height;
+         dst_slice->afbc.header_size =
+            ALIGN_POT(dst_stride * dst_height * AFBC_HEADER_BYTES_PER_TILE,
+                      pan_afbc_body_align(dst_modifier));
+         dst_slice->afbc.body_size = offset;
+         dst_slice->afbc.surface_stride = dst_slice->afbc.header_size + offset;
+
+         dst_slice->offset = total_size;
+         dst_slice->row_stride = dst_stride * AFBC_HEADER_BYTES_PER_TILE;
+         dst_slice->surface_stride = dst_slice->afbc.surface_stride;
+         dst_slice->size = dst_slice->afbc.surface_stride;
+      }
+      total_size += dst_slice->afbc.surface_stride;
+   }
+
+   unsigned new_size = ALIGN_POT(total_size, 4096); // FIXME
+   unsigned old_size = prsrc->image.data.bo->size;
+
+   /* Nothing would be saved; release the size metadata and keep the old BO */
+   if (new_size == old_size) {
+      panfrost_bo_unreference(metadata_bo);
+      return;
+   }
+
+   if (dev->debug & PAN_DBG_PERF) {
+      printf("%u%%: %u KB -> %u KB\n", 100 * new_size / old_size,
+             old_size / 1024, new_size / 1024);
+   }
+
+   struct panfrost_bo *dst =
+      panfrost_bo_create(dev, new_size, 0, "AFBC compact texture");
+   struct panfrost_batch *batch =
+      panfrost_get_fresh_batch_for_fbo(ctx, "AFBC compaction");
+
+   for (unsigned level = 0; level <= last_level; ++level) {
+      struct pan_image_slice_layout *slice = &slice_infos[level];
+      screen->vtbl.afbc_pack(batch, prsrc, dst, slice, metadata_bo,
+                             metadata_offsets[level], level);
+      prsrc->image.layout.slices[level] = *slice;
+   }
+
+   panfrost_flush_batches_accessing_rsrc(ctx, prsrc, "AFBC compaction flush");
+
+   prsrc->image.layout.modifier = dst_modifier;
+   panfrost_bo_unreference(prsrc->image.data.bo);
+   prsrc->image.data.bo = dst;
+   panfrost_bo_unreference(metadata_bo);
+}
+
 static void
 panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
 {
diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h
index e2bdd202823..d6d1593e587 100644
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -181,6 +181,9 @@ struct panfrost_bo *panfrost_get_afbc_superblock_sizes(
    struct panfrost_context *ctx, struct panfrost_resource *rsrc,
    unsigned first_level, unsigned last_level, unsigned *out_offsets);
 
+void panfrost_pack_afbc(struct panfrost_context *ctx,
+                        struct panfrost_resource *prsrc);
+
 void pan_resource_modifier_convert(struct panfrost_context *ctx,
                                    struct panfrost_resource *rsrc,
                                    uint64_t modifier, const char *reason);
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 3f5c690d4b0..9c378d6a4a3 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -105,6 +105,13 @@ struct panfrost_vtable {
                      struct panfrost_resource *src,
                      struct panfrost_bo *metadata, unsigned offset,
                      unsigned level);
+
+   /* Run a compute shader to compact a sparse layout afbc resource */
+   void (*afbc_pack)(struct panfrost_batch *batch,
+                     struct panfrost_resource *src, struct panfrost_bo *dst,
+                     struct pan_image_slice_layout *slice,
+                     struct panfrost_bo *metadata, unsigned metadata_offset,
+                     unsigned level);
 };
 
 struct panfrost_screen {