diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index e3a58100321..cdbfb97ce19 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -504,14 +504,8 @@ agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
 }
 
 static agx_instr *
-agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
+agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
 {
-   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned loc = sem.location;
-   assert(sem.dual_source_blend_index == 0 && "todo: dual-source blending");
-   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
-   unsigned rt = (loc - FRAG_RESULT_DATA0);
-
    /* TODO: Reverse-engineer interactions with MRT */
    if (b->shader->key->fs.ignore_tib_dependencies) {
       assert(b->shader->nir->info.internal && "only for clear shaders");
@@ -532,19 +526,15 @@ agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
    b->shader->did_writeout = true;
    return agx_st_tile(b, agx_src_index(&instr->src[0]),
-                      b->shader->key->fs.tib_formats[rt],
-                      nir_intrinsic_write_mask(instr));
+                      agx_src_index(&instr->src[1]),
+                      agx_format_for_pipe(nir_intrinsic_format(instr)),
+                      nir_intrinsic_write_mask(instr),
+                      nir_intrinsic_base(instr));
 }
 
 static void
-agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
+agx_emit_local_load_pixel(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
 {
-   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned loc = sem.location;
-   assert(sem.dual_source_blend_index == 0 && "dual src ld_tile is nonsense");
-   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
-   unsigned rt = (loc - FRAG_RESULT_DATA0);
-
    /* TODO: Reverse-engineer interactions with MRT */
    assert(!b->shader->key->fs.ignore_tib_dependencies && "invalid usage");
    agx_writeout(b, 0x0008);
@@ -552,8 +542,10 @@ agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
    b->shader->out->reads_tib = true;
 
    unsigned nr_comps = nir_dest_num_components(instr->dest);
-   agx_ld_tile_to(b, dest, b->shader->key->fs.tib_formats[rt],
-                  BITFIELD_MASK(nr_comps));
+   agx_ld_tile_to(b, dest, agx_src_index(&instr->src[0]),
+                  agx_format_for_pipe(nir_intrinsic_format(instr)),
+                  BITFIELD_MASK(nr_comps),
+                  nir_intrinsic_base(instr));
    agx_emit_cached_split(b, dest, nr_comps);
 }
 
@@ -770,16 +762,16 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
       return NULL;
 
    case nir_intrinsic_store_output:
-      if (stage == MESA_SHADER_FRAGMENT)
-         return agx_emit_fragment_out(b, instr);
-      else if (stage == MESA_SHADER_VERTEX)
-         return agx_emit_store_vary(b, instr);
-      else
-         unreachable("Unsupported shader stage");
+      assert(stage == MESA_SHADER_VERTEX);
+      return agx_emit_store_vary(b, instr);
 
-   case nir_intrinsic_load_output:
+   case nir_intrinsic_store_local_pixel_agx:
       assert(stage == MESA_SHADER_FRAGMENT);
-      agx_emit_load_tile(b, dst, instr);
+      return agx_emit_local_store_pixel(b, instr);
+
+   case nir_intrinsic_load_local_pixel_agx:
+      assert(stage == MESA_SHADER_FRAGMENT);
+      agx_emit_local_load_pixel(b, dst, instr);
       return NULL;
 
    case nir_intrinsic_load_ubo:
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 738890b3795..00cdf0a83b0 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -254,8 +254,6 @@ struct agx_vs_shader_key {
 };
 
 struct agx_fs_shader_key {
-   enum agx_format tib_formats[AGX_MAX_RTS];
-
    /* Normally, access to the tilebuffer must be guarded by appropriate fencing
     * instructions to ensure correct results in the presence of out-of-order
     * hardware optimizations. However, specially dispatched clear shaders are
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index 46f76589d6e..36ad3f0f816 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -300,15 +300,18 @@ typedef struct {
       uint32_t component;
       uint32_t channels;
       uint32_t bfi_mask;
+      uint16_t pixel_offset;
       enum agx_sr sr;
       enum agx_icond icond;
       enum agx_fcond fcond;
-      enum agx_format format;
       enum agx_round round;
       enum agx_lod_mode lod_mode;
       struct agx_block *target;
    };
 
+   /* For local access */
+   enum agx_format format;
+
    /* For load varying */
    bool perspective : 1;
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index 7a23794156a..d9877f3bc39 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -94,6 +94,7 @@ SHIFT = immediate("shift")
 MASK = immediate("mask")
 BFI_MASK = immediate("bfi_mask")
 LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
+PIXEL_OFFSET = immediate("pixel_offset")
 
 DIM = enum("dim", {
    0: '1d',
@@ -250,11 +251,12 @@ op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])
 op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1,
    can_eliminate = False)
 
-# Essentially same encoding
-op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0, imms = [FORMAT, MASK], can_reorder = False)
+# Essentially same encoding. Last source is the sample mask
+op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1,
+   imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False)
 
-op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
-   can_eliminate = False, imms = [FORMAT, MASK])
+op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 2,
+   can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET])
 
 for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
    op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index 19f8eccef64..24cc4786963 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -122,6 +122,7 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
       /* cmpselsrc takes integer immediates only */
       if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;
 
+      if (I->op == AGX_OPCODE_ST_TILE && s == 0) continue;
 
       if (float_src) {
          bool fp16 = (def->dest[0].size == AGX_SIZE_16);
@@ -215,9 +216,11 @@ agx_optimizer_forward(agx_context *ctx)
          agx_optimizer_fmov(defs, I);
 
       /* Inline immediates if we can. TODO: systematic */
-      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
-          I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
-          I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE &&
+      if (I->op != AGX_OPCODE_ST_VARY &&
+          I->op != AGX_OPCODE_COLLECT &&
+          I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
+          I->op != AGX_OPCODE_TEXTURE_LOAD &&
+          I->op != AGX_OPCODE_UNIFORM_STORE &&
           I->op != AGX_OPCODE_BLOCK_IMAGE_STORE)
          agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
    }
diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c
index 23911d56077..f44e2e5f408 100644
--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -426,18 +426,28 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
    {
       bool load = (I->op == AGX_OPCODE_LD_TILE);
       unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
-      unsigned rt = 0; /* TODO */
       assert(I->mask < 0x10);
+      assert(I->pixel_offset < 0x200);
+
+      agx_index sample_index = load ? I->src[0] : I->src[1];
+      assert(sample_index.type == AGX_INDEX_REGISTER ||
+             sample_index.type == AGX_INDEX_IMMEDIATE);
+      assert(sample_index.size == AGX_SIZE_16);
+      unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
+      unsigned S = sample_index.value;
+      assert(S < 0x100);
 
       uint64_t raw =
-         0x09 |
-         (load ? (1 << 6) : 0) |
+         agx_opcodes_info[I->op].encoding.exact |
          ((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
+         (St << 22) |
          ((uint64_t) (I->format) << 24) |
-         ((uint64_t) (rt) << 32) |
+         ((uint64_t) (I->pixel_offset & BITFIELD_MASK(7)) << 28) |
          (load ? (1ull << 35) : 0) |
         ((uint64_t) (I->mask) << 36) |
-         ((uint64_t) 0x0380FC << 40) |
+         ((uint64_t) (I->pixel_offset >> 7) << 40) |
+         ((uint64_t) (S & BITFIELD_MASK(6)) << 42) |
+         ((uint64_t) (S >> 6) << 56) |
          (((uint64_t) (D >> 8)) << 60);
 
       unsigned size = 8;
diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h
index 53d035e94a9..78fd81aeb5a 100644
--- a/src/asahi/lib/agx_device.h
+++ b/src/asahi/lib/agx_device.h
@@ -61,18 +61,6 @@ struct agx_device {
 
    pthread_mutex_t bo_map_lock;
    struct util_sparse_array bo_map;
-
-   /* Fixed shaders */
-   struct {
-      struct agx_bo *bo;
-      uint32_t clear;
-      uint32_t store;
-   } internal;
-
-   struct {
-      struct agx_bo *bo;
-      uint32_t format[AGX_NUM_FORMATS];
-   } reload;
 };
 
 bool
diff --git a/src/gallium/drivers/asahi/agx_blit.c b/src/gallium/drivers/asahi/agx_blit.c
index bbb95c52b25..16916f95451 100644
--- a/src/gallium/drivers/asahi/agx_blit.c
+++ b/src/gallium/drivers/asahi/agx_blit.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2021 Alyssa Rosenzweig
  * Copyright (C) 2020-2021 Collabora, Ltd.
  * Copyright (C) 2014 Broadcom
@@ -28,61 +28,6 @@
 #include "asahi/compiler/agx_compile.h"
 #include "gallium/auxiliary/util/u_blitter.h"
 
-static void
-agx_build_reload_shader(struct agx_device *dev)
-{
-   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
-                                                  &agx_nir_options, "agx_reload");
-
-   nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out,
-                                           glsl_vector_type(GLSL_TYPE_FLOAT, 4), "output");
-   out->data.location = FRAG_RESULT_DATA0;
-
-   nir_ssa_def *fragcoord = nir_load_frag_coord(&b);
-   nir_ssa_def *coord = nir_channels(&b, fragcoord, 0x3);
-
-   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
-   tex->dest_type = nir_type_float32;
-   tex->sampler_dim = GLSL_SAMPLER_DIM_RECT;
-   tex->op = nir_texop_tex;
-   tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(coord);
-   tex->coord_components = 2;
-   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
-   nir_builder_instr_insert(&b, &tex->instr);
-   nir_store_var(&b, out, &tex->dest.ssa, 0xFF);
-
-   unsigned offset = 0;
-   unsigned bo_size = 4096;
-
-   struct agx_bo *bo = agx_bo_create(dev, bo_size, AGX_MEMORY_TYPE_SHADER);
-   dev->reload.bo = bo;
-
-   for (unsigned i = 0; i < AGX_NUM_FORMATS; ++i) {
-      struct util_dynarray binary;
-      util_dynarray_init(&binary, NULL);
-
-      nir_shader *s = nir_shader_clone(NULL, b.shader);
-      struct agx_shader_info info;
-
-      struct agx_shader_key key = {
-         .fs.tib_formats[0] = i,
-         .fs.ignore_tib_dependencies = true,
-      };
-
-      agx_preprocess_nir(s);
-      agx_compile_shader_nir(s, &key, NULL, &binary, &info);
-
-      assert(offset + binary.size < bo_size);
-      memcpy(((uint8_t *) bo->ptr.cpu) + offset, binary.data, binary.size);
-
-      dev->reload.format[i] = bo->ptr.gpu + offset;
-      offset += ALIGN_POT(binary.size, 128);
-
-      util_dynarray_fini(&binary);
-   }
-}
-
 void
 agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
                  bool render_cond)
@@ -133,51 +78,3 @@ agx_blit(struct pipe_context *pipe,
    agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable);
    util_blitter_blit(ctx->blitter, info);
 }
-
-/* We need some fixed shaders for common rendering tasks. When colour buffer
- * reload is not in use, a shader is used to clear a particular colour. At the
- * end of rendering a tile, a shader is used to write it out. These shaders are
- * too trivial to go through the compiler at this stage. */
-#define AGX_STOP \
-   0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \
-   0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \
-
-#define AGX_BLEND \
-   0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03
-
-/* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour
-
-   0: 7e018c098040      bitop_mov        r0, u6
-   6: 7e058e098000      bitop_mov        r1, u7
-   c: 09000004f0fc8003  TODO.blend
- */
-
-static uint8_t shader_clear[] = {
-   0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40,
-   0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00,
-   AGX_BLEND,
-   AGX_STOP
-};
-
-static uint8_t shader_store[] = {
-   0x7e, 0x00, 0x04, 0x09, 0x80, 0x00,
-   0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00,
-   AGX_STOP
-};
-
-void
-agx_internal_shaders(struct agx_device *dev)
-{
-   unsigned clear_offset = 0;
-   unsigned store_offset = 1024;
-
-   struct agx_bo *bo = agx_bo_create(dev, 4096, AGX_MEMORY_TYPE_SHADER);
-   memcpy(((uint8_t *) bo->ptr.cpu) + clear_offset, shader_clear, sizeof(shader_clear));
-   memcpy(((uint8_t *) bo->ptr.cpu) + store_offset, shader_store, sizeof(shader_store));
-
-   dev->internal.bo = bo;
-   dev->internal.clear = bo->ptr.gpu + clear_offset;
-   dev->internal.store = bo->ptr.gpu + store_offset;
-
-   agx_build_reload_shader(dev);
-}
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index 11668d1cc05..d3f553a1576 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -668,8 +668,15 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
    assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");
 
    /* Fast clears configure the batch */
-   if (fastclear & PIPE_CLEAR_COLOR0)
-      memcpy(batch->clear_color, color->f, sizeof(color->f));
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
+         continue;
+
+      static_assert(sizeof(color->f) == 16, "mismatched structure");
+
+      batch->uploaded_clear_color[rt] =
+         agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);
+   }
 
    if (fastclear & PIPE_CLEAR_DEPTH)
       batch->clear_depth = depth;
@@ -731,49 +738,21 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
    memcpy(batch->encoder_current, stop, sizeof(stop));
 
-   /* Emit the commandbuffer */
-   uint64_t pipeline_clear = 0, pipeline_reload = 0;
+   uint64_t pipeline_background = agx_build_meta(batch, false, false);
+   uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
+   uint64_t pipeline_store = agx_build_meta(batch, true, false);
    bool clear_pipeline_textures = false;
 
-   uint16_t clear_colour[4] = {
-      _mesa_float_to_half(batch->clear_color[0]),
-      _mesa_float_to_half(batch->clear_color[1]),
-      _mesa_float_to_half(batch->clear_color[2]),
-      _mesa_float_to_half(batch->clear_color[3])
-   };
-
-   pipeline_clear = agx_build_clear_pipeline(batch,
-                                             dev->internal.clear,
-                                             agx_pool_upload(&batch->pool, clear_colour, sizeof(clear_colour)));
-
-   if (batch->key.cbufs[0]) {
-      enum agx_format internal = AGX_FORMAT_U8NORM /* other formats broken */;
-      uint32_t shader = dev->reload.format[internal];
-
-      pipeline_reload = agx_build_reload_pipeline(batch, shader,
-                                                  batch->key.cbufs[0]);
-   }
-
-   if (batch->key.cbufs[0] && !(batch->clear & PIPE_CLEAR_COLOR0)) {
-      clear_pipeline_textures = true;
-      pipeline_clear = pipeline_reload;
-   }
-
-   uint64_t pipeline_store = 0;
-
-   if (batch->key.cbufs[0]) {
-      pipeline_store =
-         agx_build_store_pipeline(batch,
-                                  dev->internal.store,
-                                  agx_batch_upload_pbe(batch, 0));
-   }
-
    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
       struct pipe_surface *surf = batch->key.cbufs[i];
 
      if (surf && surf->texture) {
         struct agx_resource *rt = agx_resource(surf->texture);
         BITSET_SET(rt->data_valid, surf->u.tex.level);
+
+         if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
+            clear_pipeline_textures = true;
      }
    }
@@ -797,8 +776,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    agx_batch_add_bo(batch, batch->encoder);
    agx_batch_add_bo(batch, batch->scissor.bo);
    agx_batch_add_bo(batch, batch->depth_bias.bo);
-   agx_batch_add_bo(batch, dev->internal.bo);
-   agx_batch_add_bo(batch, dev->reload.bo);
 
    unsigned handle_count =
       agx_batch_num_bo(batch) +
@@ -832,8 +809,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
                   encoder_id,
                   batch->scissor.bo->ptr.gpu,
                   batch->depth_bias.bo->ptr.gpu,
-                  pipeline_clear,
-                  pipeline_reload,
+                  pipeline_background,
+                  pipeline_background_partial,
                   pipeline_store,
                   clear_pipeline_textures,
                   batch->clear,
@@ -846,8 +823,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
 
    free(handles);
 
-   agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id);
-
    agx_wait_queue(dev->queue);
 
    if (dev->debug & AGX_DBG_TRACE) {
@@ -929,6 +904,7 @@ agx_create_context(struct pipe_screen *screen,
    pctx->invalidate_resource = agx_invalidate_resource;
 
    agx_init_state_functions(pctx);
+   agx_meta_init(&ctx->meta, agx_device(screen), ctx);
 
    ctx->blitter = util_blitter_create(pctx);
@@ -1529,7 +1505,5 @@ agx_screen_create(int fd, struct renderonly *ro, struct sw_winsys *winsys)
                            U_TRANSFER_HELPER_MSAA_MAP |
                            U_TRANSFER_HELPER_Z24_IN_Z32F);
 
-   agx_internal_shaders(&agx_screen->dev);
-
    return screen;
 }
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index ab152f9ae81..3ff16a0c897 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1140,6 +1140,14 @@ agx_compile_variant(struct agx_device *dev,
    }
 
    agx_preprocess_nir(nir);
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      struct agx_tilebuffer_layout tib =
+         agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
+
+      agx_nir_lower_tilebuffer(nir, &tib);
+   }
+
    agx_compile_shader_nir(nir, &key->base, debug, &binary, &compiled->info);
 
    if (binary.size) {
@@ -1205,7 +1213,7 @@ agx_create_shader_state(struct pipe_context *pctx,
       }
    case MESA_SHADER_FRAGMENT:
       key.nr_cbufs = 1;
-      key.base.fs.tib_formats[0] = AGX_FORMAT_U8NORM;
+      key.rt_formats[0] = PIPE_FORMAT_R8G8B8A8_UNORM;
      break;
    default:
      unreachable("Unknown shader stage in shader-db precompile");
@@ -1275,13 +1283,7 @@ agx_update_fs(struct agx_batch *batch)
    for (unsigned i = 0; i < key.nr_cbufs; ++i) {
       struct pipe_surface *surf = batch->key.cbufs[i];
 
-      if (surf) {
-         enum pipe_format fmt = surf->format;
-         key.rt_formats[i] = fmt;
-         key.base.fs.tib_formats[i] = AGX_FORMAT_U8NORM /* other formats broken */;
-      } else {
-         key.rt_formats[i] = PIPE_FORMAT_NONE;
-      }
+      key.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE;
    }
 
    memcpy(&key.blend, ctx->blend, sizeof(key.blend));
@@ -1417,42 +1419,91 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, enum
    return agx_usc_fini(&b);
 }
 
-/* Internal pipelines (TODO: refactor?) */
 uint64_t
-agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf)
+agx_build_meta(struct agx_batch *batch, bool store, bool partial_render)
 {
+   struct agx_context *ctx = batch->ctx;
+
+   /* Construct the key */
+   struct agx_meta_key key = {
+      .tib = batch->tilebuffer_layout
+   };
+
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      struct pipe_surface *surf = batch->key.cbufs[rt];
+
+      if (surf == NULL)
+         continue;
+
+      if (store) {
+         /* TODO: Suppress stores to discarded render targets */
+         key.op[rt] = AGX_META_OP_STORE;
+      } else {
+         bool load = !(batch->clear & (PIPE_CLEAR_COLOR0 << rt));
+
+         /* The background program used for partial renders must always load
+          * whatever was stored in the mid-frame end-of-tile program.
+          */
+         load |= partial_render;
+
+         key.op[rt] = load ? AGX_META_OP_LOAD : AGX_META_OP_CLEAR;
+      }
+   }
+
+   /* Get the shader */
+   struct agx_meta_shader *shader = agx_get_meta_shader(&ctx->meta, &key);
+   agx_batch_add_bo(batch, shader->bo);
+
+   /* Begin building the pipeline */
    struct agx_usc_builder b =
-      agx_alloc_usc_control(&batch->pipeline_pool, 1);
+      agx_alloc_usc_control(&batch->pipeline_pool, 1 + PIPE_MAX_COLOR_BUFS);
 
-   agx_usc_pack(&b, UNIFORM, cfg) {
-      cfg.start_halfs = (6 * 2);
-      cfg.size_halfs = 4;
-      cfg.buffer = clear_buf;
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      if (key.op[rt] == AGX_META_OP_LOAD) {
+         /* Each reloaded render target is textured */
+         struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
+         struct pipe_surface *surf = batch->key.cbufs[rt];
+         assert(surf != NULL && "cannot load nonexistent attachment");
+
+         struct agx_resource *rsrc = agx_resource(surf->texture);
+
+         agx_pack_texture(texture.cpu, rsrc, surf->format, &(struct pipe_sampler_view) {
+            /* To reduce shader variants, we always use a 2D texture. For
+             * reloads of arrays and cube maps, we map a single layer as a 2D
+             * image.
+             */
+            .target = PIPE_TEXTURE_2D,
+            .swizzle_r = PIPE_SWIZZLE_X,
+            .swizzle_g = PIPE_SWIZZLE_Y,
+            .swizzle_b = PIPE_SWIZZLE_Z,
+            .swizzle_a = PIPE_SWIZZLE_W,
+            .u.tex = {
+               .first_layer = surf->u.tex.first_layer,
+               .last_layer = surf->u.tex.last_layer,
+               .first_level = surf->u.tex.level,
+               .last_level = surf->u.tex.level
+            }
+         });
+
+         agx_usc_pack(&b, TEXTURE, cfg) {
+            cfg.start = rt;
+            cfg.count = 1;
+            cfg.buffer = texture.gpu;
+         }
+      } else if (key.op[rt] == AGX_META_OP_CLEAR) {
+         assert(batch->uploaded_clear_color[rt] && "set when cleared");
+         agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]);
+      } else if (key.op[rt] == AGX_META_OP_STORE) {
+         agx_usc_pack(&b, TEXTURE, cfg) {
+            cfg.start = rt;
+            cfg.count = 1;
+            cfg.buffer = agx_batch_upload_pbe(batch, rt);
+         }
+      }
    }
 
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.bytes_per_threadgroup = 32 * 256;
-   }
-
-   agx_usc_pack(&b, SHADER, cfg) {
-      cfg.code = code;
-      cfg.unk_2 = 3;
-   }
-
-   agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
-   agx_usc_pack(&b, NO_PRESHADER, cfg);
-
-   return agx_usc_fini(&b);
-}
-
-uint64_t
-agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf)
-{
+   /* All render targets share a sampler */
    struct agx_ptr sampler = agx_pool_alloc_aligned(&batch->pool, AGX_SAMPLER_LENGTH, 64);
-   struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
 
    agx_pack(sampler.cpu, SAMPLER, cfg) {
       cfg.magnify_linear = true;
@@ -1466,63 +1517,17 @@ agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_su
       cfg.unk_3 = 0;
    }
 
-   agx_pack(texture.cpu, TEXTURE, cfg) {
-      struct agx_resource *rsrc = agx_resource(surf->texture);
-      unsigned layer = surf->u.tex.first_layer;
-      const struct util_format_description *desc =
-         util_format_description(surf->format);
-
-      /* To reduce shader variants, we always use a 2D texture. For reloads of
-       * arrays and cube maps, we map a single layer as a 2D image.
-       */
-      cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
-      cfg.layout = agx_translate_layout(rsrc->layout.tiling);
-      cfg.channels = agx_pixel_format[surf->format].channels;
-      cfg.type = agx_pixel_format[surf->format].type;
-      cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]);
-      cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]);
-      cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]);
-      cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]);
-      cfg.width = surf->width;
-      cfg.height = surf->height;
-      cfg.first_level = surf->u.tex.level;
-      cfg.last_level = surf->u.tex.level;
-      cfg.unk_mipmapped = rsrc->mipmapped;
-      cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
-      cfg.address = agx_map_texture_gpu(rsrc, layer);
-
-      if (rsrc->layout.tiling == AIL_TILING_LINEAR)
-         cfg.stride = ail_get_linear_stride_B(&rsrc->layout, surf->u.tex.level) - 16;
-      else
-         cfg.unk_tiled = true;
-   }
-
-   struct agx_usc_builder b =
-      agx_alloc_usc_control(&batch->pipeline_pool, 2);
-
-   agx_usc_pack(&b, TEXTURE, cfg) {
-      cfg.start = 0;
-      cfg.count = 1;
-      cfg.buffer = texture.gpu;
-   }
-
    agx_usc_pack(&b, SAMPLER, cfg) {
       cfg.start = 0;
       cfg.count = 1;
       cfg.buffer = sampler.gpu;
    }
 
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.sample_count = 1;
-      cfg.bytes_per_threadgroup = 8 * 32 * 32;
-   }
+   agx_usc_tilebuffer(&b, &batch->tilebuffer_layout);
 
    agx_usc_pack(&b, SHADER, cfg) {
-      cfg.code = code;
-      cfg.unk_2 = 3;
+      cfg.code = shader->ptr;
+      cfg.unk_2 = 0;
    }
 
    agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 256;
@@ -1531,40 +1536,6 @@ agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_su
    return agx_usc_fini(&b);
 }
 
-uint64_t
-agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
-                         uint64_t render_target)
-{
-   struct agx_usc_builder b = agx_alloc_usc_control(&batch->pipeline_pool, 2);
-
-   agx_usc_pack(&b, TEXTURE, cfg) {
-      cfg.start = 0;
-      cfg.count = 1;
-      cfg.buffer = render_target;
-   }
-
-   uint32_t unk[] = { 0, ~0 };
-
-   agx_usc_pack(&b, UNIFORM, cfg) {
-      cfg.start_halfs = 4;
-      cfg.size_halfs = 4;
-      cfg.buffer = agx_pool_upload_aligned(&batch->pool, unk, sizeof(unk), 16);
-   }
-
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.bytes_per_threadgroup = 32 * 256;
-   }
-
-   agx_usc_pack(&b, SHADER, cfg) cfg.code = code;
-   agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
-   agx_usc_pack(&b, NO_PRESHADER, cfg);
-
-   return agx_usc_fini(&b);
-}
-
 void
 agx_batch_init_state(struct agx_batch *batch)
 {
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 310196b926a..d67f675caf5 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -39,6 +39,7 @@
 #include "compiler/nir/nir_lower_blend.h"
 #include "util/hash_table.h"
 #include "util/bitset.h"
+#include "agx_meta.h"
 
 struct agx_streamout_target {
    struct pipe_stream_output_target base;
@@ -106,7 +107,7 @@ struct agx_batch {
    /* Base of uploaded texture descriptors */
    uint64_t textures;
 
-   float clear_color[4];
+   uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
    double clear_depth;
    unsigned clear_stencil;
 
@@ -225,6 +226,8 @@ struct agx_context {
 
    /* Map of agx_resource to agx_batch that writes that resource */
    struct hash_table *writer;
+
+   struct agx_meta_cache meta;
 };
 
 static inline struct agx_context *
@@ -368,16 +371,6 @@ agx_push_location(struct agx_batch *batch, struct agx_push push,
 
 bool agx_batch_is_active(struct agx_batch *batch);
 
-uint64_t
-agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);
-
-uint64_t
-agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
-                         uint64_t render_target);
-
-uint64_t
-agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);
-
 uint64_t
 agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
 
@@ -448,11 +441,12 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
 void agx_blit(struct pipe_context *pipe,
               const struct pipe_blit_info *info);
 
-void agx_internal_shaders(struct agx_device *dev);
-
 /* Batch logic */
 
 void
 agx_batch_init_state(struct agx_batch *batch);
 
+uint64_t
+agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
+
 #endif
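
A few notes on the trickier parts of this patch, with standalone sketches.

First, the encoding change in agx_pack.c: the tile load/store word now carries a pixel offset and an explicit sample-index source in place of the old hardcoded render-target byte. The sketch below restates the bitfield layout from that hunk so it can be checked in isolation. The function name and the test values in main() are invented for illustration, and since the ISA is reverse-engineered, the bit positions reflect what this patch encodes rather than any documented hardware interface.

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Restates the raw-word construction from agx_pack_instr. 'exact' is the
 * opcode's exact encoding bits (0x09 for st_tile, 0x49 for ld_tile, per
 * agx_opcodes.py); D is the packed ALU destination (or data source, for
 * stores); St selects whether the sample index S is a register (1) or an
 * immediate (0). The 9-bit pixel offset is split: bits 0-6 land at bit 28
 * and bits 7-8 at bit 40. S is likewise split across bits 42 and 56.
 */
static uint64_t
pack_tile_word(uint64_t exact, bool load, unsigned D, unsigned St, unsigned S,
               unsigned format, unsigned mask, unsigned pixel_offset)
{
   assert(mask < 0x10);
   assert(pixel_offset < 0x200);
   assert(S < 0x100);

   return exact |
          ((uint64_t)(D & 0xFF) << 7) |
          ((uint64_t)St << 22) |
          ((uint64_t)format << 24) |
          ((uint64_t)(pixel_offset & 0x7F) << 28) |
          (load ? (1ull << 35) : 0) |
          ((uint64_t)mask << 36) |
          ((uint64_t)(pixel_offset >> 7) << 40) |
          ((uint64_t)(S & 0x3F) << 42) |
          ((uint64_t)(S >> 6) << 56) |
          ((uint64_t)(D >> 8) << 60);
}

int
main(void)
{
   /* st_tile with an illustrative packed data source of 4, immediate
    * sample 0, format 0, full xyzw write mask, pixel offset 8 */
   printf("%016" PRIx64 "\n", pack_tile_word(0x09, false, 4, 0, 0, 0, 0xF, 8));
   return 0;
}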
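
The agx_optimizer.c hunk pairs with that encoding: st_tile comes off the blanket no-inline list so the optimizer may fold the new sample-mask source into an immediate (which the packer supports via the St bit), while the added early continue pins source 0, the stored color data, to a register. A tiny sketch of the resulting per-source rule; the helper name is hypothetical:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical restatement of agx_optimizer_inline_imm's new st_tile rule:
 * source 0 (color data) must stay in a register, while source 1 (the sample
 * mask) may become an inline immediate, packed with St = 0.
 */
static bool
st_tile_src_may_be_immediate(unsigned s)
{
   return s != 0;
}

int
main(void)
{
   printf("src0 (color) inlinable: %d\n", st_tile_src_may_be_immediate(0));
   printf("src1 (sample mask) inlinable: %d\n", st_tile_src_may_be_immediate(1));
   return 0;
}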
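
On the Gallium side, agx_build_meta replaces the three hand-rolled pipelines (clear, reload, store) with compiled meta shaders keyed per render target. The decision logic is worth restating on its own: the end-of-tile program always stores, and the background program clears fast-cleared targets but must reload everything on a partial render, because a mid-frame end-of-tile program already wrote the tile out. A standalone sketch with invented enum and function names mirroring the hunk:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum meta_op { META_OP_NONE, META_OP_CLEAR, META_OP_LOAD, META_OP_STORE };

/* Mirrors the per-RT selection in agx_build_meta. */
static enum meta_op
select_meta_op(bool store, bool partial_render, bool rt_bound, bool rt_cleared)
{
   if (!rt_bound)
      return META_OP_NONE;

   if (store)
      return META_OP_STORE;

   /* The background program for partial renders must always load whatever
    * the mid-frame end-of-tile program stored, even for cleared targets. */
   bool load = !rt_cleared || partial_render;
   return load ? META_OP_LOAD : META_OP_CLEAR;
}

int
main(void)
{
   /* Cleared RT, full render: the background program can just clear. */
   assert(select_meta_op(false, false, true, true) == META_OP_CLEAR);

   /* Cleared RT, partial render: the background program must reload. */
   assert(select_meta_op(false, true, true, true) == META_OP_LOAD);

   /* The end-of-tile program always stores bound targets. */
   assert(select_meta_op(true, false, true, false) == META_OP_STORE);

   puts("ok");
   return 0;
}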
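
Finally, fast clears now upload the full 32-bit float clear color per render target in agx_clear, and agx_build_meta binds it with agx_usc_uniform(&b, 8 * rt, 8, ...). Sixteen bytes is eight half-word uniform slots, so RT r's color presumably occupies uniform halfs [8r, 8r + 8) as seen by the clear meta shader; by contrast, the removed fixed-function clear shader took a pre-converted FP16 color in u6-u7. A small sketch of that slot arithmetic, with hypothetical names:

#include <stdio.h>

/* Mirrors agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]):
 * each cleared RT binds its 16-byte float color (size checked by the
 * static_assert in agx_clear), i.e. eight 16-bit uniform slots, at a base
 * of 8 * rt.
 */
static void
clear_color_window(unsigned rt, unsigned *start_halfs, unsigned *size_halfs)
{
   *start_halfs = 8 * rt;
   *size_halfs = 16 / 2;
}

int
main(void)
{
   for (unsigned rt = 0; rt < 4; ++rt) {
      unsigned start, size;
      clear_color_window(rt, &start, &size);
      printf("RT%u clear color: uniform halfs [%u, %u)\n", rt, start, start + size);
   }
   return 0;
}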