asahi,agx: Use new tilebuffer infrastructure

Flag day change to replace the previous hardcoded background/end-of-tile shaders
and the API-style load/store_output in fragment shaders with the generated
shaders and lowered *_agx intrinsics. This gets us working non-UNORM8 render
targets and working MRT. It's also a step in the direction of working MSAA but
that needs a lot more work, since the multisampling programming model on AGX is
quite different from any of the APIs (including Metal).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19871>
Author:     Alyssa Rosenzweig
Date:       2022-11-19 13:48:14 -05:00
Committed:  Marge Bot
Parent:     c5c0ea39f6
Commit:     74e92274af

11 changed files with 167 additions and 335 deletions
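Conceptually, the lowering that replaces the API-style fragment outputs rewrites store_output/load_output into the new *_agx tilebuffer intrinsics that the backend consumes in this diff. The following is a rough, hypothetical nir_builder-style sketch of that rewrite, not the actual agx_nir_lower_tilebuffer pass: the operand layout (src[0] = colour value, src[1] = sample mask, base = render target, format = tilebuffer pipe format, plus a write mask) is taken from what agx_emit_local_store_pixel reads below, while the function name, the unsigned format parameter, and the 16-bit all-samples mask are illustrative assumptions.

   /* Hypothetical sketch only: rewrite a fragment store_output into the
    * store_local_pixel_agx form consumed by the backend in this commit.
    * Helper name and details are assumptions; only the intrinsic name and
    * its indices/sources are taken from the diff itself.
    */
   #include "nir.h"
   #include "nir_builder.h"

   static void
   lower_fs_store_to_local_pixel(nir_builder *b, nir_intrinsic_instr *store,
                                 unsigned rt, unsigned tib_format /* enum pipe_format */)
   {
      b->cursor = nir_before_instr(&store->instr);

      nir_intrinsic_instr *pix = nir_intrinsic_instr_create(
         b->shader, nir_intrinsic_store_local_pixel_agx);

      pix->num_components = store->num_components;
      pix->src[0] = nir_src_for_ssa(store->src[0].ssa);           /* colour value */
      pix->src[1] = nir_src_for_ssa(nir_imm_intN_t(b, 0xFF, 16)); /* sample mask (all samples) */

      nir_intrinsic_set_base(pix, rt);            /* render target index */
      nir_intrinsic_set_format(pix, tib_format);  /* tilebuffer format */
      nir_intrinsic_set_write_mask(pix, nir_intrinsic_write_mask(store));

      nir_builder_instr_insert(b, &pix->instr);
      nir_instr_remove(&store->instr);
   }

A matching rewrite turns load_output into load_local_pixel_agx, which agx_emit_local_load_pixel handles in the first hunk below.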


@@ -504,14 +504,8 @@ agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
 }
 static agx_instr *
-agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
+agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
 {
-   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned loc = sem.location;
-   assert(sem.dual_source_blend_index == 0 && "todo: dual-source blending");
-   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
-   unsigned rt = (loc - FRAG_RESULT_DATA0);
-   /* TODO: Reverse-engineer interactions with MRT */
    if (b->shader->key->fs.ignore_tib_dependencies) {
       assert(b->shader->nir->info.internal && "only for clear shaders");
@@ -532,19 +526,15 @@ agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
    b->shader->did_writeout = true;
    return agx_st_tile(b, agx_src_index(&instr->src[0]),
-                      b->shader->key->fs.tib_formats[rt],
-                      nir_intrinsic_write_mask(instr));
+                      agx_src_index(&instr->src[1]),
+                      agx_format_for_pipe(nir_intrinsic_format(instr)),
+                      nir_intrinsic_write_mask(instr),
+                      nir_intrinsic_base(instr));
 }
 static void
-agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
+agx_emit_local_load_pixel(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
 {
-   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned loc = sem.location;
-   assert(sem.dual_source_blend_index == 0 && "dual src ld_tile is nonsense");
-   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
-   unsigned rt = (loc - FRAG_RESULT_DATA0);
-   /* TODO: Reverse-engineer interactions with MRT */
    assert(!b->shader->key->fs.ignore_tib_dependencies && "invalid usage");
    agx_writeout(b, 0x0008);
@@ -552,8 +542,10 @@ agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
    b->shader->out->reads_tib = true;
    unsigned nr_comps = nir_dest_num_components(instr->dest);
-   agx_ld_tile_to(b, dest, b->shader->key->fs.tib_formats[rt],
-                  BITFIELD_MASK(nr_comps));
+   agx_ld_tile_to(b, dest, agx_src_index(&instr->src[0]),
+                  agx_format_for_pipe(nir_intrinsic_format(instr)),
+                  BITFIELD_MASK(nr_comps),
+                  nir_intrinsic_base(instr));
    agx_emit_cached_split(b, dest, nr_comps);
 }
@@ -770,16 +762,16 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
       return NULL;
    case nir_intrinsic_store_output:
-      if (stage == MESA_SHADER_FRAGMENT)
-         return agx_emit_fragment_out(b, instr);
-      else if (stage == MESA_SHADER_VERTEX)
-         return agx_emit_store_vary(b, instr);
-      else
-         unreachable("Unsupported shader stage");
+      assert(stage == MESA_SHADER_VERTEX);
+      return agx_emit_store_vary(b, instr);
-   case nir_intrinsic_load_output:
+   case nir_intrinsic_store_local_pixel_agx:
       assert(stage == MESA_SHADER_FRAGMENT);
-      agx_emit_load_tile(b, dst, instr);
+      return agx_emit_local_store_pixel(b, instr);
+   case nir_intrinsic_load_local_pixel_agx:
+      assert(stage == MESA_SHADER_FRAGMENT);
+      agx_emit_local_load_pixel(b, dst, instr);
       return NULL;
    case nir_intrinsic_load_ubo:


@@ -254,8 +254,6 @@ struct agx_vs_shader_key {
 };
 struct agx_fs_shader_key {
-   enum agx_format tib_formats[AGX_MAX_RTS];
    /* Normally, access to the tilebuffer must be guarded by appropriate fencing
    * instructions to ensure correct results in the presence of out-of-order
    * hardware optimizations. However, specially dispatched clear shaders are


@@ -300,15 +300,18 @@ typedef struct {
       uint32_t component;
       uint32_t channels;
       uint32_t bfi_mask;
+      uint16_t pixel_offset;
       enum agx_sr sr;
       enum agx_icond icond;
       enum agx_fcond fcond;
-      enum agx_format format;
       enum agx_round round;
       enum agx_lod_mode lod_mode;
       struct agx_block *target;
    };
+   /* For local access */
+   enum agx_format format;
    /* For load varying */
    bool perspective : 1;


@@ -94,6 +94,7 @@ SHIFT = immediate("shift")
 MASK = immediate("mask")
 BFI_MASK = immediate("bfi_mask")
 LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
+PIXEL_OFFSET = immediate("pixel_offset")
 DIM = enum("dim", {
     0: '1d',
@@ -250,11 +251,12 @@ op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])
 op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False)
-# Essentially same encoding
-op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0, imms = [FORMAT, MASK], can_reorder = False)
+# Essentially same encoding. Last source is the sample mask
+op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1,
+   imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False)
-op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
-   can_eliminate = False, imms = [FORMAT, MASK])
+op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 2,
+   can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET])
 for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
     op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,


@@ -122,6 +122,7 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
       /* cmpselsrc takes integer immediates only */
       if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;
+      if (I->op == AGX_OPCODE_ST_TILE && s == 0) continue;
       if (float_src) {
          bool fp16 = (def->dest[0].size == AGX_SIZE_16);
@@ -215,9 +216,11 @@ agx_optimizer_forward(agx_context *ctx)
       agx_optimizer_fmov(defs, I);
       /* Inline immediates if we can. TODO: systematic */
-      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
-          I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
-          I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE &&
+      if (I->op != AGX_OPCODE_ST_VARY &&
+          I->op != AGX_OPCODE_COLLECT &&
+          I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
+          I->op != AGX_OPCODE_TEXTURE_LOAD &&
+          I->op != AGX_OPCODE_UNIFORM_STORE &&
           I->op != AGX_OPCODE_BLOCK_IMAGE_STORE)
          agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
    }


@@ -426,18 +426,28 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
    {
       bool load = (I->op == AGX_OPCODE_LD_TILE);
       unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
-      unsigned rt = 0; /* TODO */
       assert(I->mask < 0x10);
+      assert(I->pixel_offset < 0x200);
+      agx_index sample_index = load ? I->src[0] : I->src[1];
+      assert(sample_index.type == AGX_INDEX_REGISTER ||
+             sample_index.type == AGX_INDEX_IMMEDIATE);
+      assert(sample_index.size == AGX_SIZE_16);
+      unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
+      unsigned S = sample_index.value;
+      assert(S < 0x100);
       uint64_t raw =
-         0x09 |
-         (load ? (1 << 6) : 0) |
+         agx_opcodes_info[I->op].encoding.exact |
          ((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
+         (St << 22) |
          ((uint64_t) (I->format) << 24) |
-         ((uint64_t) (rt) << 32) |
+         ((uint64_t) (I->pixel_offset & BITFIELD_MASK(7)) << 28) |
          (load ? (1ull << 35) : 0) |
          ((uint64_t) (I->mask) << 36) |
-         ((uint64_t) 0x0380FC << 40) |
+         ((uint64_t) (I->pixel_offset >> 7) << 40) |
+         ((uint64_t) (S & BITFIELD_MASK(6)) << 42) |
+         ((uint64_t) (S >> 6) << 56) |
          (((uint64_t) (D >> 8)) << 60);
       unsigned size = 8;


@@ -61,18 +61,6 @@ struct agx_device {
    pthread_mutex_t bo_map_lock;
    struct util_sparse_array bo_map;
-   /* Fixed shaders */
-   struct {
-      struct agx_bo *bo;
-      uint32_t clear;
-      uint32_t store;
-   } internal;
-   struct {
-      struct agx_bo *bo;
-      uint32_t format[AGX_NUM_FORMATS];
-   } reload;
 };
bool


@@ -1,4 +1,4 @@
-/*
+/*
  * Copyright (C) 2021 Alyssa Rosenzweig
  * Copyright (C) 2020-2021 Collabora, Ltd.
  * Copyright (C) 2014 Broadcom
@@ -28,61 +28,6 @@
 #include "asahi/compiler/agx_compile.h"
 #include "gallium/auxiliary/util/u_blitter.h"
-static void
-agx_build_reload_shader(struct agx_device *dev)
-{
-   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
-         &agx_nir_options, "agx_reload");
-   nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out,
-         glsl_vector_type(GLSL_TYPE_FLOAT, 4), "output");
-   out->data.location = FRAG_RESULT_DATA0;
-   nir_ssa_def *fragcoord = nir_load_frag_coord(&b);
-   nir_ssa_def *coord = nir_channels(&b, fragcoord, 0x3);
-   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
-   tex->dest_type = nir_type_float32;
-   tex->sampler_dim = GLSL_SAMPLER_DIM_RECT;
-   tex->op = nir_texop_tex;
-   tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(coord);
-   tex->coord_components = 2;
-   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
-   nir_builder_instr_insert(&b, &tex->instr);
-   nir_store_var(&b, out, &tex->dest.ssa, 0xFF);
-   unsigned offset = 0;
-   unsigned bo_size = 4096;
-   struct agx_bo *bo = agx_bo_create(dev, bo_size, AGX_MEMORY_TYPE_SHADER);
-   dev->reload.bo = bo;
-   for (unsigned i = 0; i < AGX_NUM_FORMATS; ++i) {
-      struct util_dynarray binary;
-      util_dynarray_init(&binary, NULL);
-      nir_shader *s = nir_shader_clone(NULL, b.shader);
-      struct agx_shader_info info;
-      struct agx_shader_key key = {
-         .fs.tib_formats[0] = i,
-         .fs.ignore_tib_dependencies = true,
-      };
-      agx_preprocess_nir(s);
-      agx_compile_shader_nir(s, &key, NULL, &binary, &info);
-      assert(offset + binary.size < bo_size);
-      memcpy(((uint8_t *) bo->ptr.cpu) + offset, binary.data, binary.size);
-      dev->reload.format[i] = bo->ptr.gpu + offset;
-      offset += ALIGN_POT(binary.size, 128);
-      util_dynarray_fini(&binary);
-   }
-}
 void
 agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
                  bool render_cond)
@@ -133,51 +78,3 @@ agx_blit(struct pipe_context *pipe,
    agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable);
    util_blitter_blit(ctx->blitter, info);
 }
-/* We need some fixed shaders for common rendering tasks. When colour buffer
- * reload is not in use, a shader is used to clear a particular colour. At the
- * end of rendering a tile, a shader is used to write it out. These shaders are
- * too trivial to go through the compiler at this stage. */
-#define AGX_STOP \
-   0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \
-   0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \
-#define AGX_BLEND \
-   0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03
-/* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour
-   0: 7e018c098040 bitop_mov r0, u6
-   6: 7e058e098000 bitop_mov r1, u7
-   c: 09000004f0fc8003 TODO.blend
-*/
-static uint8_t shader_clear[] = {
-   0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40,
-   0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00,
-   AGX_BLEND,
-   AGX_STOP
-};
-static uint8_t shader_store[] = {
-   0x7e, 0x00, 0x04, 0x09, 0x80, 0x00,
-   0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00,
-   AGX_STOP
-};
-void
-agx_internal_shaders(struct agx_device *dev)
-{
-   unsigned clear_offset = 0;
-   unsigned store_offset = 1024;
-   struct agx_bo *bo = agx_bo_create(dev, 4096, AGX_MEMORY_TYPE_SHADER);
-   memcpy(((uint8_t *) bo->ptr.cpu) + clear_offset, shader_clear, sizeof(shader_clear));
-   memcpy(((uint8_t *) bo->ptr.cpu) + store_offset, shader_store, sizeof(shader_store));
-   dev->internal.bo = bo;
-   dev->internal.clear = bo->ptr.gpu + clear_offset;
-   dev->internal.store = bo->ptr.gpu + store_offset;
-   agx_build_reload_shader(dev);
-}


@@ -668,8 +668,15 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
    assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");
    /* Fast clears configure the batch */
-   if (fastclear & PIPE_CLEAR_COLOR0)
-      memcpy(batch->clear_color, color->f, sizeof(color->f));
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
+         continue;
+      static_assert(sizeof(color->f) == 16, "mismatched structure");
+      batch->uploaded_clear_color[rt] =
+         agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);
+   }
    if (fastclear & PIPE_CLEAR_DEPTH)
       batch->clear_depth = depth;
@@ -731,49 +738,21 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
    memcpy(batch->encoder_current, stop, sizeof(stop));
    /* Emit the commandbuffer */
-   uint64_t pipeline_clear = 0, pipeline_reload = 0;
+   uint64_t pipeline_background = agx_build_meta(batch, false, false);
+   uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
+   uint64_t pipeline_store = agx_build_meta(batch, true, false);
    bool clear_pipeline_textures = false;
-   uint16_t clear_colour[4] = {
-      _mesa_float_to_half(batch->clear_color[0]),
-      _mesa_float_to_half(batch->clear_color[1]),
-      _mesa_float_to_half(batch->clear_color[2]),
-      _mesa_float_to_half(batch->clear_color[3])
-   };
-   pipeline_clear = agx_build_clear_pipeline(batch,
-                       dev->internal.clear,
-                       agx_pool_upload(&batch->pool, clear_colour, sizeof(clear_colour)));
-   if (batch->key.cbufs[0]) {
-      enum agx_format internal = AGX_FORMAT_U8NORM /* other formats broken */;
-      uint32_t shader = dev->reload.format[internal];
-      pipeline_reload = agx_build_reload_pipeline(batch, shader,
-                           batch->key.cbufs[0]);
-   }
-   if (batch->key.cbufs[0] && !(batch->clear & PIPE_CLEAR_COLOR0)) {
-      clear_pipeline_textures = true;
-      pipeline_clear = pipeline_reload;
-   }
-   uint64_t pipeline_store = 0;
-   if (batch->key.cbufs[0]) {
-      pipeline_store =
-         agx_build_store_pipeline(batch,
-                                  dev->internal.store,
-                                  agx_batch_upload_pbe(batch, 0));
-   }
    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
       struct pipe_surface *surf = batch->key.cbufs[i];
       if (surf && surf->texture) {
          struct agx_resource *rt = agx_resource(surf->texture);
          BITSET_SET(rt->data_valid, surf->u.tex.level);
+         if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
+            clear_pipeline_textures = true;
       }
    }
@@ -797,8 +776,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    agx_batch_add_bo(batch, batch->encoder);
    agx_batch_add_bo(batch, batch->scissor.bo);
    agx_batch_add_bo(batch, batch->depth_bias.bo);
-   agx_batch_add_bo(batch, dev->internal.bo);
-   agx_batch_add_bo(batch, dev->reload.bo);
    unsigned handle_count =
       agx_batch_num_bo(batch) +
@@ -832,8 +809,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
       encoder_id,
       batch->scissor.bo->ptr.gpu,
       batch->depth_bias.bo->ptr.gpu,
-      pipeline_clear,
-      pipeline_reload,
+      pipeline_background,
+      pipeline_background_partial,
       pipeline_store,
       clear_pipeline_textures,
       batch->clear,
@@ -846,8 +823,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    free(handles);
    agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id);
    agx_wait_queue(dev->queue);
    if (dev->debug & AGX_DBG_TRACE) {
@@ -929,6 +904,7 @@ agx_create_context(struct pipe_screen *screen,
    pctx->invalidate_resource = agx_invalidate_resource;
    agx_init_state_functions(pctx);
+   agx_meta_init(&ctx->meta, agx_device(screen), ctx);
    ctx->blitter = util_blitter_create(pctx);
@@ -1529,7 +1505,5 @@ agx_screen_create(int fd, struct renderonly *ro, struct sw_winsys *winsys)
                            U_TRANSFER_HELPER_MSAA_MAP |
                            U_TRANSFER_HELPER_Z24_IN_Z32F);
-   agx_internal_shaders(&agx_screen->dev);
    return screen;
 }


@@ -1140,6 +1140,14 @@ agx_compile_variant(struct agx_device *dev,
    }
    agx_preprocess_nir(nir);
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      struct agx_tilebuffer_layout tib =
+         agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
+      agx_nir_lower_tilebuffer(nir, &tib);
+   }
    agx_compile_shader_nir(nir, &key->base, debug, &binary, &compiled->info);
    if (binary.size) {
@@ -1205,7 +1213,7 @@ agx_create_shader_state(struct pipe_context *pctx,
    }
    case MESA_SHADER_FRAGMENT:
       key.nr_cbufs = 1;
-      key.base.fs.tib_formats[0] = AGX_FORMAT_U8NORM;
+      key.rt_formats[0] = PIPE_FORMAT_R8G8B8A8_UNORM;
       break;
    default:
       unreachable("Unknown shader stage in shader-db precompile");
@@ -1275,13 +1283,7 @@ agx_update_fs(struct agx_batch *batch)
    for (unsigned i = 0; i < key.nr_cbufs; ++i) {
       struct pipe_surface *surf = batch->key.cbufs[i];
-      if (surf) {
-         enum pipe_format fmt = surf->format;
-         key.rt_formats[i] = fmt;
-         key.base.fs.tib_formats[i] = AGX_FORMAT_U8NORM /* other formats broken */;
-      } else {
-         key.rt_formats[i] = PIPE_FORMAT_NONE;
-      }
+      key.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE;
    }
    memcpy(&key.blend, ctx->blend, sizeof(key.blend));
@@ -1417,42 +1419,91 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, enum
    return agx_usc_fini(&b);
 }
-/* Internal pipelines (TODO: refactor?) */
 uint64_t
-agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf)
+agx_build_meta(struct agx_batch *batch, bool store, bool partial_render)
 {
+   struct agx_context *ctx = batch->ctx;
+   /* Construct the key */
+   struct agx_meta_key key = {
+      .tib = batch->tilebuffer_layout
+   };
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      struct pipe_surface *surf = batch->key.cbufs[rt];
+      if (surf == NULL)
+         continue;
+      if (store) {
+         /* TODO: Suppress stores to discarded render targets */
+         key.op[rt] = AGX_META_OP_STORE;
+      } else {
+         bool load = !(batch->clear & (PIPE_CLEAR_COLOR0 << rt));
+         /* The background program used for partial renders must always load
+          * whatever was stored in the mid-frame end-of-tile program.
+          */
+         load |= partial_render;
+         key.op[rt] = load ? AGX_META_OP_LOAD : AGX_META_OP_CLEAR;
+      }
+   }
+   /* Get the shader */
+   struct agx_meta_shader *shader = agx_get_meta_shader(&ctx->meta, &key);
+   agx_batch_add_bo(batch, shader->bo);
+   /* Begin building the pipeline */
    struct agx_usc_builder b =
-      agx_alloc_usc_control(&batch->pipeline_pool, 1);
+      agx_alloc_usc_control(&batch->pipeline_pool, 1 + PIPE_MAX_COLOR_BUFS);
-   agx_usc_pack(&b, UNIFORM, cfg) {
-      cfg.start_halfs = (6 * 2);
-      cfg.size_halfs = 4;
-      cfg.buffer = clear_buf;
+   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+      if (key.op[rt] == AGX_META_OP_LOAD) {
+         /* Each reloaded render target is textured */
+         struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
+         struct pipe_surface *surf = batch->key.cbufs[rt];
+         assert(surf != NULL && "cannot load nonexistant attachment");
+         struct agx_resource *rsrc = agx_resource(surf->texture);
+         agx_pack_texture(texture.cpu, rsrc, surf->format, &(struct pipe_sampler_view) {
+            /* To reduce shader variants, we always use a 2D texture. For
+             * reloads of arrays and cube maps, we map a single layer as a 2D
+             * image.
+             */
+            .target = PIPE_TEXTURE_2D,
+            .swizzle_r = PIPE_SWIZZLE_X,
+            .swizzle_g = PIPE_SWIZZLE_Y,
+            .swizzle_b = PIPE_SWIZZLE_Z,
+            .swizzle_a = PIPE_SWIZZLE_W,
+            .u.tex = {
+               .first_layer = surf->u.tex.first_layer,
+               .last_layer = surf->u.tex.last_layer,
+               .first_level = surf->u.tex.level,
+               .last_level = surf->u.tex.level
+            }
+         });
+         agx_usc_pack(&b, TEXTURE, cfg) {
+            cfg.start = rt;
+            cfg.count = 1;
+            cfg.buffer = texture.gpu;
+         }
+      } else if (key.op[rt] == AGX_META_OP_CLEAR) {
+         assert(batch->uploaded_clear_color[rt] && "set when cleared");
+         agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]);
+      } else if (key.op[rt] == AGX_META_OP_STORE) {
+         agx_usc_pack(&b, TEXTURE, cfg) {
+            cfg.start = rt;
+            cfg.count = 1;
+            cfg.buffer = agx_batch_upload_pbe(batch, rt);
+         }
+      }
+   }
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.bytes_per_threadgroup = 32 * 256;
-   }
-   agx_usc_pack(&b, SHADER, cfg) {
-      cfg.code = code;
-      cfg.unk_2 = 3;
-   }
-   agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
-   agx_usc_pack(&b, NO_PRESHADER, cfg);
-   return agx_usc_fini(&b);
-}
-uint64_t
-agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf)
-{
+   /* All render targets share a sampler */
    struct agx_ptr sampler = agx_pool_alloc_aligned(&batch->pool, AGX_SAMPLER_LENGTH, 64);
-   struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
    agx_pack(sampler.cpu, SAMPLER, cfg) {
       cfg.magnify_linear = true;
@@ -1466,63 +1517,17 @@ agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_su
       cfg.unk_3 = 0;
    }
-   agx_pack(texture.cpu, TEXTURE, cfg) {
-      struct agx_resource *rsrc = agx_resource(surf->texture);
-      unsigned layer = surf->u.tex.first_layer;
-      const struct util_format_description *desc =
-         util_format_description(surf->format);
-      /* To reduce shader variants, we always use a 2D texture. For reloads of
-       * arrays and cube maps, we map a single layer as a 2D image.
-       */
-      cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
-      cfg.layout = agx_translate_layout(rsrc->layout.tiling);
-      cfg.channels = agx_pixel_format[surf->format].channels;
-      cfg.type = agx_pixel_format[surf->format].type;
-      cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]);
-      cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]);
-      cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]);
-      cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]);
-      cfg.width = surf->width;
-      cfg.height = surf->height;
-      cfg.first_level = surf->u.tex.level;
-      cfg.last_level = surf->u.tex.level;
-      cfg.unk_mipmapped = rsrc->mipmapped;
-      cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
-      cfg.address = agx_map_texture_gpu(rsrc, layer);
-      if (rsrc->layout.tiling == AIL_TILING_LINEAR)
-         cfg.stride = ail_get_linear_stride_B(&rsrc->layout, surf->u.tex.level) - 16;
-      else
-         cfg.unk_tiled = true;
-   }
-   struct agx_usc_builder b =
-      agx_alloc_usc_control(&batch->pipeline_pool, 2);
-   agx_usc_pack(&b, TEXTURE, cfg) {
-      cfg.start = 0;
-      cfg.count = 1;
-      cfg.buffer = texture.gpu;
-   }
    agx_usc_pack(&b, SAMPLER, cfg) {
       cfg.start = 0;
       cfg.count = 1;
       cfg.buffer = sampler.gpu;
    }
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.sample_count = 1;
-      cfg.bytes_per_threadgroup = 8 * 32 * 32;
-   }
+   agx_usc_tilebuffer(&b, &batch->tilebuffer_layout);
    agx_usc_pack(&b, SHADER, cfg) {
-      cfg.code = code;
-      cfg.unk_2 = 3;
+      cfg.code = shader->ptr;
+      cfg.unk_2 = 0;
    }
    agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 256;
@@ -1531,40 +1536,6 @@ agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_su
    return agx_usc_fini(&b);
 }
-uint64_t
-agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
-                         uint64_t render_target)
-{
-   struct agx_usc_builder b = agx_alloc_usc_control(&batch->pipeline_pool, 2);
-   agx_usc_pack(&b, TEXTURE, cfg) {
-      cfg.start = 0;
-      cfg.count = 1;
-      cfg.buffer = render_target;
-   }
-   uint32_t unk[] = { 0, ~0 };
-   agx_usc_pack(&b, UNIFORM, cfg) {
-      cfg.start_halfs = 4;
-      cfg.size_halfs = 4;
-      cfg.buffer = agx_pool_upload_aligned(&batch->pool, unk, sizeof(unk), 16);
-   }
-   agx_usc_pack(&b, SHARED, cfg) {
-      cfg.uses_shared_memory = true;
-      cfg.layout = AGX_SHARED_LAYOUT_32X32;
-      cfg.sample_stride_in_8_bytes = 1;
-      cfg.bytes_per_threadgroup = 32 * 256;
-   }
-   agx_usc_pack(&b, SHADER, cfg) cfg.code = code;
-   agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
-   agx_usc_pack(&b, NO_PRESHADER, cfg);
-   return agx_usc_fini(&b);
-}
 void
 agx_batch_init_state(struct agx_batch *batch)
 {


@@ -39,6 +39,7 @@
#include "compiler/nir/nir_lower_blend.h"
#include "util/hash_table.h"
#include "util/bitset.h"
#include "agx_meta.h"
struct agx_streamout_target {
struct pipe_stream_output_target base;
@@ -106,7 +107,7 @@ struct agx_batch {
/* Base of uploaded texture descriptors */
uint64_t textures;
float clear_color[4];
uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
double clear_depth;
unsigned clear_stencil;
@@ -225,6 +226,8 @@ struct agx_context {
/* Map of agx_resource to agx_batch that writes that resource */
struct hash_table *writer;
struct agx_meta_cache meta;
};
static inline struct agx_context *
@@ -368,16 +371,6 @@ agx_push_location(struct agx_batch *batch, struct agx_push push,
bool
agx_batch_is_active(struct agx_batch *batch);
uint64_t
agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);
uint64_t
agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
uint64_t render_target);
uint64_t
agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);
uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
@@ -448,11 +441,12 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
void agx_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void agx_internal_shaders(struct agx_device *dev);
/* Batch logic */
void
agx_batch_init_state(struct agx_batch *batch);
uint64_t
agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
#endif