third_party_mesa3d/src/asahi/compiler/agx_compiler.h
commit db0461a8d0 by Alyssa Rosenzweig: agx: Implement nir_texop_txf_ms
Multisampled texture fetch (txf_ms) is encoded like regular txf. However, we now
need to pack the multisample index in the right place, which we do by extending
our existing NIR texture source lowering pass. 2D MS arrays use a new value of
dim which requires tweaking the encoding slightly. Otherwise, everything is
bog standard.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19871>
2022-11-19 20:25:41 +00:00

/*
* Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
* Copyright (C) 2020 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __AGX_COMPILER_H
#define __AGX_COMPILER_H
#include "compiler/nir/nir.h"
#include "util/u_math.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_opcodes.h"
#include "agx_minifloat.h"
#ifdef __cplusplus
extern "C" {
#endif
enum agx_dbg {
AGX_DBG_MSGS = BITFIELD_BIT(0),
AGX_DBG_SHADERS = BITFIELD_BIT(1),
AGX_DBG_SHADERDB = BITFIELD_BIT(2),
AGX_DBG_VERBOSE = BITFIELD_BIT(3),
AGX_DBG_INTERNAL = BITFIELD_BIT(4),
AGX_DBG_NOVALIDATE = BITFIELD_BIT(5),
AGX_DBG_NOOPT = BITFIELD_BIT(6),
};
extern int agx_debug;
/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)
/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)
enum agx_index_type {
AGX_INDEX_NULL = 0,
AGX_INDEX_NORMAL = 1,
AGX_INDEX_IMMEDIATE = 2,
AGX_INDEX_UNIFORM = 3,
AGX_INDEX_REGISTER = 4,
};
enum agx_size {
AGX_SIZE_16 = 0,
AGX_SIZE_32 = 1,
AGX_SIZE_64 = 2
};
static inline unsigned
agx_size_align_16(enum agx_size size)
{
switch (size) {
case AGX_SIZE_16: return 1;
case AGX_SIZE_32: return 2;
case AGX_SIZE_64: return 4;
}
unreachable("Invalid size");
}
/* Keep synced with hash_index */
typedef struct {
/* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16 bits */
unsigned value : 22;
/* Indicates that this source kills the referenced value (because it is the
* last use in a block and the source is not live after the block). Set by
* liveness analysis. */
bool kill : 1;
/* Cache hints */
bool cache : 1;
bool discard : 1;
/* src - float modifiers */
bool abs : 1;
bool neg : 1;
enum agx_size size : 2;
enum agx_index_type type : 3;
} agx_index;
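/* The bitfields above total 22 + 1 + 1 + 1 + 1 + 1 + 2 + 3 = 32 bits, so an
 * agx_index is expected to pack into a single 32-bit word on typical ABIs.
 */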
static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
return (agx_index) {
.value = value,
.size = size,
.type = AGX_INDEX_NORMAL,
};
}
static inline agx_index
agx_immediate(uint32_t imm)
{
assert(imm < (1 << 16) && "overflowed immediate");
return (agx_index) {
.value = imm,
.size = AGX_SIZE_16,
.type = AGX_INDEX_IMMEDIATE,
};
}
static inline agx_index
agx_immediate_f(float f)
{
assert(agx_minifloat_exact(f));
return agx_immediate(agx_minifloat_encode(f));
}
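/* For instance, agx_immediate_f(0.0f) and agx_immediate_f(-0.0f) produce the
 * 0x00 and 0x80 minifloat encodings used by agx_zero() and agx_negzero()
 * below.
 */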
/* in half-words, specify r0h as 1, r1 as 2... */
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
assert(imm < AGX_NUM_REGS);
return (agx_index) {
.value = imm,
.size = size,
.type = AGX_INDEX_REGISTER,
};
}
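/* Illustrative examples of the half-word numbering described above:
 *
 *    agx_register(0, AGX_SIZE_16)   r0l (low 16-bit half of r0)
 *    agx_register(1, AGX_SIZE_16)   r0h (high 16-bit half of r0)
 *    agx_register(2, AGX_SIZE_32)   r1  (a full 32-bit register)
 */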
/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
assert(imm < AGX_NUM_UNIFORMS);
return (agx_index) {
.value = imm,
.size = size,
.type = AGX_INDEX_UNIFORM,
};
}
static inline agx_index
agx_null()
{
return (agx_index) { .type = AGX_INDEX_NULL };
}
static inline agx_index
agx_zero()
{
return agx_immediate(0);
}
/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
* = exponent = 0, sign bit set */
static inline agx_index
agx_negzero()
{
return agx_immediate(0x80);
}
static inline agx_index
agx_abs(agx_index idx)
{
idx.abs = true;
idx.neg = false;
return idx;
}
static inline agx_index
agx_neg(agx_index idx)
{
idx.neg ^= true;
return idx;
}
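/* Note the asymmetry in the helpers above: agx_abs() clears any pending
 * negate, since |-x| = |x|, while agx_neg() toggles, so applying it twice
 * returns the original index.
 */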
/* Replaces an index, preserving any modifiers */
static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
replacement.abs = old.abs;
replacement.neg = old.neg;
return replacement;
}
static inline bool
agx_is_null(agx_index idx)
{
return idx.type == AGX_INDEX_NULL;
}
/* Compares equivalence as references */
static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
return (left.type == right.type) && (left.value == right.value);
}
enum agx_icond {
AGX_ICOND_UEQ = 0,
AGX_ICOND_ULT = 1,
AGX_ICOND_UGT = 2,
/* unknown */
AGX_ICOND_SEQ = 4,
AGX_ICOND_SLT = 5,
AGX_ICOND_SGT = 6,
/* unknown */
};
enum agx_fcond {
AGX_FCOND_EQ = 0,
AGX_FCOND_LT = 1,
AGX_FCOND_GT = 2,
AGX_FCOND_LTN = 3,
/* unknown */
AGX_FCOND_GE = 5,
AGX_FCOND_LE = 6,
AGX_FCOND_GTN = 7,
};
enum agx_round {
AGX_ROUND_RTZ = 0,
AGX_ROUND_RTE = 1,
};
enum agx_convert {
AGX_CONVERT_U8_TO_F = 0,
AGX_CONVERT_S8_TO_F = 1,
AGX_CONVERT_F_TO_U16 = 4,
AGX_CONVERT_F_TO_S16 = 5,
AGX_CONVERT_U16_TO_F = 6,
AGX_CONVERT_S16_TO_F = 7,
AGX_CONVERT_F_TO_U32 = 8,
AGX_CONVERT_F_TO_S32 = 9,
AGX_CONVERT_U32_TO_F = 10,
AGX_CONVERT_S32_TO_F = 11
};
enum agx_lod_mode {
AGX_LOD_MODE_AUTO_LOD = 0,
AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
AGX_LOD_MODE_LOD_MIN = 6,
AGX_LOD_MODE_LOD_GRAD = 4,
AGX_LOD_MODE_LOD_GRAD_MIN = 12
};
/* Forward declare for branch target */
struct agx_block;
/* Keep synced with hash_instr */
typedef struct {
/* Must be first */
struct list_head link;
/* The sources list.
*
* As a special case to workaround ordering issues when translating phis, if
* nr_srcs == 0 and the opcode is PHI, holds a pointer to the NIR phi node.
*/
union {
agx_index *src;
nir_phi_instr *phi;
};
/* Data flow */
agx_index *dest;
enum agx_opcode op;
uint8_t nr_dests;
uint8_t nr_srcs;
union {
uint64_t imm;
uint32_t writeout;
uint32_t truth_table;
uint32_t component;
uint32_t channels;
uint32_t bfi_mask;
enum agx_sr sr;
enum agx_icond icond;
enum agx_fcond fcond;
enum agx_format format;
enum agx_round round;
enum agx_lod_mode lod_mode;
struct agx_block *target;
};
/* For load varying */
bool perspective : 1;
/* Invert icond/fcond */
bool invert_cond : 1;
/* TODO: Handle tex ops more efficiently */
enum agx_dim dim : 4;
bool offset : 1;
bool shadow : 1;
/* Final st_vary op */
bool last : 1;
/* Shift for a bitwise or memory op (conflicts with format for memory ops) */
unsigned shift : 4;
/* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
* scoreboarding (everything but memory load/store and texturing). */
unsigned scoreboard : 1;
/* Number of nested control flow layers to jump by */
unsigned nest : 2;
/* Output modifiers */
bool saturate : 1;
unsigned mask : 4;
} agx_instr;
static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}
struct agx_block;
typedef struct agx_block {
/* Link to next block. Must be first */
struct list_head link;
/* List of instructions emitted for the current block */
struct list_head instructions;
/* Index of the block in source order */
unsigned index;
/* Control flow graph */
struct agx_block *successors[2];
struct util_dynarray predecessors;
bool unconditional_jumps;
/* Liveness analysis results */
BITSET_WORD *live_in;
BITSET_WORD *live_out;
/* Register allocation */
BITSET_DECLARE(regs_out, AGX_NUM_REGS);
/* Offset of the block in the emitted binary */
off_t offset;
/** Available for passes to use for metadata */
uint8_t pass_flags;
} agx_block;
typedef struct {
nir_shader *nir;
gl_shader_stage stage;
struct list_head blocks; /* list of agx_block */
struct agx_shader_info *out;
struct agx_shader_key *key;
/* Maximum block index */
unsigned num_blocks;
/* For creating temporaries */
unsigned alloc;
/* I don't really understand how writeout ops work yet */
bool did_writeout;
/* Has r0l been zeroed yet due to control flow? */
bool any_cf;
/* Number of nested control flow structures within the innermost loop. Since
* NIR is just loop and if-else, this is the number of nested if-else
* statements in the loop */
unsigned loop_nesting;
/* During instruction selection, for inserting control flow */
agx_block *current_block;
agx_block *continue_block;
agx_block *break_block;
agx_block *after_block;
agx_block **indexed_nir_blocks;
/* During instruction selection, map from vector agx_index to its scalar
* components, populated by a split. */
struct hash_table_u64 *allocated_vec;
/* During instruction selection, preloaded values, or agx_null() if they
 * have not been preloaded
 */
agx_index vertex_id, instance_id;
/* Stats for shader-db */
unsigned loop_count;
unsigned spills;
unsigned fills;
unsigned max_reg;
} agx_context;
static inline void
agx_remove_instruction(agx_instr *ins)
{
list_del(&ins->link);
}
static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
return agx_get_index(ctx->alloc++, size);
}
static enum agx_size
agx_size_for_bits(unsigned bits)
{
switch (bits) {
case 1:
case 16: return AGX_SIZE_16;
case 32: return AGX_SIZE_32;
case 64: return AGX_SIZE_64;
default: unreachable("Invalid bitsize");
}
}
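/* Note that 1-bit NIR booleans are represented as 16-bit values in the AGX
 * IR, hence 1 and 16 sharing a case above.
 */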
static inline agx_index
agx_src_index(nir_src *src)
{
assert(src->is_ssa);
return agx_get_index(src->ssa->index,
agx_size_for_bits(nir_src_bit_size(*src)));
}
static inline agx_index
agx_dest_index(nir_dest *dst)
{
assert(dst->is_ssa);
return agx_get_index(dst->ssa.index,
agx_size_for_bits(nir_dest_bit_size(*dst)));
}
static inline agx_index
agx_vec_for_dest(agx_context *ctx, nir_dest *dest)
{
return agx_temp(ctx, agx_size_for_bits(nir_dest_bit_size(*dest)));
}
static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
return agx_vec_for_dest(ctx, &instr->dest);
}
static inline unsigned
agx_num_predecessors(agx_block *block)
{
return util_dynarray_num_elements(&block->predecessors, agx_block *);
}
static inline agx_block *
agx_start_block(agx_context *ctx)
{
agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
assert(agx_num_predecessors(first) == 0);
return first;
}
/* Iterators for AGX IR */
#define agx_foreach_block(ctx, v) \
list_for_each_entry(agx_block, v, &ctx->blocks, link)
#define agx_foreach_block_rev(ctx, v) \
list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)
#define agx_foreach_block_from(ctx, from, v) \
list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)
#define agx_foreach_block_from_rev(ctx, from, v) \
list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)
#define agx_foreach_instr_in_block(block, v) \
list_for_each_entry(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_rev(block, v) \
list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_safe(block, v) \
list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_safe_rev(block, v) \
list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_from(block, v, from) \
list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)
#define agx_foreach_instr_in_block_from_rev(block, v, from) \
list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)
#define agx_foreach_instr_global(ctx, v) \
agx_foreach_block(ctx, v_block) \
agx_foreach_instr_in_block(v_block, v)
#define agx_foreach_instr_global_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
agx_foreach_instr_in_block_rev(v_block, v)
#define agx_foreach_instr_global_safe(ctx, v) \
agx_foreach_block(ctx, v_block) \
agx_foreach_instr_in_block_safe(v_block, v)
#define agx_foreach_instr_global_safe_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
agx_foreach_instr_in_block_safe_rev(v_block, v)
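/* Illustrative sketch (hypothetical helper, not part of the compiler): the
 * iterators compose, e.g. counting every instruction in a shader.
 */
static inline unsigned
agx_example_count_instrs(agx_context *ctx)
{
   unsigned count = 0;

   agx_foreach_instr_global(ctx, I)
      count++;

   return count;
}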
/* Based on set_foreach, expanded with automatic type casts */
#define agx_foreach_successor(blk, v) \
agx_block *v; \
agx_block **_v; \
for (_v = (agx_block **) &blk->successors[0], \
v = *_v; \
v != NULL && _v < (agx_block **) &blk->successors[2]; \
_v++, v = *_v)
#define agx_foreach_predecessor(blk, v) \
util_dynarray_foreach(&blk->predecessors, agx_block *, v)
#define agx_foreach_src(ins, v) \
for (unsigned v = 0; v < ins->nr_srcs; ++v)
#define agx_foreach_dest(ins, v) \
for (unsigned v = 0; v < ins->nr_dests; ++v)
#define agx_foreach_ssa_src(ins, v) \
agx_foreach_src(ins, v) \
if (ins->src[v].type == AGX_INDEX_NORMAL)
#define agx_foreach_ssa_dest(ins, v) \
agx_foreach_dest(ins, v) \
if (ins->dest[v].type == AGX_INDEX_NORMAL)
/* Phis only come at the start so we stop as soon as we hit a non-phi */
#define agx_foreach_phi_in_block(block, v) \
agx_foreach_instr_in_block(block, v) \
if (v->op != AGX_OPCODE_PHI) \
break; \
else
/* Everything else comes after, so we stop as soon as we hit a phi in reverse */
#define agx_foreach_non_phi_in_block_rev(block, v) \
agx_foreach_instr_in_block_rev(block, v) \
if (v->op == AGX_OPCODE_PHI) \
break; \
else
/*
* Find the index of a predecessor, used as the implicit order of phi sources.
*/
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
unsigned index = 0;
agx_foreach_predecessor(succ, x) {
if (*x == pred) return index;
index++;
}
unreachable("Invalid predecessor");
}
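/* Illustrative: for a phi instruction I at the head of succ, the source
 * I->src[agx_predecessor_index(succ, pred)] is the value that flows in from
 * predecessor pred.
 */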
static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
return list_last_entry(&(ins->link), agx_instr, link);
}
static inline agx_instr *
agx_next_op(agx_instr *ins)
{
return list_first_entry(&(ins->link), agx_instr, link);
}
static inline agx_block *
agx_next_block(agx_block *block)
{
return list_first_entry(&(block->link), agx_block, link);
}
static inline agx_block *
agx_exit_block(agx_context *ctx)
{
agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
assert(!last->successors[0] && !last->successors[1]);
return last;
}
#define agx_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w) u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w) u_worklist_pop_head( w, agx_block, index)
#define agx_worklist_peek_tail(w) u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w) u_worklist_pop_tail( w, agx_block, index)
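/* Illustrative sketch (hypothetical helper, not part of the compiler) of
 * driving the worklist macros above as a dataflow-style fixed point; the
 * real consumer is a pass like agx_compute_liveness(). Assumes the
 * u_worklist_is_empty()/u_worklist_fini() helpers from util/u_worklist.h.
 */
static inline void
agx_example_worklist_walk(agx_context *ctx)
{
   u_worklist worklist;
   agx_worklist_init(ctx, &worklist);

   /* Seed the worklist with every block */
   agx_foreach_block(ctx, block)
      agx_worklist_push_head(&worklist, block);

   while (!u_worklist_is_empty(&worklist)) {
      agx_block *blk = agx_worklist_pop_head(&worklist);

      /* ... update blk, re-pushing predecessors whose results changed ... */
      (void) blk;
   }

   u_worklist_fini(&worklist);
}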
/* Like in NIR, for use with the builder */
enum agx_cursor_option {
agx_cursor_after_block,
agx_cursor_before_instr,
agx_cursor_after_instr
};
typedef struct {
enum agx_cursor_option option;
union {
agx_block *block;
agx_instr *instr;
};
} agx_cursor;
static inline agx_cursor
agx_after_block(agx_block *block)
{
return (agx_cursor) {
.option = agx_cursor_after_block,
.block = block
};
}
static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
return (agx_cursor) {
.option = agx_cursor_before_instr,
.instr = instr
};
}
static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
return (agx_cursor) {
.option = agx_cursor_after_instr,
.instr = instr
};
}
/*
* Get a cursor inserting at the logical end of the block. In particular, this
* is before branches or control flow instructions, which occur after the
* logical end but before the physical end.
*/
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
/* Search for a p_logical_end */
agx_foreach_instr_in_block_rev(block, I) {
if (I->op == AGX_OPCODE_LOGICAL_END)
return agx_before_instr(I);
}
/* If there's no p_logical_end, use the physical end */
return agx_after_block(block);
}
static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
assert(I != NULL);
return agx_before_instr(I);
}
static inline agx_cursor
agx_before_block(agx_block *block)
{
if (list_is_empty(&block->instructions))
return agx_after_block(block);
else
return agx_before_nonempty_block(block);
}
/* IR builder in terms of cursor infrastructure */
typedef struct {
agx_context *shader;
agx_cursor cursor;
} agx_builder;
static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
return (agx_builder) {
.shader = ctx,
.cursor = cursor
};
}
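/* Illustrative: to append at the logical end of a block (before any branch),
 *
 *    agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
 *
 * Instructions inserted through this builder go through agx_builder_insert
 * below, which advances the cursor past each newly inserted instruction.
 */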
/* Insert an instruction at the cursor and move the cursor */
static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
switch (cursor->option) {
case agx_cursor_after_instr:
list_add(&I->link, &cursor->instr->link);
cursor->instr = I;
return;
case agx_cursor_after_block:
list_addtail(&I->link, &cursor->block->instructions);
cursor->option = agx_cursor_after_instr;
cursor->instr = I;
return;
case agx_cursor_before_instr:
list_addtail(&I->link, &cursor->instr->link);
cursor->option = agx_cursor_after_instr;
cursor->instr = I;
return;
}
unreachable("Invalid cursor option");
}
/* Uniform file management */
agx_index
agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
unsigned index, unsigned length);
agx_index
agx_vbo_base(agx_context *ctx, unsigned vbo);
/* Routines defined for AIR */
void agx_print_instr(agx_instr *I, FILE *fp);
void agx_print_block(agx_block *block, FILE *fp);
void agx_print_shader(agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_dce(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);
#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str) { return; }
#endif
unsigned agx_write_registers(agx_instr *I, unsigned d);
struct agx_copy {
/* Base register destination of the copy */
unsigned dest;
/* Source of the copy */
agx_index src;
/* Whether the copy has been handled. Callers must leave this set to false. */
bool done;
};
void
agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies, unsigned n);
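/* Illustrative: because the copies are semantically parallel (all sources are
 * read before any destination is written), a register swap needs no explicit
 * temporary. Hypothetical register numbers:
 *
 *    struct agx_copy copies[] = {
 *       { .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
 *       { .dest = 2, .src = agx_register(0, AGX_SIZE_32) },
 *    };
 *
 *    agx_emit_parallel_copies(b, copies, 2);
 */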
void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
bool agx_lower_resinfo(nir_shader *s);
bool agx_nir_lower_array_texture(nir_shader *s);
bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
#ifdef __cplusplus
} /* extern C */
#endif
#endif