diff --git a/docs/features.txt b/docs/features.txt
index 965c05a1a79..40ebfd68028 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -108,7 +108,7 @@ GL 3.3, GLSL 3.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llv
   GL_ARB_texture_swizzle                                DONE (v3d, vc4, panfrost, lima, asahi)
   GL_ARB_timer_query                                    DONE ()
   GL_ARB_instanced_arrays                               DONE (etnaviv/HALTI2, v3d, panfrost)
-  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (v3d, panfrost)
+  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (v3d, panfrost, asahi)
 
 GL 4.0, GLSL 4.00 --- all DONE: freedreno/a6xx, i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl, zink, d3d12
 
@@ -208,7 +208,7 @@ GL 4.4, GLSL 4.40 -- all DONE: freedreno/a6xx, i965/gen8+, nvc0, r600, radeonsi,
   GL_ARB_query_buffer_object                            DONE (freedreno/a6xx, i965/hsw+, virgl)
   GL_ARB_texture_mirror_clamp_to_edge                   DONE (freedreno, i965, nv50, softpipe, virgl, v3d, panfrost)
   GL_ARB_texture_stencil8                               DONE (freedreno, i965/hsw+, nv50, softpipe, virgl, v3d, panfrost, d3d12, asahi)
-  GL_ARB_vertex_type_10f_11f_11f_rev                    DONE (freedreno, i965, nv50, softpipe, virgl, panfrost, d3d12)
+  GL_ARB_vertex_type_10f_11f_11f_rev                    DONE (freedreno, i965, nv50, softpipe, virgl, panfrost, d3d12, asahi)
 
 GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, zink
 
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 8deae01f4b3..a27e27671b7 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -358,61 +358,6 @@ agx_format_for_pipe(enum pipe_format format)
    unreachable("Invalid format");
 }
 
-/* AGX appears to lack support for vertex attributes. Lower to global loads. */
-static void
-agx_emit_load_attr(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
-{
-   nir_src *offset_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_src) && "no attribute indirects");
-   unsigned index = nir_intrinsic_base(instr) +
-                    nir_src_as_uint(*offset_src);
-
-   struct agx_shader_key *key = b->shader->key;
-   struct agx_attribute attrib = key->vs.attributes[index];
-
-   /* address = base + (stride * vertex_id) + src_offset */
-   unsigned buf = attrib.buf;
-   unsigned stride = key->vs.vbuf_strides[buf];
-   unsigned shift = agx_format_shift(attrib.format);
-
-   agx_index shifted_stride = agx_mov_imm(b, 32, stride >> shift);
-   agx_index src_offset = agx_mov_imm(b, 32, attrib.src_offset);
-
-   /* A nonzero divisor requires dividing the instance ID. A zero divisor
-    * specifies per-vertex data. */
-   agx_index element_id = (attrib.divisor == 0) ? agx_vertex_id(b) :
-                          agx_udiv_const(b, agx_instance_id(b), attrib.divisor);
-
-   agx_index offset = agx_imad(b, element_id, shifted_stride, src_offset, 0);
-
-   /* Each VBO has a 64-bit = 4 x 16-bit address, lookup the base address as a
-    * sysval. Mov around the base to handle uniform restrictions, copyprop will
-    * usually clean that up.
- */ - agx_index base = agx_mov(b, agx_vbo_base(b->shader, buf)); - - /* Load the data */ - assert(instr->num_components <= 4); - - unsigned actual_comps = (attrib.nr_comps_minus_1 + 1); - agx_index vec = agx_vec_for_dest(b->shader, &instr->dest); - agx_device_load_to(b, vec, base, offset, attrib.format, - BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0, 0); - agx_wait(b, 0); - - agx_index dests[4] = { agx_null() }; - agx_emit_split(b, dests, vec, actual_comps); - - agx_index one = agx_mov_imm(b, 32, fui(1.0)); - agx_index zero = agx_mov_imm(b, 32, 0); - agx_index default_value[4] = { zero, zero, zero, one }; - - for (unsigned i = actual_comps; i < instr->num_components; ++i) - dests[i] = default_value[i]; - - agx_emit_collect_to(b, dest, instr->num_components, dests); -} - static void agx_emit_load_vary_flat(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { @@ -733,13 +678,8 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) return NULL; case nir_intrinsic_load_input: - if (stage == MESA_SHADER_FRAGMENT) - agx_emit_load_vary_flat(b, dst, instr); - else if (stage == MESA_SHADER_VERTEX) - agx_emit_load_attr(b, dst, instr); - else - unreachable("Unsupported shader stage"); - + assert(stage == MESA_SHADER_FRAGMENT && "vertex loads lowered"); + agx_emit_load_vary_flat(b, dst, instr); return NULL; case nir_intrinsic_load_global: @@ -785,6 +725,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) nir_src_as_uint(instr->src[0]) * 4, b->shader->nir->info.num_ubos * 4)); + case nir_intrinsic_load_vbo_base_agx: + return agx_mov_to(b, dst, + agx_vbo_base(b->shader, nir_src_as_uint(instr->src[0]))); + case nir_intrinsic_load_vertex_id: return agx_mov_to(b, dst, agx_abs(agx_vertex_id(b))); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 00cdf0a83b0..e549ee181d7 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -182,8 +182,6 @@ struct agx_shader_info { }; #define AGX_MAX_RTS (8) -#define AGX_MAX_ATTRIBS (16) -#define AGX_MAX_VBUFS (16) enum agx_format { AGX_FORMAT_I8 = 0, @@ -203,56 +201,6 @@ enum agx_format { AGX_NUM_FORMATS, }; -/* Returns the number of bits at the bottom of the address required to be zero. - * That is, returns the base-2 logarithm of the minimum alignment for an - * agx_format, where the minimum alignment is 2^n where n is the result of this - * function. 
The offset argument to device_load is left-shifted by this amount - * in the hardware */ - -static inline unsigned -agx_format_shift(enum agx_format format) -{ - switch (format) { - case AGX_FORMAT_I8: - case AGX_FORMAT_U8NORM: - case AGX_FORMAT_S8NORM: - case AGX_FORMAT_SRGBA8: - return 0; - - case AGX_FORMAT_I16: - case AGX_FORMAT_F16: - case AGX_FORMAT_U16NORM: - case AGX_FORMAT_S16NORM: - return 1; - - case AGX_FORMAT_I32: - case AGX_FORMAT_RGB10A2: - case AGX_FORMAT_RG11B10F: - case AGX_FORMAT_RGB9E5: - return 2; - - default: - unreachable("invalid format"); - } -} - -struct agx_attribute { - uint32_t divisor; - - unsigned buf : 5; - unsigned src_offset : 16; - unsigned nr_comps_minus_1 : 2; - enum agx_format format : 4; - unsigned padding : 5; -}; - -struct agx_vs_shader_key { - unsigned num_vbufs; - unsigned vbuf_strides[AGX_MAX_VBUFS]; - - struct agx_attribute attributes[AGX_MAX_ATTRIBS]; -}; - struct agx_fs_shader_key { /* Normally, access to the tilebuffer must be guarded by appropriate fencing * instructions to ensure correct results in the presence of out-of-order @@ -269,7 +217,6 @@ struct agx_fs_shader_key { struct agx_shader_key { union { - struct agx_vs_shader_key vs; struct agx_fs_shader_key fs; }; }; diff --git a/src/asahi/lib/agx_formats.c b/src/asahi/lib/agx_formats.c index 5f5923cf452..55f592d2f9c 100644 --- a/src/asahi/lib/agx_formats.c +++ b/src/asahi/lib/agx_formats.c @@ -190,69 +190,3 @@ const struct agx_pixel_format_entry agx_pixel_format[PIPE_FORMAT_COUNT] = { AGX_FMT(BPTC_RGBA_UNORM, BC7, UNORM, F, _), AGX_FMT(BPTC_SRGBA, BC7, UNORM, F, _), }; - -const enum agx_format -agx_vertex_format[PIPE_FORMAT_COUNT] = { - [PIPE_FORMAT_R32_FLOAT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32_FLOAT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32_FLOAT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32A32_FLOAT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32A32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32A32_SINT] = AGX_FORMAT_I32, - - [PIPE_FORMAT_R8_UNORM] = AGX_FORMAT_U8NORM, - [PIPE_FORMAT_R8G8_UNORM] = AGX_FORMAT_U8NORM, - [PIPE_FORMAT_R8G8B8_UNORM] = AGX_FORMAT_U8NORM, - [PIPE_FORMAT_R8G8B8A8_UNORM] = AGX_FORMAT_U8NORM, - - [PIPE_FORMAT_R8_SNORM] = AGX_FORMAT_S8NORM, - [PIPE_FORMAT_R8G8_SNORM] = AGX_FORMAT_S8NORM, - [PIPE_FORMAT_R8G8B8_SNORM] = AGX_FORMAT_S8NORM, - [PIPE_FORMAT_R8G8B8A8_SNORM] = AGX_FORMAT_S8NORM, - - [PIPE_FORMAT_R16_UNORM] = AGX_FORMAT_U16NORM, - [PIPE_FORMAT_R16G16_UNORM] = AGX_FORMAT_U16NORM, - [PIPE_FORMAT_R16G16B16_UNORM] = AGX_FORMAT_U16NORM, - [PIPE_FORMAT_R16G16B16A16_UNORM] = AGX_FORMAT_U16NORM, - - [PIPE_FORMAT_R16_SNORM] = AGX_FORMAT_S16NORM, - [PIPE_FORMAT_R16G16_SNORM] = AGX_FORMAT_S16NORM, - [PIPE_FORMAT_R16G16B16_SNORM] = AGX_FORMAT_S16NORM, - [PIPE_FORMAT_R16G16B16A16_SNORM] = AGX_FORMAT_S16NORM, - - [PIPE_FORMAT_R8_UINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8_UINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8B8_UINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8B8A8_UINT] = AGX_FORMAT_I8, - - [PIPE_FORMAT_R8_SINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8_SINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8B8_SINT] = AGX_FORMAT_I8, - [PIPE_FORMAT_R8G8B8A8_SINT] = AGX_FORMAT_I8, - - [PIPE_FORMAT_R16_UINT] = AGX_FORMAT_I16, - [PIPE_FORMAT_R16G16_UINT] = AGX_FORMAT_I16, - [PIPE_FORMAT_R16G16B16_UINT] = AGX_FORMAT_I16, 
- [PIPE_FORMAT_R16G16B16A16_UINT] = AGX_FORMAT_I16, - - [PIPE_FORMAT_R16_SINT] = AGX_FORMAT_I16, - [PIPE_FORMAT_R16G16_SINT] = AGX_FORMAT_I16, - [PIPE_FORMAT_R16G16B16_SINT] = AGX_FORMAT_I16, - [PIPE_FORMAT_R16G16B16A16_SINT] = AGX_FORMAT_I16, - - [PIPE_FORMAT_R32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32_UINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32A32_UINT] = AGX_FORMAT_I32, - - [PIPE_FORMAT_R32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32_SINT] = AGX_FORMAT_I32, - [PIPE_FORMAT_R32G32B32A32_SINT] = AGX_FORMAT_I32, -}; diff --git a/src/asahi/lib/agx_formats.h b/src/asahi/lib/agx_formats.h index c1e33fbc1d8..b463418b0e6 100644 --- a/src/asahi/lib/agx_formats.h +++ b/src/asahi/lib/agx_formats.h @@ -26,7 +26,6 @@ #define __AGX_FORMATS_H_ #include "util/format/u_format.h" -#include "asahi/compiler/agx_compile.h" struct agx_pixel_format_entry { uint8_t channels; @@ -36,7 +35,6 @@ struct agx_pixel_format_entry { }; extern const struct agx_pixel_format_entry agx_pixel_format[PIPE_FORMAT_COUNT]; -extern const enum agx_format agx_vertex_format[PIPE_FORMAT_COUNT]; /* N.b. hardware=0 corresponds to R8 UNORM, which is renderable. So a zero * entry indicates an invalid format. */ diff --git a/src/asahi/lib/agx_nir_lower_vbo.c b/src/asahi/lib/agx_nir_lower_vbo.c new file mode 100644 index 00000000000..d796f2d5489 --- /dev/null +++ b/src/asahi/lib/agx_nir_lower_vbo.c @@ -0,0 +1,239 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "agx_nir_lower_vbo.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" +#include "util/u_math.h" + +static bool +is_rgb10_a2(const struct util_format_description *desc) +{ + return desc->channel[0].shift == 0 && desc->channel[0].size == 10 && + desc->channel[1].shift == 10 && desc->channel[1].size == 10 && + desc->channel[2].shift == 20 && desc->channel[2].size == 10 && + desc->channel[3].shift == 30 && desc->channel[3].size == 2; +} + +static enum pipe_format +agx_vbo_internal_format(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + /* RGB10A2 formats are native for UNORM and unpacked otherwise */ + if (is_rgb10_a2(desc)) { + if (desc->is_unorm) + return PIPE_FORMAT_R10G10B10A2_UNORM; + else + return PIPE_FORMAT_R32_UINT; + } + + /* R11G11B10F is native and special */ + if (format == PIPE_FORMAT_R11G11B10_FLOAT) + return format; + + /* No other non-array formats handled */ + if (!desc->is_array) + return PIPE_FORMAT_NONE; + + /* Otherwise look at one (any) channel */ + int idx = util_format_get_first_non_void_channel(format); + if (idx < 0) + return PIPE_FORMAT_NONE; + + /* We only handle RGB formats (we could do SRGB if we wanted though?) */ + if ((desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) || + (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)) + return PIPE_FORMAT_NONE; + + /* We have native 8-bit and 16-bit normalized formats */ + struct util_format_channel_description chan = desc->channel[idx]; + + if (chan.normalized) { + if (chan.size == 8) + return desc->is_unorm ? PIPE_FORMAT_R8_UNORM : PIPE_FORMAT_R8_SNORM; + else if (chan.size == 16) + return desc->is_unorm ? 
PIPE_FORMAT_R16_UNORM : PIPE_FORMAT_R16_SNORM; + } + + /* Otherwise map to the corresponding integer format */ + switch (chan.size) { + case 32: return PIPE_FORMAT_R32_UINT; + case 16: return PIPE_FORMAT_R16_UINT; + case 8: return PIPE_FORMAT_R8_UINT; + default: return PIPE_FORMAT_NONE; + } +} + +bool +agx_vbo_supports_format(enum pipe_format format) +{ + return agx_vbo_internal_format(format) != PIPE_FORMAT_NONE; +} + +static nir_ssa_def * +apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec, + unsigned swizzle, bool is_int) +{ + switch (swizzle) { + case PIPE_SWIZZLE_X: return nir_channel(b, vec, 0); + case PIPE_SWIZZLE_Y: return nir_channel(b, vec, 1); + case PIPE_SWIZZLE_Z: return nir_channel(b, vec, 2); + case PIPE_SWIZZLE_W: return nir_channel(b, vec, 3); + case PIPE_SWIZZLE_0: return nir_imm_intN_t(b, 0, vec->bit_size); + case PIPE_SWIZZLE_1: return is_int ? nir_imm_intN_t(b, 1, vec->bit_size) : + nir_imm_floatN_t(b, 1.0, vec->bit_size); + default: unreachable("Invalid swizzle channel"); + } +} + +static bool +pass(struct nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_input) + return false; + + struct agx_vbufs *vbufs = data; + b->cursor = nir_before_instr(instr); + + nir_src *offset_src = nir_get_io_offset_src(intr); + assert(nir_src_is_const(*offset_src) && "no attribute indirects"); + unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src); + + struct agx_attribute attrib = vbufs->attributes[index]; + uint32_t stride = vbufs->strides[attrib.buf]; + uint16_t offset = attrib.src_offset; + + const struct util_format_description *desc = + util_format_description(attrib.format); + int chan = util_format_get_first_non_void_channel(attrib.format); + assert(chan >= 0); + + bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; + bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED; + bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED; + bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; + bool is_int = util_format_is_pure_integer(attrib.format); + + assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format"); + + enum pipe_format interchange_format = agx_vbo_internal_format(attrib.format); + assert(interchange_format != PIPE_FORMAT_NONE); + + unsigned interchange_align = util_format_get_blocksize(interchange_format); + unsigned interchange_comps = util_format_get_nr_components(attrib.format); + + /* In the hardware, uint formats zero-extend and float formats convert. + * However, non-uint formats using a uint interchange format shouldn't be + * zero extended. + */ + unsigned interchange_register_size = + util_format_is_pure_uint(interchange_format) && !util_format_is_pure_uint(attrib.format) ? + (interchange_align * 8): + nir_dest_bit_size(intr->dest); + + /* Non-UNORM R10G10B10A2 loaded as a scalar and unpacked */ + if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array) + interchange_comps = 1; + + /* Calculate the element to fetch the vertex for. Divide the instance ID by + * the divisor for per-instance data. Divisor=0 specifies per-vertex data. + */ + nir_ssa_def *el = (attrib.divisor == 0) ? 
+ nir_load_vertex_id(b) : + nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor); + + nir_ssa_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf)); + + assert((stride % interchange_align) == 0 && "must be aligned"); + assert((offset % interchange_align) == 0 && "must be aligned"); + + unsigned stride_el = stride / interchange_align; + unsigned offset_el = offset / interchange_align; + + nir_ssa_def *stride_offset_el = + nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el); + + /* Load the raw vector */ + nir_ssa_def *memory = + nir_load_constant_agx(b, interchange_comps, + interchange_register_size, + base, + stride_offset_el, + .format = interchange_format); + + unsigned dest_size = nir_dest_bit_size(intr->dest); + + /* Unpack but do not convert non-native non-array formats */ + if (is_rgb10_a2(desc) && interchange_format == PIPE_FORMAT_R32_UINT) { + unsigned bits[] = { 10, 10, 10, 2 }; + + if (is_signed) + memory = nir_format_unpack_sint(b, memory, bits, 4); + else + memory = nir_format_unpack_uint(b, memory, bits, 4); + } + + if (desc->channel[chan].normalized) { + /* 8/16-bit normalized formats are native, others converted here */ + if (is_rgb10_a2(desc) && is_signed) { + unsigned bits[] = { 10, 10, 10, 2 }; + memory = nir_format_snorm_to_float(b, memory, bits); + } else if (desc->channel[chan].size == 32) { + assert(desc->is_array && "no non-array 32-bit norm formats"); + unsigned bits[] = { 32, 32, 32, 32 }; + + if (is_signed) + memory = nir_format_snorm_to_float(b, memory, bits); + else + memory = nir_format_unorm_to_float(b, memory, bits); + } + } else if (desc->channel[chan].pure_integer) { + /* Zero-extension is native, may need to sign extend */ + if (is_signed) + memory = nir_i2iN(b, memory, dest_size); + } else { + if (is_unsigned) + memory = nir_u2fN(b, memory, dest_size); + else if (is_signed || is_fixed) + memory = nir_i2fN(b, memory, dest_size); + else + memory = nir_f2fN(b, memory, dest_size); + + /* 16.16 fixed-point weirdo GL formats need to be scaled */ + if (is_fixed) { + assert(desc->is_array && desc->channel[chan].size == 32); + assert(dest_size == 32 && "overflow if smaller"); + memory = nir_fmul_imm(b, memory, 1.0 / 65536.0); + } + } + + /* We now have a properly formatted vector of the components in memory. Apply + * the format swizzle forwards to trim/pad/reorder as needed. 
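+    * For example, R8G8B8_UNORM has format swizzle XYZ1: X/Y/Z select the
+    * three components loaded from memory, and W is padded with 1.0 (or
+    * integer 1 for pure-integer formats).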
+    */
+   nir_ssa_def *channels[4] = { NULL };
+   assert(nir_intrinsic_component(intr) == 0 && "unimplemented");
+
+   for (unsigned i = 0; i < intr->num_components; ++i)
+      channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[i], is_int);
+
+   nir_ssa_def *logical = nir_vec(b, channels, intr->num_components);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, logical);
+   return true;
+}
+
+bool
+agx_nir_lower_vbo(nir_shader *shader, struct agx_vbufs *vbufs)
+{
+   assert(shader->info.stage == MESA_SHADER_VERTEX);
+   return nir_shader_instructions_pass(shader, pass,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       vbufs);
+}
diff --git a/src/asahi/lib/agx_nir_lower_vbo.h b/src/asahi/lib/agx_nir_lower_vbo.h
new file mode 100644
index 00000000000..ab014707d57
--- /dev/null
+++ b/src/asahi/lib/agx_nir_lower_vbo.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef __AGX_NIR_LOWER_VBO_H
+#define __AGX_NIR_LOWER_VBO_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "nir.h"
+#include "util/format/u_formats.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define AGX_MAX_ATTRIBS (16)
+#define AGX_MAX_VBUFS (16)
+
+/* See pipe_vertex_element for justification on the sizes. This structure should
+ * be small so it can be embedded into a shader key.
+ */
+struct agx_attribute {
+   uint32_t divisor;
+   uint16_t src_offset;
+   uint8_t buf;
+
+   /* pipe_format, all vertex formats should be <= 255 */
+   uint8_t format;
+};
+
+struct agx_vbufs {
+   unsigned count;
+   uint32_t strides[AGX_MAX_VBUFS];
+   struct agx_attribute attributes[AGX_MAX_ATTRIBS];
+};
+
+bool agx_nir_lower_vbo(nir_shader *shader, struct agx_vbufs *vbufs);
+bool agx_vbo_supports_format(enum pipe_format format);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif
diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build
index d449d7bff68..5bff002b442 100644
--- a/src/asahi/lib/meson.build
+++ b/src/asahi/lib/meson.build
@@ -27,6 +27,7 @@ libasahi_lib_files = files(
   'agx_meta.c',
   'agx_tilebuffer.c',
   'agx_nir_lower_tilebuffer.c',
+  'agx_nir_lower_vbo.c',
   'agx_ppp.h',
   'pool.c',
 )
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index 3d4b621fa6e..fccf7b1c8cd 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1573,18 +1573,8 @@ agx_is_format_supported(struct pipe_screen* pscreen,
          return false;
    }
 
-   /* TODO: formats */
-   if (usage & PIPE_BIND_VERTEX_BUFFER) {
-      switch (format) {
-      case PIPE_FORMAT_R32_FLOAT:
-      case PIPE_FORMAT_R32G32_FLOAT:
-      case PIPE_FORMAT_R32G32B32_FLOAT:
-      case PIPE_FORMAT_R32G32B32A32_FLOAT:
-         break;
-      default:
-         return false;
-      }
-   }
+   if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
+      return false;
 
    if (usage & PIPE_BIND_DEPTH_STENCIL) {
       switch (format) {
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 90f7b129453..889b08bf8a8 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -987,18 +987,12 @@ agx_create_vertex_elements(struct pipe_context *ctx,
       const struct util_format_description *desc =
          util_format_description(ve.src_format);
 
-      unsigned chan_size = desc->channel[0].size / 8;
-
-      assert(chan_size == 1 || chan_size == 2 || chan_size == 4);
-      assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
-      assert((ve.src_offset & (chan_size - 1)) == 0);
 
       attribs[i] = (struct agx_attribute) {
          .buf = ve.vertex_buffer_index,
-         .src_offset = ve.src_offset / chan_size,
-         .nr_comps_minus_1 = desc->nr_channels - 1,
-         .format = agx_vertex_format[ve.src_format],
+         .src_offset = ve.src_offset,
+         .format = ve.src_format,
          .divisor = ve.instance_divisor
       };
    }
@@ -1184,7 +1179,9 @@ agx_compile_variant(struct agx_device *dev,
 
    agx_preprocess_nir(nir);
 
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      NIR_PASS_V(nir, agx_nir_lower_vbo, &key->vbuf);
+   } else {
       struct agx_tilebuffer_layout tib =
          agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
 
@@ -1243,13 +1240,12 @@ agx_create_shader_state(struct pipe_context *pctx,
 
    switch (so->nir->info.stage) {
    case MESA_SHADER_VERTEX: {
-      key.base.vs.num_vbufs = AGX_MAX_VBUFS;
+      key.vbuf.count = AGX_MAX_VBUFS;
 
      for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
-         key.base.vs.vbuf_strides[i] = 16;
-         key.base.vs.attributes[i] = (struct agx_attribute) {
+         key.vbuf.strides[i] = 16;
+         key.vbuf.attributes[i] = (struct agx_attribute) {
            .buf = i,
-           .nr_comps_minus_1 = 4 - 1,
-           .format = AGX_FORMAT_I32
+           .format = PIPE_FORMAT_R32G32B32A32_FLOAT
         };
      }
@@ -1295,20 +1291,18 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
 static bool
 agx_update_vs(struct agx_context *ctx)
 {
-   struct agx_vs_shader_key key = { 0 };
-
-   memcpy(key.attributes, ctx->attributes,
-          sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS);
-
-   u_foreach_bit(i, ctx->vb_mask) {
-      key.vbuf_strides[i] = ctx->vertex_buffers[i].stride;
-   }
-
-   struct asahi_shader_key akey = {
-      .base.vs = key
+   struct asahi_shader_key key = {
+      .vbuf.count = util_last_bit(ctx->vb_mask),
    };
 
-   return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &akey);
+   memcpy(key.vbuf.attributes, ctx->attributes,
+          sizeof(key.vbuf.attributes[0]) * AGX_MAX_ATTRIBS);
+
+   u_foreach_bit(i, ctx->vb_mask) {
+      key.vbuf.strides[i] = ctx->vertex_buffers[i].stride;
+   }
+
+   return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &key);
 }
 
 static bool
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index ea1c3d65721..cdd70260355 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -34,6 +34,7 @@
 #include "asahi/lib/agx_device.h"
 #include "asahi/lib/pool.h"
 #include "asahi/lib/agx_tilebuffer.h"
+#include "asahi/lib/agx_nir_lower_vbo.h"
 #include "asahi/compiler/agx_compile.h"
 #include "asahi/layout/layout.h"
 #include "compiler/nir/nir_lower_blend.h"
@@ -142,6 +143,8 @@ struct agx_blend {
 struct asahi_shader_key {
    struct agx_shader_key base;
 
+   struct agx_vbufs vbuf;
+
    struct agx_blend blend;
    unsigned nr_cbufs;
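
Reviewer note, not part of the patch: the element addressing that pass() builds
(stride_el / offset_el, then a load whose offset is in whole elements) can be
summarized in plain C. This is a sketch under the assumption that the hardware
load scales the element offset by the interchange format's block size in bytes,
which is what the alignment asserts in the pass imply; the helper name and the
example values are illustrative, not Mesa API.

   #include <inttypes.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Hypothetical model of the address one attribute fetch hits */
   static uint64_t
   attr_fetch_address(uint64_t vbo_base, uint32_t stride, uint16_t src_offset,
                      uint32_t align, /* interchange block size in bytes */
                      uint32_t vertex_id, uint32_t instance_id, uint32_t divisor)
   {
      /* Divisor == 0 selects per-vertex data, else divide the instance ID */
      uint32_t el = divisor ? (instance_id / divisor) : vertex_id;

      /* The pass asserts stride and src_offset are multiples of align, so the
       * offset can be expressed in whole elements (stride_el and offset_el) */
      uint32_t el_offset = (stride / align) * el + (src_offset / align);

      /* The hardware load scales the element offset back to bytes */
      return vbo_base + (uint64_t)el_offset * align;
   }

   int main(void)
   {
      /* Interleaved vec3 position + vec2 texcoord, 20-byte stride: fetch the
       * texcoord (byte offset 12) of vertex 7 from a VBO mapped at 0x10000 */
      printf("0x%" PRIx64 "\n", attr_fetch_address(0x10000, 20, 12, 4, 7, 0, 0));
      return 0;
   }

This prints 0x10098: base 0x10000 plus 38 four-byte elements (7 * 5 + 3).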