diff --git a/docs/features.txt b/docs/features.txt index 9f9c52db416..badec84db6a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -69,17 +69,17 @@ GL 3.0, GLSL 1.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llv (*) freedreno (a2xx-a4xx), llvmpipe, and softpipe have fake Multisample anti-aliasing support -GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12, panfrost +GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12, panfrost, asahi Forward compatible context support/deprecations DONE - GL_ARB_draw_instanced (Instanced drawing) DONE (etnaviv/HALTI2, v3d, asahi) - GL_ARB_copy_buffer (Buffer copying) DONE (v3d, vc4, lima, asahi) - GL_NV_primitive_restart (Primitive restart) DONE (v3d, asahi) - 16 vertex texture image units DONE (asahi) + GL_ARB_draw_instanced (Instanced drawing) DONE (etnaviv/HALTI2, v3d) + GL_ARB_copy_buffer (Buffer copying) DONE (v3d, vc4, lima) + GL_NV_primitive_restart (Primitive restart) DONE (v3d) + 16 vertex texture image units DONE () GL_ARB_texture_buffer_object (Texture buffer objs) DONE (v3d) - GL_ARB_texture_rectangle (Rectangular textures) DONE (v3d, vc4, lima, asahi) - GL_ARB_uniform_buffer_object (Uniform buffer objs) DONE (v3d, asahi) - GL_EXT_texture_snorm (Signed normalized textures) DONE (v3d, asahi) + GL_ARB_texture_rectangle (Rectangular textures) DONE (v3d, vc4, lima) + GL_ARB_uniform_buffer_object (Uniform buffer objs) DONE (v3d) + GL_EXT_texture_snorm (Signed normalized textures) DONE (v3d) GL 3.2, GLSL 1.50 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, virgl, zink, d3d12 @@ -132,7 +132,7 @@ GL 4.0, GLSL 4.00 --- all DONE: freedreno/a6xx, i965/gen7+, nvc0, r600, radeonsi GL_ARB_sample_shading DONE (freedreno/a6xx, i965/gen6+, nv50, panfrost) GL_ARB_shader_subroutine DONE (freedreno, i965/gen6+, nv50, softpipe) GL_ARB_tessellation_shader DONE 
(freedreno/a6xx, i965/gen7+, ) - GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, i965/gen6+, softpipe, panfrost) + GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, i965/gen6+, softpipe, panfrost, asahi) GL_ARB_texture_cube_map_array DONE (freedreno/a4xx+, i965/gen6+, nv50, softpipe) GL_ARB_texture_gather DONE (freedreno, i965/gen6+, nv50, softpipe, v3d, panfrost, asahi) GL_ARB_texture_query_lod DONE (freedreno, i965, nv50, softpipe, v3d, panfrost) @@ -288,7 +288,7 @@ GLES3.2, GLSL ES 3.2 -- all DONE: freedreno/a6xx, i965/gen9+, radeonsi, virgl, l GL_OES_shader_multisample_interpolation DONE (freedreno/a6xx, i965, nvc0, r600) GL_OES_tessellation_shader DONE (freedreno/a6xx, all drivers that support GL_ARB_tessellation_shader) GL_OES_texture_border_clamp DONE (all drivers) - GL_OES_texture_buffer DONE (freedreno, i965, nvc0, r600, softpipe, panfrost) + GL_OES_texture_buffer DONE (freedreno, i965, nvc0, r600, softpipe, panfrost, asahi) GL_OES_texture_cube_map_array DONE (freedreno/a4xx+, i965/hsw+, nvc0, r600, softpipe) GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8) GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index cca0be072c0..303ea7bb493 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -597,7 +597,6 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array) { switch (dim) { case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: return array ? AGX_DIM_1D_ARRAY : AGX_DIM_1D; case GLSL_SAMPLER_DIM_2D: @@ -615,6 +614,9 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array) case GLSL_SAMPLER_DIM_CUBE: return array ? 
AGX_DIM_CUBE_ARRAY : AGX_DIM_CUBE;
 
+   case GLSL_SAMPLER_DIM_BUF:
+      unreachable("Buffer textures should have been lowered");
+
    default:
       unreachable("Invalid sampler dim\n");
    }
diff --git a/src/asahi/compiler/agx_nir_lower_texture.c b/src/asahi/compiler/agx_nir_lower_texture.c
index fddec9ab2c3..d97ec4b30e0 100644
--- a/src/asahi/compiler/agx_nir_lower_texture.c
+++ b/src/asahi/compiler/agx_nir_lower_texture.c
@@ -27,8 +27,10 @@
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_builtin_builder.h"
 #include "agx_compiler.h"
+#include "agx_internal_formats.h"
 
-#define AGX_TEXTURE_DESC_STRIDE 24
+#define AGX_TEXTURE_DESC_STRIDE   24
+#define AGX_FORMAT_RGB32_EMULATED 0x36
 
 static nir_ssa_def *
 texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
@@ -66,12 +68,28 @@ steal_tex_src(nir_tex_instr *tex, nir_tex_src_type type_)
    return ssa;
 }
 
+/* Implement txs for buffer textures. There is no mipmapping to worry about, so
+ * this is just a uniform pull. However, we lower buffer textures to 2D so the
+ * original size is irrecoverable. Instead, we stash it in the "Acceleration
+ * buffer" field, which is unused for linear images. Fetch just that.
+ */
+static nir_ssa_def *
+agx_txs_buffer(nir_builder *b, nir_ssa_def *descriptor)
+{
+   nir_ssa_def *size_ptr = nir_iadd_imm(b, descriptor, 16);
+
+   return nir_load_global_constant(b, size_ptr, 8, 1, 32);
+}
+
 static nir_ssa_def *
 agx_txs(nir_builder *b, nir_tex_instr *tex)
 {
    nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
    nir_ssa_def *comp[4] = {NULL};
 
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+      return agx_txs_buffer(b, ptr);
+
    nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
    nir_ssa_def *w0 = nir_channel(b, desc, 0);
    nir_ssa_def *w1 = nir_channel(b, desc, 1);
@@ -149,6 +167,117 @@ lower_txs(nir_builder *b, nir_instr *instr, UNUSED void *data)
    return true;
 }
 
+/* Check at run time whether the bound texture uses the emulated RGB32 format.
+ * Reads the first 32-bit word of the texture descriptor and compares the 7-bit
+ * field at bit 6 against AGX_FORMAT_RGB32_EMULATED.
+ */
+static nir_ssa_def *
+format_is_rgb32(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
+   nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
+   nir_ssa_def *channels =
+      nir_iand_imm(b, nir_ushr_imm(b, desc, 6), BITFIELD_MASK(7));
+
+   return nir_ieq_imm(b, channels, AGX_FORMAT_RGB32_EMULATED);
+}
+
+/* Load from an RGB32 buffer texture. The texel is read directly from memory
+ * as three components at offset 3 * coordinate from the base address held in
+ * the texture descriptor; alpha is filled in with 1.
+ */
+static nir_ssa_def *
+load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
+{
+   /* Base address right-shifted 4: bits [66, 102) */
+   nir_ssa_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
+   nir_ssa_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
+   nir_ssa_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
+   nir_ssa_def *base_shr4 =
+      nir_iand_imm(b, nir_ushr_imm(b, desc_hi, 2), BITFIELD64_MASK(36));
+   nir_ssa_def *base = nir_ishl_imm(b, base_shr4, 4);
+
+   nir_ssa_def *raw = nir_load_constant_agx(
+      b, 3, nir_dest_bit_size(tex->dest), base, nir_imul_imm(b, coordinate, 3),
+      .format = AGX_INTERNAL_FORMAT_I32);
+
+   /* Set alpha to 1 (in the appropriate format). The constant must have the
+    * same bit size as the loaded channels, or the vector below would mix bit
+    * sizes whenever the destination is not 32-bit.
+    */
+   bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;
+   nir_ssa_def *one = is_float ? nir_imm_floatN_t(b, 1.0, raw->bit_size)
+                               : nir_imm_intN_t(b, 1, raw->bit_size);
+
+   nir_ssa_def *swizzled[4] = {
+      nir_channel(b, raw, 0), nir_channel(b, raw, 1), nir_channel(b, raw, 2),
+      one};
+
+   return nir_vec(b, swizzled, nir_tex_instr_dest_size(tex));
+}
+
+/*
+ * Buffer textures are lowered to 2D (1024xN) textures in the driver to access
+ * more storage. When lowering, we need to fix up the coordinate accordingly.
+ *
+ * Furthermore, RGB32 formats are emulated by lowering to global memory access,
+ * so to read a buffer texture we generate code that looks like:
+ *
+ *    if (descriptor->format == RGB32)
+ *       return ((uint32_t *) descriptor->address)[x];
+ *    else
+ *       return txf(texture_as_2d, vec2(x % 1024, x / 1024));
+ */
+static bool
+lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord);
+   assert(coord->num_components == 1 && "buffer textures are 1D");
+
+   /* The OpenGL ES 3.2 specification says on page 187:
+    *
+    *    When a buffer texture is accessed in a shader, the results of a texel
+    *    fetch are undefined if the specified texel coordinate is negative, or
+    *    greater than or equal to the clamped number of texels in the texture
+    *    image.
+    *
+    * However, faulting would be undesirable for robustness, so clamp.
+    *
+    * NOTE(review): if the size can be zero, size - 1 wraps around and the
+    * unsigned min no longer clamps anything -- confirm that empty buffer
+    * views never reach this path.
+    */
+   nir_ssa_def *size = nir_get_texture_size(b, tex);
+   coord = nir_umin(b, coord, nir_iadd_imm(b, size, -1));
+
+   /* Lower RGB32 reads if the format requires */
+   nir_if *nif = nir_push_if(b, format_is_rgb32(b, tex));
+   nir_ssa_def *rgb32 = load_rgb32(b, tex, coord);
+   nir_push_else(b, nif);
+
+   /* Otherwise, lower the texture instruction to read from 2D */
+   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+   nir_ssa_def *coord2d = nir_vec2(b, nir_iand_imm(b, coord, BITFIELD_MASK(10)),
+                                   nir_ushr_imm(b, coord, 10));
+   nir_instr_remove(&tex->instr);
+   nir_builder_instr_insert(b, &tex->instr);
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord2d));
+   nir_block *else_block = nir_cursor_current_block(b->cursor);
+   nir_pop_if(b, nif);
+
+   /* Put it together with a phi */
+   nir_ssa_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
+   nir_ssa_def_rewrite_uses(&tex->dest.ssa, phi);
+
+   /* The phi itself uses the texture result, so presumably the rewrite above
+    * redirected its else source too; point that source back at the texture.
+    */
+   nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
+   nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
+   nir_instr_rewrite_src_ssa(phi->parent_instr, &else_src->src, &tex->dest.ssa);
+   return true;
+}
+
 /*
  * NIR indexes into array textures with unclamped floats (integer for txf). AGX
  * requires the index to be a clamped integer.
Lower tex_src_coord into @@ -166,6 +275,9 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data) if (nir_tex_instr_is_query(tex)) return false; + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) + return lower_buffer_texture(b, tex); + /* Get the coordinates */ nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord); nir_ssa_def *ms_idx = steal_tex_src(tex, nir_tex_src_ms_index); diff --git a/src/asahi/lib/agx_formats.c b/src/asahi/lib/agx_formats.c index d2b870dfdce..46c5a6367a5 100644 --- a/src/asahi/lib/agx_formats.c +++ b/src/asahi/lib/agx_formats.c @@ -135,6 +135,11 @@ const struct agx_pixel_format_entry agx_pixel_format[PIPE_FORMAT_COUNT] = { AGX_FMT(R11G11B10_FLOAT, R11G11B10, FLOAT, T, RG11B10F), AGX_FMT(R9G9B9E5_FLOAT, R9G9B9E5, FLOAT, F, RGB9E5), + /* These formats are emulated for texture buffers only */ + AGX_FMT(R32G32B32_FLOAT, R32G32B32_EMULATED, FLOAT, F, _), + AGX_FMT(R32G32B32_UINT, R32G32B32_EMULATED, UINT, F, _), + AGX_FMT(R32G32B32_SINT, R32G32B32_EMULATED, SINT, F, _), + AGX_FMT(ETC1_RGB8, ETC2_RGB8, UNORM, F,_), AGX_FMT(ETC2_RGB8, ETC2_RGB8, UNORM, F,_), AGX_FMT(ETC2_SRGB8, ETC2_RGB8, UNORM, F,_), diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index b889bc4058f..900fbdd5aa2 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -100,6 +100,12 @@ + + + + diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 56a7daee335..66d6c02e509 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -1275,12 +1275,12 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 1; case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_SURFACE_SAMPLE_COUNT: case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case 
PIPE_CAP_IMAGE_LOAD_FORMATTED: case PIPE_CAP_IMAGE_STORE_FORMATTED: case PIPE_CAP_COMPUTE: @@ -1313,8 +1313,9 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 16; + /* Texel buffers lowered to (at most) 1024x16384 2D textures */ case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: - return 65536; + return 1024 * 16384; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 64; @@ -1645,6 +1646,11 @@ agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, if (!agx_is_valid_pixel_format(tex_format)) return false; + /* RGB32 is emulated for texture buffers only */ + if (ent.channels == AGX_CHANNELS_R32G32B32_EMULATED && + target != PIPE_BUFFER) + return false; + if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable) return false; } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 0cbd06af933..f426f013922 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -630,6 +630,11 @@ agx_translate_tex_dim(enum pipe_texture_target dim, unsigned samples) assert(samples >= 1); switch (dim) { + case PIPE_BUFFER: + /* Lowered to 2D */ + assert(samples == 1); + return AGX_TEXTURE_DIMENSION_2D; + case PIPE_TEXTURE_1D: assert(samples == 1); return AGX_TEXTURE_DIMENSION_1D; @@ -721,10 +726,27 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]); cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]); cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]); - cfg.width = rsrc->base.width0; - cfg.height = rsrc->base.height0; - cfg.first_level = state->u.tex.first_level; - cfg.last_level = state->u.tex.last_level; + + if (state->target == PIPE_BUFFER) { + unsigned size_el = + state->u.buf.size / util_format_get_blocksize(format); + + /* Use a 2D texture to increase the maximum size */ + cfg.width = 1024; + cfg.height = DIV_ROUND_UP(size_el, cfg.width); + 
cfg.first_level = cfg.last_level = 0; + + /* Stash the actual size in an unused part of the texture descriptor, + * which we'll read later to implement txs. + */ + cfg.acceleration_buffer = (size_el << 4); + } else { + cfg.width = rsrc->base.width0; + cfg.height = rsrc->base.height0; + cfg.first_level = state->u.tex.first_level; + cfg.last_level = state->u.tex.last_level; + } + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); cfg.unk_mipmapped = rsrc->mipmapped; cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3; @@ -735,7 +757,10 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, } if (include_bo) { - cfg.address = agx_map_texture_gpu(rsrc, state->u.tex.first_layer); + cfg.address = agx_map_texture_gpu(rsrc, first_layer); + + if (state->target == PIPE_BUFFER) + cfg.address += state->u.buf.offset; if (ail_is_compressed(&rsrc->layout)) { cfg.acceleration_buffer = @@ -746,6 +771,8 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, if (state->target == PIPE_TEXTURE_3D) { cfg.depth = rsrc->base.depth0; + } else if (state->target == PIPE_BUFFER) { + cfg.depth = 1; } else { unsigned layers = state->u.tex.last_layer - state->u.tex.first_layer + 1; @@ -768,7 +795,9 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, if (rsrc->base.nr_samples > 1) cfg.samples = agx_translate_sample_count(rsrc->base.nr_samples); - if (rsrc->layout.tiling == AIL_TILING_LINEAR) { + if (state->target == PIPE_BUFFER) { + cfg.stride = (cfg.width * util_format_get_blocksize(format)) - 16; + } else if (rsrc->layout.tiling == AIL_TILING_LINEAR) { cfg.stride = ail_get_linear_stride_B(&rsrc->layout, 0) - 16; } else { assert(rsrc->layout.tiling == AIL_TILING_TWIDDLED || @@ -1779,7 +1808,11 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, continue; } - agx_batch_reads(batch, agx_resource(tex->base.texture)); + struct agx_resource *rsrc = tex->rsrc; + agx_batch_reads(batch, tex->rsrc); + + unsigned first_layer = + 
(tex->base.target == PIPE_BUFFER) ? 0 : tex->base.u.tex.first_layer; /* Without the address */ struct agx_texture_packed texture = tex->desc; @@ -1787,12 +1820,15 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, /* Just the address */ struct agx_texture_packed texture2; agx_pack(&texture2, TEXTURE, cfg) { - cfg.address = - agx_map_texture_gpu(tex->rsrc, tex->base.u.tex.first_layer); + cfg.address = agx_map_texture_gpu(rsrc, first_layer); - if (ail_is_compressed(&tex->rsrc->layout)) { + if (rsrc->base.target == PIPE_BUFFER) + cfg.address += tex->base.u.buf.offset; + + if (ail_is_compressed(&rsrc->layout)) { cfg.acceleration_buffer = - cfg.address + tex->rsrc->layout.metadata_offset_B; + agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B + + (first_layer * rsrc->layout.compression_layer_stride_B); } }