From 17f348f73a9b5c6aaa3159f31565968c58220b98 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 28 Nov 2023 20:39:30 -0400 Subject: [PATCH] asahi: Implement ARB_viewport_array Signed-off-by: Alyssa Rosenzweig Part-of: --- docs/features.txt | 8 +- docs/relnotes/new_features.txt | 2 + src/asahi/lib/agx_helpers.h | 2 + src/gallium/drivers/asahi/agx_blit.c | 4 +- src/gallium/drivers/asahi/agx_pipe.c | 5 ++ src/gallium/drivers/asahi/agx_state.c | 116 +++++++++++++++----------- src/gallium/drivers/asahi/agx_state.h | 5 +- 7 files changed, 85 insertions(+), 57 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 3460be1aced..8ce06da69c5 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -147,7 +147,7 @@ GL 4.1, GLSL 4.10 --- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_shader_precision DONE (freedreno/a6xx, all drivers that support GLSL 4.10, asahi) GL_ARB_vertex_attrib_64bit DONE (freedreno/a6xx, softpipe) - GL_ARB_viewport_array DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+) + GL_ARB_viewport_array DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+, asahi) GL 4.2, GLSL 4.20 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, virgl, zink, d3d12, iris, crocus/gen7+, asahi @@ -175,7 +175,7 @@ GL 4.3, GLSL 4.30 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, v GL_ARB_copy_image DONE (freedreno/a6xx, nv50, softpipe, v3d, crocus) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) - GL_ARB_fragment_layer_viewport DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+) + GL_ARB_fragment_layer_viewport DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+, asahi) GL_ARB_framebuffer_no_attachments DONE (freedreno, softpipe, v3d, asahi, crocus/gen7+) GL_ARB_internalformat_query2 DONE (all drivers) GL_ARB_invalidate_subdata DONE (all drivers) @@ -215,7 +215,7 @@ GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, 
nvc0, r600, radeonsi, llvmpipe, v GL_ARB_ES3_1_compatibility DONE (freedreno/a6xx, softpipe) GL_ARB_clip_control DONE (freedreno, nv50, softpipe, lima, crocus, asahi) GL_ARB_conditional_render_inverted DONE (freedreno, nv50, softpipe, panfrost, asahi, crocus/gen6+) - GL_ARB_cull_distance DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+) + GL_ARB_cull_distance DONE (freedreno/a6xx, nv50, softpipe, crocus/gen6+, asahi) GL_ARB_derivative_control DONE (freedreno/a3xx+, nv50, softpipe, asahi, crocus/gen7+) GL_ARB_direct_state_access DONE (all drivers) GL_ARB_get_texture_sub_image DONE (all drivers) @@ -348,7 +348,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_OES_texture_half_float DONE (freedreno, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe, panfrost, v3d, zink, lima, asahi, iris, crocus, etnaviv/HALF_FLOAT) GL_OES_texture_half_float_linear DONE (freedreno, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe, panfrost, v3d, zink, lima, asahi, iris, crocus, etnaviv/HALF_FLOAT) GL_OES_texture_view DONE (freedreno, r600, radeonsi, nv50, nvc0, softpipe, llvmpipe, v3d, zink, iris, crocus/gen7+, d3d12) - GL_OES_viewport_array DONE (freedreno/a6xx, nvc0, r600, radeonsi, softpipe, zink, iris, crocus/gen7.5+) + GL_OES_viewport_array DONE (freedreno/a6xx, nvc0, r600, radeonsi, softpipe, zink, iris, crocus/gen7.5+, asahi) GLX_ARB_context_flush_control DONE (all drivers) GLX_ARB_robustness_application_isolation not started GLX_ARB_robustness_share_group_isolation not started diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 2ae9ab08194..047e6294ea7 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -13,3 +13,5 @@ OpenGL 4.6 (up from 4.2) on d3d12 VK_EXT_depth_clamp_zero_one on RADV GL_ARB_shader_texture_image_samples on Asahi GL_ARB_indirect_parameters on Asahi +GL_ARB_viewport_array on Asahi +GL_ARB_fragment_layer_viewport on Asahi diff --git 
a/src/asahi/lib/agx_helpers.h b/src/asahi/lib/agx_helpers.h index 2280298c4f3..14d901c7bd6 100644 --- a/src/asahi/lib/agx_helpers.h +++ b/src/asahi/lib/agx_helpers.h @@ -13,6 +13,8 @@ #include "agx_pack.h" #include "agx_ppp.h" +#define AGX_MAX_VIEWPORTS (16) + #define agx_push(ptr, T, cfg) \ for (unsigned _loop = 0; _loop < 1; ++_loop, ptr += AGX_##T##_LENGTH) \ agx_pack(ptr, T, cfg) diff --git a/src/gallium/drivers/asahi/agx_blit.c b/src/gallium/drivers/asahi/agx_blit.c index 99fdc6592c5..669c3506356 100644 --- a/src/gallium/drivers/asahi/agx_blit.c +++ b/src/gallium/drivers/asahi/agx_blit.c @@ -22,8 +22,8 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, util_blitter_save_geometry_shader(blitter, ctx->stage[PIPE_SHADER_GEOMETRY].shader); util_blitter_save_rasterizer(blitter, ctx->rast); - util_blitter_save_viewport(blitter, &ctx->viewport); - util_blitter_save_scissor(blitter, &ctx->scissor); + util_blitter_save_viewport(blitter, &ctx->viewport[0]); + util_blitter_save_scissor(blitter, &ctx->scissor[0]); util_blitter_save_fragment_shader(blitter, ctx->stage[PIPE_SHADER_FRAGMENT].shader); util_blitter_save_blend(blitter, ctx->blend); diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 409cbc810d9..a9b8207cfc1 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -41,6 +41,8 @@ #include "agx_device.h" #include "agx_disk_cache.h" #include "agx_fence.h" +#include "agx_helpers.h" +#include "agx_pack.h" #include "agx_public.h" #include "agx_state.h" #include "agx_tilebuffer.h" @@ -1733,6 +1735,9 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_QUERY_SAMPLES: return true; + case PIPE_CAP_MAX_VIEWPORTS: + return AGX_MAX_VIEWPORTS; + case PIPE_CAP_VIDEO_MEMORY: { uint64_t system_memory; diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 76f36509e40..1110f9ca23a 100644 --- 
a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -940,10 +940,10 @@ agx_set_scissor_states(struct pipe_context *pctx, unsigned start_slot, { struct agx_context *ctx = agx_context(pctx); - assert(start_slot == 0 && "no geometry shaders"); - assert(num_scissors == 1 && "no geometry shaders"); + STATIC_ASSERT(sizeof(ctx->scissor[0]) == sizeof(*scissor)); + assert(start_slot + num_scissors <= AGX_MAX_VIEWPORTS); - ctx->scissor = *scissor; + memcpy(&ctx->scissor[start_slot], scissor, sizeof(*scissor) * num_scissors); ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS; } @@ -963,11 +963,11 @@ agx_set_viewport_states(struct pipe_context *pctx, unsigned start_slot, { struct agx_context *ctx = agx_context(pctx); - assert(start_slot == 0 && "no geometry shaders"); - assert(num_viewports == 1 && "no geometry shaders"); + STATIC_ASSERT(sizeof(ctx->viewport[0]) == sizeof(*vp)); + assert(start_slot + num_viewports <= AGX_MAX_VIEWPORTS); + memcpy(&ctx->viewport[start_slot], vp, sizeof(*vp) * num_viewports); ctx->dirty |= AGX_DIRTY_VIEWPORT; - ctx->viewport = *vp; } static void @@ -1010,28 +1010,39 @@ static void agx_upload_viewport_scissor(struct agx_pool *pool, struct agx_batch *batch, uint8_t **out, const struct pipe_viewport_state *vp, const struct pipe_scissor_state *ss, - bool clip_halfz) + bool clip_halfz, bool multi_viewport) { - unsigned minx, miny, maxx, maxy; + /* Number of viewports/scissors isn't precisely determinable in Gallium, so + * just key off whether we can write to anything other than viewport 0. This + * could be tuned in the future. + */ + unsigned count = multi_viewport ? 
AGX_MAX_VIEWPORTS : 1; - agx_get_scissor_extents(vp, ss, &batch->key, &minx, &miny, &maxx, &maxy); - - assert(maxx >= minx && maxy >= miny); - - float minz, maxz; - util_viewport_zmin_zmax(vp, clip_halfz, &minz, &maxz); - - /* Allocate a new scissor descriptor */ + /* Allocate scissor descriptors */ unsigned index = batch->scissor.size / AGX_SCISSOR_LENGTH; - void *ptr = util_dynarray_grow_bytes(&batch->scissor, 1, AGX_SCISSOR_LENGTH); + struct agx_scissor_packed *scissors = + util_dynarray_grow_bytes(&batch->scissor, count, AGX_SCISSOR_LENGTH); - agx_pack(ptr, SCISSOR, cfg) { - cfg.min_x = minx; - cfg.min_y = miny; - cfg.min_z = minz; - cfg.max_x = maxx; - cfg.max_y = maxy; - cfg.max_z = maxz; + unsigned minx[AGX_MAX_VIEWPORTS], miny[AGX_MAX_VIEWPORTS]; + unsigned maxx[AGX_MAX_VIEWPORTS], maxy[AGX_MAX_VIEWPORTS]; + + /* Upload each scissor (ss is NULL when scissoring is disabled) */ + for (unsigned i = 0; i < count; ++i) { + agx_get_scissor_extents(&vp[i], ss ? &ss[i] : NULL, &batch->key, + &minx[i], &miny[i], &maxx[i], &maxy[i]); + assert(maxx[i] >= minx[i] && maxy[i] >= miny[i]); + + float minz, maxz; + util_viewport_zmin_zmax(&vp[i], clip_halfz, &minz, &maxz); + + agx_pack(scissors + i, SCISSOR, cfg) { + cfg.min_x = minx[i]; + cfg.min_y = miny[i]; + cfg.min_z = minz; + cfg.max_x = maxx[i]; + cfg.max_y = maxy[i]; + cfg.max_z = maxz; + } } /* Upload state */ @@ -1040,7 +1051,7 @@ agx_upload_viewport_scissor(struct agx_pool *pool, struct agx_batch *batch, .depth_bias_scissor = true, .region_clip = true, .viewport = true, - .viewport_count = 1, + .viewport_count = count, }); agx_ppp_push(&ppp, DEPTH_BIAS_SCISSOR, cfg) { @@ -1051,28 +1062,33 @@ agx_upload_viewport_scissor(struct agx_pool *pool, struct agx_batch *batch, cfg.depth_bias = count ? 
count - 1 : 0; }; - agx_ppp_push(&ppp, REGION_CLIP, cfg) { - cfg.enable = true; - cfg.min_x = minx / 32; - cfg.min_y = miny / 32; - cfg.max_x = DIV_ROUND_UP(MAX2(maxx, 1), 32); - cfg.max_y = DIV_ROUND_UP(MAX2(maxy, 1), 32); + for (unsigned i = 0; i < count; ++i) { + agx_ppp_push(&ppp, REGION_CLIP, cfg) { + cfg.enable = true; + cfg.min_x = minx[i] / 32; + cfg.min_y = miny[i] / 32; + cfg.max_x = DIV_ROUND_UP(MAX2(maxx[i], 1), 32); + cfg.max_y = DIV_ROUND_UP(MAX2(maxy[i], 1), 32); + } } agx_ppp_push(&ppp, VIEWPORT_CONTROL, cfg) ; - agx_ppp_push(&ppp, VIEWPORT, cfg) { - cfg.translate_x = vp->translate[0]; - cfg.translate_y = vp->translate[1]; - cfg.translate_z = vp->translate[2]; - cfg.scale_x = vp->scale[0]; - cfg.scale_y = vp->scale[1]; - cfg.scale_z = vp->scale[2]; + /* Upload viewports */ + for (unsigned i = 0; i < count; ++i) { + agx_ppp_push(&ppp, VIEWPORT, cfg) { + cfg.translate_x = vp[i].translate[0]; + cfg.translate_y = vp[i].translate[1]; + cfg.translate_z = vp[i].translate[2]; + cfg.scale_x = vp[i].scale[0]; + cfg.scale_y = vp[i].scale[1]; + cfg.scale_z = vp[i].scale[2]; - if (should_lower_clip_m1_1(pool->dev, clip_halfz)) { - cfg.translate_z -= cfg.scale_z; - cfg.scale_z *= 2; + if (should_lower_clip_m1_1(pool->dev, clip_halfz)) { + cfg.translate_z -= cfg.scale_z; + cfg.scale_z *= 2; + } } } @@ -3096,13 +3112,13 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS; } - if (ctx->dirty & - (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS | AGX_DIRTY_RS)) { + if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS | + AGX_DIRTY_RS | AGX_DIRTY_VS)) { - agx_upload_viewport_scissor( - pool, batch, &out, &ctx->viewport, - ctx->rast->base.scissor ? &ctx->scissor : NULL, - ctx->rast->base.clip_halfz); + agx_upload_viewport_scissor(pool, batch, &out, ctx->viewport, + ctx->rast->base.scissor ? 
ctx->scissor : NULL, + ctx->rast->base.clip_halfz, + ctx->vs->info.nonzero_viewport); } bool varyings_dirty = false; @@ -4263,8 +4279,10 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, /* The scissor/zbias arrays are indexed with 16-bit integers, imposigin a * maximum of UINT16_MAX descriptors. Flush if the next draw would overflow */ - if (unlikely((batch->scissor.size / AGX_SCISSOR_LENGTH) >= UINT16_MAX) || - (batch->depth_bias.size / AGX_DEPTH_BIAS_LENGTH) >= UINT16_MAX) { + if (unlikely( + (((batch->scissor.size / AGX_SCISSOR_LENGTH) + AGX_MAX_VIEWPORTS) > + UINT16_MAX) || + (batch->depth_bias.size / AGX_DEPTH_BIAS_LENGTH) >= UINT16_MAX)) { agx_flush_batch_for_reason(ctx, batch, "Scissor/depth bias overflow"); } else if (unlikely(batch->draws > 100000)) { /* Mostly so drawoverhead doesn't OOM */ diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 29225ce2f91..093aed42f6c 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -26,6 +26,7 @@ #include "util/disk_cache.h" #include "util/hash_table.h" #include "util/u_range.h" +#include "agx_helpers.h" #include "agx_meta.h" #ifdef __GLIBC__ @@ -482,8 +483,8 @@ struct agx_context { struct agx_zsa *zs; struct agx_blend *blend; struct pipe_blend_color blend_color; - struct pipe_viewport_state viewport; - struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport[AGX_MAX_VIEWPORTS]; + struct pipe_scissor_state scissor[AGX_MAX_VIEWPORTS]; struct pipe_stencil_ref stencil_ref; struct agx_streamout streamout; uint16_t sample_mask;