diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index adea166c83d..000e3f3c263 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -2378,7 +2378,10 @@ static unsigned find_rp_state(struct zink_context *ctx) { bool found = false; - struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache, &ctx->gfx_pipeline_state.rendering_info, &found); + /* calc the state idx using the samples to account for msrtss */ + unsigned idx = zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ? + util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples + 1) : 0; + struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache[idx], &ctx->gfx_pipeline_state.rendering_info, &found); struct zink_rendering_info *info; if (found) { info = (void*)he->key; @@ -2386,7 +2389,7 @@ find_rp_state(struct zink_context *ctx) } info = ralloc(ctx, struct zink_rendering_info); memcpy(info, &ctx->gfx_pipeline_state.rendering_info, sizeof(VkPipelineRenderingCreateInfo)); - info->id = ctx->rendering_state_cache.entries; + info->id = ctx->rendering_state_cache[idx].entries; he->key = info; return info->id; } @@ -2569,6 +2572,15 @@ begin_rendering(struct zink_context *ctx) ctx->gfx_pipeline_state.dirty |= rp_changed; ctx->gfx_pipeline_state.rp_state = rp_state; + VkMultisampledRenderToSingleSampledInfoEXT msrtss = { + VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + NULL, + VK_TRUE, + ctx->gfx_pipeline_state.rast_samples + 1, + }; + + ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? 
&msrtss : NULL; + assert(!ctx->transient_attachments || msrtss.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT); VKCTX(CmdBeginRendering)(ctx->batch.state->cmdbuf, &ctx->dynamic_fb.info); ctx->batch.in_rp = true; return clear_buffers; @@ -2609,7 +2621,8 @@ zink_batch_rp(struct zink_context *ctx) * - msrtss is TODO * - dynamic rendering doesn't have input attachments */ - if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering || ctx->transient_attachments || ctx->fbfetch_outputs) + if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering || + (ctx->transient_attachments && !zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled) || ctx->fbfetch_outputs) clear_buffers = zink_begin_render_pass(ctx); else clear_buffers = begin_rendering(ctx); @@ -3114,10 +3127,10 @@ zink_set_framebuffer_state(struct pipe_context *pctx, struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; if (psurf) { struct zink_surface *transient = zink_transient_surface(psurf); - if (transient) + if (transient || psurf->nr_samples) ctx->transient_attachments |= BITFIELD_BIT(i); if (!samples) - samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, 1); + samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1); struct zink_resource *res = zink_resource(psurf->texture); if (zink_csurface(psurf)->info.layerCount > layers) ctx->fb_layer_mismatch |= BITFIELD_BIT(i); @@ -3148,10 +3161,10 @@ zink_set_framebuffer_state(struct pipe_context *pctx, if (ctx->fb_state.zsbuf) { struct pipe_surface *psurf = ctx->fb_state.zsbuf; struct zink_surface *transient = zink_transient_surface(psurf); - if (transient) + if (transient || psurf->nr_samples) ctx->transient_attachments |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); if (!samples) - samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, 1); + samples = MAX3(transient ? 
transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1); if (zink_csurface(psurf)->info.layerCount > layers) ctx->fb_layer_mismatch |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); zink_resource(psurf->texture)->fb_bind_count++; @@ -4981,7 +4994,8 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) _mesa_hash_table_init(&ctx->framebuffer_cache, ctx, hash_framebuffer_imageless, equals_framebuffer_imageless); if (!zink_init_render_pass(ctx)) goto fail; - _mesa_set_init(&ctx->rendering_state_cache, ctx, hash_rendering_state, equals_rendering_state); + for (unsigned i = 0; i < ARRAY_SIZE(ctx->rendering_state_cache); i++) + _mesa_set_init(&ctx->rendering_state_cache[i], ctx, hash_rendering_state, equals_rendering_state); ctx->dynamic_fb.info.pColorAttachments = ctx->dynamic_fb.attachments; ctx->dynamic_fb.info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO; for (unsigned i = 0; i < ARRAY_SIZE(ctx->dynamic_fb.attachments); i++) { diff --git a/src/gallium/drivers/zink/zink_render_pass.c b/src/gallium/drivers/zink/zink_render_pass.c index 1f9a40094bc..80bae85cee5 100644 --- a/src/gallium/drivers/zink/zink_render_pass.c +++ b/src/gallium/drivers/zink/zink_render_pass.c @@ -78,6 +78,7 @@ create_render_pass2(struct zink_screen *screen, struct zink_render_pass_state *s pstate->num_cresolves = state->num_cresolves; pstate->num_zsresolves = state->num_zsresolves; pstate->fbfetch = 0; + pstate->msaa_samples = state->msaa_samples; for (int i = 0; i < state->num_cbufs; i++) { struct zink_rt_attrib *rt = state->rts + i; attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; @@ -215,6 +216,15 @@ create_render_pass2(struct zink_screen *screen, struct zink_render_pass_state *s } else subpass.pNext = NULL; + VkMultisampledRenderToSingleSampledInfoEXT msrtss = { + VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + subpass.pNext, /* continue the chain with whatever the subpass pointed at (or NULL), not the address 
of the pNext field */ + VK_TRUE, + state->msaa_samples, + }; + if (state->msaa_samples) + 
subpass.pNext = &msrtss; + VkRenderPassCreateInfo2 rpci = {0}; rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2; rpci.attachmentCount = num_attachments + state->num_cresolves + state->num_zsresolves; @@ -466,6 +476,8 @@ get_render_pass(struct zink_context *ctx) } state.num_rts++; } + state.msaa_samples = screen->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ? + ctx->gfx_pipeline_state.rast_samples + 1 : 0; state.num_cbufs = fb->nr_cbufs; assert(!state.num_cresolves || state.num_cbufs == state.num_cresolves); @@ -751,7 +763,7 @@ zink_begin_render_pass(struct zink_context *ctx) setup_framebuffer(ctx); if (ctx->batch.in_rp) return 0; - /* TODO: use VK_EXT_multisampled_render_to_single_sampled */ + if (ctx->framebuffer->rp->state.msaa_expand_mask) { uint32_t rp_state = ctx->gfx_pipeline_state.rp_state; struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass; @@ -796,7 +808,7 @@ zink_end_render_pass(struct zink_context *ctx) { if (ctx->batch.in_rp) { VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf); - /* TODO: use VK_EXT_multisampled_render_to_single_sampled */ + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i]; if (csurf) diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index b99c7c4e82e..b1c812eac46 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -520,6 +520,11 @@ create_ici(struct zink_screen *screen, VkImageCreateInfo *ici, const struct pipe ici->tiling = screen->info.have_EXT_image_drm_format_modifier && modifiers_count ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : bind & (PIPE_BIND_LINEAR | ZINK_BIND_DMABUF) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + /* XXX: does this have perf implications anywhere? 
hopefully not */ + if (ici->samples == VK_SAMPLE_COUNT_1_BIT && + screen->info.have_EXT_multisampled_render_to_single_sampled && + ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) + ici->flags |= VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT; ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE; ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c index d0afa8e5971..c1602a2a6cf 100644 --- a/src/gallium/drivers/zink/zink_surface.c +++ b/src/gallium/drivers/zink/zink_surface.c @@ -203,7 +203,7 @@ do_create_surface(struct pipe_context *pctx, struct pipe_resource *pres, const s /* create a new surface */ struct zink_surface *surface = create_surface(pctx, pres, templ, ivci, actually); /* only transient surfaces have nr_samples set */ - surface->base.nr_samples = 0; + surface->base.nr_samples = zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled ? templ->nr_samples : 0; surface->hash = hash; surface->ivci = *ivci; return surface; @@ -292,8 +292,7 @@ zink_create_surface(struct pipe_context *pctx, struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)wrap_surface(pctx, psurf); - /* TODO: use VK_EXT_multisampled_render_to_single_sampled and skip this entirely */ - if (templ->nr_samples) { + if (templ->nr_samples && !zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled) { /* transient fb attachment: not cached */ struct pipe_resource rtempl = *pres; rtempl.nr_samples = templ->nr_samples; @@ -319,7 +318,7 @@ zink_destroy_surface(struct zink_screen *screen, struct pipe_surface *psurface) { struct zink_surface *surface = zink_surface(psurface); struct zink_resource *res = zink_resource(psurface->texture); - if (!psurface->nr_samples && !surface->is_swapchain) { + if ((!psurface->nr_samples || screen->info.have_EXT_multisampled_render_to_single_sampled) && !surface->is_swapchain) { simple_mtx_lock(&res->surface_mtx); 
if (psurface->reference.count) { /* a different context got a cache hit during deletion: this surface is alive again */ diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 3584277265f..1e0208c1896 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -1037,6 +1037,7 @@ struct zink_render_pass_state { unsigned num_rts; uint32_t clears; //for extra verification and update flagging uint16_t msaa_expand_mask; + uint16_t msaa_samples; //used with VK_EXT_multisampled_render_to_single_sampled }; struct zink_pipeline_rt { @@ -1045,7 +1046,8 @@ struct zink_pipeline_rt { }; struct zink_render_pass_pipeline_state { - uint32_t num_attachments:22; + uint32_t num_attachments:14; + uint32_t msaa_samples : 8; uint32_t fbfetch:1; uint32_t color_read:1; uint32_t depth_read:1; @@ -1397,7 +1399,6 @@ struct zink_surface { struct zink_ctx_surface { struct pipe_surface base; struct zink_surface *surf; //the actual surface - /* TODO: use VK_EXT_multisampled_render_to_single_sampled */ struct zink_ctx_surface *transient; //for use with EXT_multisample_render_to_texture bool transient_init; //whether the transient surface has data }; @@ -1619,7 +1620,7 @@ struct zink_context { } dynamic_fb; uint32_t fb_layer_mismatch; //bitmask unsigned depth_bias_scale_factor; - struct set rendering_state_cache; + struct set rendering_state_cache[6]; //[util_logbase2_ceil(msrtss samplecount)] struct set render_pass_state_cache; struct hash_table *render_pass_cache; VkExtent2D swapchain_size;