From 6478a6d888a1712ba9206a1dc7ecea76ed1cea5d Mon Sep 17 00:00:00 2001 From: David Rosca Date: Sat, 28 Dec 2024 14:20:24 +0100 Subject: [PATCH] frontends/va: Use compute only context if driver prefers compute Enables use of async compute for shader format conversions. Deinterlace filter still needs gfx. On my system with RX570 this fixes performance issues when using gpu-screen-recorder to record gameplay at full GPU load. It can now record with full framerate, compared to half framerate before. Reviewed-by: Leo Liu Part-of: --- src/gallium/frontends/va/context.c | 7 +++-- src/gallium/frontends/va/postproc.c | 11 ++++++- src/gallium/frontends/va/surface.c | 41 ++++++++++++++------------- src/gallium/frontends/va/va_private.h | 1 + 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/gallium/frontends/va/context.c b/src/gallium/frontends/va/context.c index f3222abda65..bcc2c3bc7b1 100644 --- a/src/gallium/frontends/va/context.c +++ b/src/gallium/frontends/va/context.c @@ -208,7 +208,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) if (!drv->vscreen->pscreen->get_video_param || !drv->vscreen->pscreen->is_video_format_supported) goto error_pipe; - drv->pipe = pipe_create_multimedia_context(drv->vscreen->pscreen, false); + bool compute_only = drv->vscreen->pscreen->get_param(drv->vscreen->pscreen, PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA); + drv->pipe = pipe_create_multimedia_context(drv->vscreen->pscreen, compute_only); if (!drv->pipe) goto error_pipe; @@ -220,7 +221,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) drv->vscreen->pscreen->get_param(drv->vscreen->pscreen, PIPE_CAP_COMPUTE)); if (can_init_compositor) { - if (!vl_compositor_init(&drv->compositor, drv->pipe, false)) + if (!vl_compositor_init(&drv->compositor, drv->pipe, compute_only)) goto error_compositor; if (!vl_compositor_init_state(&drv->cstate, drv->pipe)) goto error_compositor_state; @@ -577,6 +578,8 @@ vlVaTerminate(VADriverContextP ctx) drv = ctx->pDriverData; 
vl_compositor_cleanup_state(&drv->cstate); vl_compositor_cleanup(&drv->compositor); + if (drv->pipe_gfx) + drv->pipe_gfx->destroy(drv->pipe_gfx); drv->pipe->destroy(drv->pipe); drv->vscreen->destroy(drv->vscreen); handle_table_destroy(drv->htab); diff --git a/src/gallium/frontends/va/postproc.c b/src/gallium/frontends/va/postproc.c index ae24aed17c3..380afaf0737 100644 --- a/src/gallium/frontends/va/postproc.c +++ b/src/gallium/frontends/va/postproc.c @@ -503,9 +503,15 @@ vlVaApplyDeint(vlVaDriver *drv, vlVaContext *context, context->deint = NULL; } + if (!drv->pipe_gfx) { + drv->pipe_gfx = pipe_create_multimedia_context(drv->pipe->screen, false); + if (!drv->pipe_gfx) + return current; + } + if (!context->deint) { context->deint = MALLOC(sizeof(struct vl_deint_filter)); - if (!vl_deint_filter_init(context->deint, drv->pipe, current->width, + if (!vl_deint_filter_init(context->deint, drv->pipe_gfx, current->width, current->height, false, false, !current->interlaced)) { FREE(context->deint); context->deint = NULL; @@ -519,6 +525,9 @@ vlVaApplyDeint(vlVaDriver *drv, vlVaContext *context, vl_deint_filter_render(context->deint, prevprev->buffer, prev->buffer, current, next->buffer, field); + + drv->pipe_gfx->flush(drv->pipe_gfx, NULL, 0); + return context->deint->video_buffer; } diff --git a/src/gallium/frontends/va/surface.c b/src/gallium/frontends/va/surface.c index b27cc3b8c8b..577dc24c92b 100644 --- a/src/gallium/frontends/va/surface.c +++ b/src/gallium/frontends/va/surface.c @@ -321,7 +321,7 @@ vlVaPutSubpictures(vlVaSurface *surf, vlVaDriver *drv, for (i = 0; i < surf->subpics.size/sizeof(vlVaSubpicture *); i++) { struct pipe_blend_state blend; - void *blend_state; + void *blend_state = NULL; vlVaBuffer *buf; struct pipe_box box; struct u_rect *s, *d, sr, dr, c; @@ -368,30 +368,32 @@ vlVaPutSubpictures(vlVaSurface *surf, vlVaDriver *drv, dr.x1 = d->x0 + c.x1*(dw/(float)sw); dr.y1 = d->y0 + c.y1*(dh/(float)sh); - memset(&blend, 0, sizeof(blend)); - 
blend.independent_blend_enable = 0; - blend.rt[0].blend_enable = 1; - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].rgb_func = PIPE_BLEND_ADD; - blend.rt[0].alpha_func = PIPE_BLEND_ADD; - blend.rt[0].colormask = PIPE_MASK_RGBA; - blend.logicop_enable = 0; - blend.logicop_func = PIPE_LOGICOP_CLEAR; - blend.dither = 0; - blend_state = drv->pipe->create_blend_state(drv->pipe, &blend); - vl_compositor_clear_layers(&drv->cstate); - vl_compositor_set_layer_blend(&drv->cstate, 0, blend_state, false); + if (drv->pipe->create_blend_state) { + memset(&blend, 0, sizeof(blend)); + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + blend.dither = 0; + blend_state = drv->pipe->create_blend_state(drv->pipe, &blend); + vl_compositor_set_layer_blend(&drv->cstate, 0, blend_state, false); + } upload_sampler(drv->pipe, sub->sampler, &box, buf->data, sub->image->pitches[0], 0, 0); vl_compositor_set_rgba_layer(&drv->cstate, &drv->compositor, 0, sub->sampler, &sr, NULL, NULL); vl_compositor_set_layer_dst_area(&drv->cstate, 0, &dr); vl_compositor_render(&drv->cstate, &drv->compositor, surf_draw, dirty_area, false); - drv->pipe->delete_blend_state(drv->pipe, blend_state); + if (blend_state) + drv->pipe->delete_blend_state(drv->pipe, blend_state); } return VA_STATUS_SUCCESS; @@ -476,7 +478,8 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID 
surface_id, void* draw, short s return status; } - drv->pipe->flush_resource(drv->pipe, tex); + if (drv->pipe->flush_resource) + drv->pipe->flush_resource(drv->pipe, tex); /* flush before calling flush_frontbuffer so that rendering is flushed * to back buffer so the texture can be copied in flush_frontbuffer diff --git a/src/gallium/frontends/va/va_private.h b/src/gallium/frontends/va/va_private.h index 9725d26bf89..d7bff14f606 100644 --- a/src/gallium/frontends/va/va_private.h +++ b/src/gallium/frontends/va/va_private.h @@ -340,6 +340,7 @@ ProfileToPipe(VAProfile profile) typedef struct { struct vl_screen *vscreen; struct pipe_context *pipe; + struct pipe_context *pipe_gfx; struct handle_table *htab; struct vl_compositor compositor; struct vl_compositor_state cstate;