frontends/va: Move encode fence to coded buffer

Instead of using the surface fence, store the fence in buffer. This way the fence won't be overwritten when encoding multiple frames using the same source surface and SyncBuffer will sync the correct job. Also fixes possible crash when destroying coded buffer before calling SyncSurface and possible leak when destroying or reusing coded buffer with pending encode job without calling SyncSurface/SyncBuffer. Reviewed-by: Ruijing Dong <ruijing.dong@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31959>
2024-11-02 10:28:06 +01:00
parent 9bcf17ef5a
commit 93d434362b
5 changed files with 105 additions and 56 deletions
--- a/src/gallium/frontends/va/buffer.c
+++ b/src/gallium/frontends/va/buffer.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_handle_table.h"
 #include "util/u_transfer.h"
+#include "util/set.h"
 #include "vl/vl_winsys.h"

 #include "va_private.h"
@@ -197,6 +198,8 @@ VAStatus vlVaMapBuffer2(VADriverContextP ctx, VABufferID buf_id,
      if (buf->type == VAEncCodedBufferType) {
         VACodedBufferSegment* curr_buf_ptr = (VACodedBufferSegment*) buf->data;

+         vlVaGetBufferFeedback(buf);
+
         if ((buf->extended_metadata.present_metadata & PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT) &&
             (buf->extended_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED)) {
            curr_buf_ptr->status = VA_CODED_BUF_STATUS_BAD_BITSTREAM;
@@ -335,6 +338,17 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buf_id)
      FREE(buf->data);
   }

+   if (buf->ctx) {
+      assert(_mesa_set_search(buf->ctx->buffers, buf));
+      _mesa_set_remove_key(buf->ctx->buffers, buf);
+      vlVaGetBufferFeedback(buf);
+      if (buf->fence && buf->ctx->decoder && buf->ctx->decoder->destroy_fence)
+         buf->ctx->decoder->destroy_fence(buf->ctx->decoder, buf->fence);
+   }
+
+   if (buf->coded_surf)
+      buf->coded_surf->coded_buf = NULL;
+
   FREE(buf);
   handle_table_remove(VL_VA_DRIVER(ctx)->htab, buf_id);
   mtx_unlock(&drv->mutex);
@@ -551,38 +565,35 @@ vlVaSyncBuffer(VADriverContextP ctx, VABufferID buf_id, uint64_t timeout_ns)
      return VA_STATUS_ERROR_INVALID_BUFFER;
   }

-   if (!buf->feedback) {
+   if (!buf->fence) {
      /* No outstanding operation: nothing to do. */
      mtx_unlock(&drv->mutex);
      return VA_STATUS_SUCCESS;
   }

-   context = handle_table_get(drv->htab, buf->ctx);
+   context = buf->ctx;
   if (!context) {
      mtx_unlock(&drv->mutex);
      return VA_STATUS_ERROR_INVALID_CONTEXT;
   }

-   vlVaSurface* surf = handle_table_get(drv->htab, buf->associated_encode_input_surf);
-
-   if ((buf->feedback) && (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)) {
-      if (surf && context->decoder->fence_wait &&
-          !context->decoder->fence_wait(context->decoder, surf->fence, timeout_ns)) {
-         mtx_unlock(&drv->mutex);
-         return VA_STATUS_ERROR_TIMEDOUT;
-      }
-      context->decoder->get_feedback(context->decoder, buf->feedback, &(buf->coded_size), &(buf->extended_metadata));
-      buf->feedback = NULL;
-      /* Also mark the associated render target (encode source texture) surface as done
-         in case they call vaSyncSurface on it to avoid getting the feedback twice*/
-      if(surf)
-      {
-         surf->feedback = NULL;
-         buf->associated_encode_input_surf = VA_INVALID_ID;
-      }
+   if (!context->decoder) {
+      mtx_unlock(&drv->mutex);
+      return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
   }

+   int ret = context->decoder->fence_wait(context->decoder, buf->fence, timeout_ns);
   mtx_unlock(&drv->mutex);
-   return VA_STATUS_SUCCESS;
+   return ret ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_TIMEDOUT;
 }
 #endif
+
+void vlVaGetBufferFeedback(vlVaBuffer *buf)
+{
+   if (!buf->ctx || !buf->ctx->decoder || !buf->feedback)
+      return;
+
+   buf->ctx->decoder->get_feedback(buf->ctx->decoder, buf->feedback,
+                                   &buf->coded_size, &buf->extended_metadata);
+   buf->feedback = NULL;
+}
--- a/src/gallium/frontends/va/context.c
+++ b/src/gallium/frontends/va/context.c
@@ -451,6 +451,7 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width,
   }

   context->surfaces = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   context->buffers = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   mtx_lock(&drv->mutex);
   *context_id = handle_table_add(drv->htab, context);
@@ -490,6 +491,18 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id)
   }
   _mesa_set_destroy(context->surfaces, NULL);

+   set_foreach(context->buffers, entry) {
+      vlVaBuffer *buf = (vlVaBuffer *)entry->key;
+      assert(buf->ctx == context);
+      vlVaGetBufferFeedback(buf);
+      buf->ctx = NULL;
+      if (buf->fence && context->decoder && context->decoder->destroy_fence) {
+         context->decoder->destroy_fence(context->decoder, buf->fence);
+         buf->fence = NULL;
+      }
+   }
+   _mesa_set_destroy(context->buffers, NULL);
+
   if (context->decoder) {
      if (context->desc.base.entry_point == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
         if (u_reduce_video_profile(context->decoder->profile) ==
--- a/src/gallium/frontends/va/picture.c
+++ b/src/gallium/frontends/va/picture.c
@@ -70,6 +70,21 @@ vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context)
   _mesa_set_add(surf->ctx->surfaces, surf);
 }

+static void
+vlVaSetBufferContext(vlVaDriver *drv, vlVaBuffer *buf, vlVaContext *context)
+{
+   if (buf->ctx == context)
+      return;
+
+   if (buf->ctx) {
+      assert(_mesa_set_search(buf->ctx->buffers, buf));
+      _mesa_set_remove_key(buf->ctx->buffers, buf);
+   }
+
+   buf->ctx = context;
+   _mesa_set_add(buf->ctx->buffers, buf);
+}
+
 VAStatus
 vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID render_target)
 {
@@ -103,8 +118,16 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
      return VA_STATUS_ERROR_INVALID_SURFACE;
   }

+   if (surf->coded_buf) {
+      surf->coded_buf->coded_surf = NULL;
+      surf->coded_buf = NULL;
+   }
+
+   /* Encode only reads from the surface and doesn't set surface fence. */
+   if (context->templat.entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE)
+      vlVaSetSurfaceContext(drv, surf, context);
+
   context->target_id = render_target;
-   vlVaSetSurfaceContext(drv, surf, context);
   context->target = surf->buffer;
   context->mjpeg.sampling_factor = 0;

@@ -1270,9 +1293,9 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
   }

   if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
-      context->desc.base.fence = &surf->fence;
      struct pipe_screen *screen = context->decoder->context->screen;
      coded_buf = context->coded_buf;
+      context->desc.base.fence = &coded_buf->fence;
      if (u_reduce_video_profile(context->templat.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC)
         context->desc.h264enc.frame_num_cnt++;

@@ -1299,6 +1322,11 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
            return VA_STATUS_ERROR_INVALID_SURFACE;
      }

+      if (coded_buf->coded_surf)
+         coded_buf->coded_surf->coded_buf = NULL;
+      vlVaGetBufferFeedback(coded_buf);
+      vlVaSetBufferContext(drv, coded_buf, context);
+
      int driver_metadata_support = drv->pipe->screen->get_video_param(drv->pipe->screen,
                                                                       context->decoder->profile,
                                                                       context->decoder->entrypoint,
@@ -1314,10 +1342,8 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
      context->decoder->encode_bitstream(context->decoder, context->target,
                                         coded_buf->derived_surface.resource, &feedback);
      coded_buf->feedback = feedback;
-      coded_buf->ctx = context_id;
-      surf->feedback = feedback;
+      coded_buf->coded_surf = surf;
      surf->coded_buf = coded_buf;
-      coded_buf->associated_encode_input_surf = context->target_id;
   } else if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
      context->desc.base.fence = &surf->fence;
   } else if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) {
--- a/src/gallium/frontends/va/surface.c
+++ b/src/gallium/frontends/va/surface.c
@@ -138,6 +138,8 @@ vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_sur
            drv->efc_count = -1;
         }
      }
+      if (surf->coded_buf)
+         surf->coded_buf->coded_surf = NULL;
      util_dynarray_fini(&surf->subpics);
      FREE(surf);
      handle_table_remove(drv->htab, surface_list[i]);
@@ -153,6 +155,7 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo
   vlVaDriver *drv;
   vlVaContext *context;
   vlVaSurface *surf;
+   struct pipe_fence_handle *fence;

   if (!ctx)
      return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -168,19 +171,26 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo
      return VA_STATUS_ERROR_INVALID_SURFACE;
   }

+   if (surf->coded_buf) {
+      context = surf->coded_buf->ctx;
+      fence = surf->coded_buf->fence;
+   } else {
+      context = surf->ctx;
+      fence = surf->fence;
+   }
+
   /* This is checked before getting the context below as
    * surf->ctx is only set in begin_frame
    * and not when the surface is created
    * Some apps try to sync/map the surface right after creation and
    * would get VA_STATUS_ERROR_INVALID_CONTEXT
    */
-   if (!surf->buffer || (!surf->feedback && !surf->fence)) {
+   if (!surf->buffer || !fence) {
      // No outstanding encode/decode operation: nothing to do.
      mtx_unlock(&drv->mutex);
      return VA_STATUS_SUCCESS;
   }

-   context = surf->ctx;
   if (!context) {
      mtx_unlock(&drv->mutex);
      return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -191,19 +201,7 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo
      return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
   }

-   /* If driver does not implement fence_wait assume no
-    * async work needed to be waited on and return success
-    */
-   int ret = (context->decoder->fence_wait) ? 0 : 1;
-   if (context->decoder->fence_wait)
-      ret = context->decoder->fence_wait(context->decoder, surf->fence, timeout_ns);
-
-   if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE && surf->feedback && ret) {
-      context->decoder->get_feedback(context->decoder, surf->feedback, &(surf->coded_buf->coded_size), &(surf->coded_buf->extended_metadata));
-      surf->feedback = NULL;
-      surf->coded_buf->feedback = NULL;
-      surf->coded_buf->associated_encode_input_surf = VA_INVALID_ID;
-   }
+   int ret = context->decoder->fence_wait(context->decoder, fence, timeout_ns);
   mtx_unlock(&drv->mutex);
   return ret ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_TIMEDOUT;
 }
@@ -228,6 +226,7 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac
   vlVaDriver *drv;
   vlVaSurface *surf;
   vlVaContext *context;
+   struct pipe_fence_handle *fence;

   if (!ctx)
      return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -244,20 +243,27 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac
      return VA_STATUS_ERROR_INVALID_SURFACE;
   }

+   if (surf->coded_buf) {
+      context = surf->coded_buf->ctx;
+      fence = surf->coded_buf->fence;
+   } else {
+      context = surf->ctx;
+      fence = surf->fence;
+   }
+
   /* This is checked before getting the context below as
    * surf->ctx is only set in begin_frame
    * and not when the surface is created
    * Some apps try to sync/map the surface right after creation and
    * would get VA_STATUS_ERROR_INVALID_CONTEXT
    */
-   if (!surf->buffer || (!surf->feedback && !surf->fence)) {
+   if (!surf->buffer || !fence) {
      // No outstanding encode/decode operation: nothing to do.
      *status = VASurfaceReady;
      mtx_unlock(&drv->mutex);
      return VA_STATUS_SUCCESS;
   }

-   context = surf->ctx;
   if (!context) {
      mtx_unlock(&drv->mutex);
      return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -268,16 +274,7 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac
      return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
   }

-   /* If driver does not implement fence_wait assume no
-    * async work needed to be waited on and return surface ready
-    */
-   int ret = (context->decoder->fence_wait) ? 0 : 1;
-
-   if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE && !surf->feedback)
-      ret = 1;
-   else if (context->decoder->fence_wait)
-      ret = context->decoder->fence_wait(context->decoder, surf->fence, 0);
-
+   int ret = context->decoder->fence_wait(context->decoder, fence, 0);
   mtx_unlock(&drv->mutex);

   if (ret)
--- a/src/gallium/frontends/va/va_private.h
+++ b/src/gallium/frontends/va/va_private.h
@@ -364,11 +364,12 @@ typedef struct {
   struct pipe_enc_feedback_metadata extended_metadata;
   struct pipe_video_buffer *derived_image_buffer;
   void *feedback;
-   VASurfaceID associated_encode_input_surf;
-   VAContextID ctx;
+   struct vlVaContext *ctx;
+   struct vlVaSurface *coded_surf;
+   struct pipe_fence_handle *fence;
 } vlVaBuffer;

-typedef struct {
+typedef struct vlVaContext {
   struct pipe_video_codec templat, *decoder;
   struct pipe_video_buffer *target;
   union {
@@ -416,6 +417,7 @@ typedef struct {
   int packed_header_type;
   bool packed_header_emulation_bytes;
   struct set *surfaces;
+   struct set *buffers;
   unsigned slice_data_offset;
   bool have_slice_params;

@@ -439,7 +441,6 @@ typedef struct vlVaSurface {
   struct util_dynarray subpics; /* vlVaSubpicture */
   vlVaContext *ctx;
   vlVaBuffer *coded_buf;
-   void *feedback;
   bool full_range;
   struct pipe_fence_handle *fence;
   struct vlVaSurface *efc_surface; /* input surface for EFC */
@@ -564,6 +565,7 @@ VAStatus vlVaHandleSurfaceAllocate(vlVaDriver *drv, vlVaSurface *surface, struct
 struct pipe_video_buffer *vlVaGetSurfaceBuffer(vlVaDriver *drv, vlVaSurface *surface);
 void vlVaAddRawHeader(struct util_dynarray *headers, uint8_t type, uint32_t size, uint8_t *buf,
                      bool is_slice, uint32_t emulation_bytes_start);
+void vlVaGetBufferFeedback(vlVaBuffer *buf);
 void vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context);
 void vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID surface_id, struct pipe_video_buffer **ref_frame);
 void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf);