From 11dce2ac8154237b74ee3c4bd2aaec685739f0cc Mon Sep 17 00:00:00 2001 From: Jose Maria Casanova Crespo Date: Wed, 22 Nov 2023 05:17:38 +0100 Subject: [PATCH] v3d: fix CLE MMU errors avoiding using last bytes of CL BOs. The last V3D_CLE_READAHEAD bytes of the CLE buffer are unusable because using them would prefetch the next readahead bytes of the CL that would be outside the allocated BO. To guarantee that we can chain a BO to the current CL we always reserve space for the BRANCH packet. Not taking this into account has been generating kernel dmesg errors like "MMU error from client CLE". As V3D_CLE_READAHEAD is different from RPi4 (256 bytes) to RPi5 (1024 bytes). So we needed to rename v3d_cl.c to v3dX_cl.c to have different objects per V3D_VERSION. Extra assertions have been included to validate that we don't write packets over the usable size of the CL silently. v2: - Remove spurious blank line (Iago Toral) - Do not declare unusable the space needed for the BRANCH packet, and take it into account for all reservations. v3: - Handle BRANCH packet reserve only when CLE BO allocation is done. v4: - Assert on BO size updates that we are within the BO size. (Iago Toral) Reviewed-by: Iago Toral Quiroga cc: mesa-stable Part-of: --- src/gallium/drivers/v3d/meson.build | 2 +- src/gallium/drivers/v3d/v3d_cl.h | 8 ++-- .../drivers/v3d/{v3d_cl.c => v3dx_cl.c} | 40 ++++++++++++++----- 3 files changed, 37 insertions(+), 13 deletions(-) rename src/gallium/drivers/v3d/{v3d_cl.c => v3dx_cl.c} (65%) diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index d5f2a24b156..59da0fdc60c 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -22,7 +22,6 @@ files_libv3d = files( 'v3d_blit.c', 'v3d_bufmgr.c', 'v3d_bufmgr.h', - 'v3d_cl.c', 'v3d_cl.h', 'v3d_context.c', 'v3d_context.h', @@ -50,6 +49,7 @@ files_per_version = files( 'v3dx_rcl.c', 'v3dx_state.c', 'v3dx_tfu.c', + 'v3dx_cl.c', ) v3d_args = ['-DV3D_BUILD_NEON'] diff --git a/src/gallium/drivers/v3d/v3d_cl.h b/src/gallium/drivers/v3d/v3d_cl.h index de966d2baad..76d8c3aa300 100644 --- a/src/gallium/drivers/v3d/v3d_cl.h +++ b/src/gallium/drivers/v3d/v3d_cl.h @@ -234,6 +234,7 @@ cl_get_emit_space(struct v3d_cl_out **cl, size_t size) cl_advance(&cl_out, cl_packet_length(packet)); \ cl_end(cl, cl_out); \ _loop_terminate = NULL; \ + assert(cl_offset(cl) <= (cl)->size); \ })) \ #define cl_emit_with_prepacked(cl, packet, prepacked, name) \ @@ -253,9 +254,10 @@ cl_get_emit_space(struct v3d_cl_out **cl, size_t size) _loop_terminate = NULL; \ })) \ -#define cl_emit_prepacked_sized(cl, packet, size) do { \ - memcpy((cl)->next, packet, size); \ - cl_advance(&(cl)->next, size); \ +#define cl_emit_prepacked_sized(cl, packet, psize) do { \ + memcpy((cl)->next, packet, psize); \ + cl_advance(&(cl)->next, psize); \ + assert(cl_offset(cl) <= (cl)->size); \ } while (0) #define cl_emit_prepacked(cl, packet) \ diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3dx_cl.c similarity index 65% rename from src/gallium/drivers/v3d/v3d_cl.c rename to src/gallium/drivers/v3d/v3dx_cl.c index d8ee4ffc206..6e767dca6ff 100644 --- a/src/gallium/drivers/v3d/v3d_cl.c +++ b/src/gallium/drivers/v3d/v3dx_cl.c @@ -24,14 +24,21 @@ #include "util/u_math.h" #include "util/ralloc.h" #include "v3d_context.h" -/* We don't expect that the packets we use in this file change across across - * hw versions, so we just explicitly set the V3D_VERSION and include - * v3dx_pack here - */ -#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" +/* The Control List Executor (CLE) pre-fetches V3D_CLE_READAHEAD bytes from + * the Control List buffer. The usage of these last bytes should be avoided or + * the CLE would pre-fetch the data after the end of the CL buffer, reporting + * the kernel "MMU error from client CLE". + */ +#if V3D_VERSION == 42 +#define V3D_CLE_READAHEAD 256 +#endif +#if V3D_VERSION >= 71 +#define V3D_CLE_READAHEAD 1024 +#endif + void v3d_init_cl(struct v3d_job *job, struct v3d_cl *cl) { @@ -63,14 +70,25 @@ v3d_cl_ensure_space(struct v3d_cl *cl, uint32_t space, uint32_t alignment) void v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t space) { - if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + if (cl_offset(cl) + space <= cl->size) return; - struct v3d_bo *new_bo = v3d_bo_alloc(cl->job->v3d->screen, space, "CL"); - assert(space <= new_bo->size); + /* The last V3D_CLE_READAHEAD bytes of the buffer are unusable, so we + * need to take them into account when allocating a new BO for the + * CL. We have to be sure that we have room for a BRANCH packet so we + * can always chain a next BO if needed. We will need to increase + * cl->size by the packet length before calling cl_summit to use this + * reserved space. + */ + uint32_t unusable_size = V3D_CLE_READAHEAD + cl_packet_length(BRANCH); + struct v3d_bo *new_bo = v3d_bo_alloc(cl->job->v3d->screen, + space + unusable_size, "CL"); + assert(space + unusable_size <= new_bo->size); /* Chain to the new BO from the old one. */ if (cl->bo) { + cl->size += cl_packet_length(BRANCH); + assert(cl->size + V3D_CLE_READAHEAD <= cl->bo->size); cl_emit(cl, BRANCH, branch) { branch.address = cl_address(new_bo, 0); } @@ -82,7 +100,11 @@ v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t space) cl->bo = new_bo; cl->base = v3d_bo_map(cl->bo); - cl->size = cl->bo->size; + /* Take only into account the usable size of the BO to guarantee that + * we never write in the last bytes of the CL buffer because of the + * readahead of the CLE + */ + cl->size = cl->bo->size - unusable_size; cl->next = cl->base; }