diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index d5f2a24b156..59da0fdc60c 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -22,7 +22,6 @@ files_libv3d = files( 'v3d_blit.c', 'v3d_bufmgr.c', 'v3d_bufmgr.h', - 'v3d_cl.c', 'v3d_cl.h', 'v3d_context.c', 'v3d_context.h', @@ -50,6 +49,7 @@ files_per_version = files( 'v3dx_rcl.c', 'v3dx_state.c', 'v3dx_tfu.c', + 'v3dx_cl.c', ) v3d_args = ['-DV3D_BUILD_NEON'] diff --git a/src/gallium/drivers/v3d/v3d_cl.h b/src/gallium/drivers/v3d/v3d_cl.h index de966d2baad..76d8c3aa300 100644 --- a/src/gallium/drivers/v3d/v3d_cl.h +++ b/src/gallium/drivers/v3d/v3d_cl.h @@ -234,6 +234,7 @@ cl_get_emit_space(struct v3d_cl_out **cl, size_t size) cl_advance(&cl_out, cl_packet_length(packet)); \ cl_end(cl, cl_out); \ _loop_terminate = NULL; \ + assert(cl_offset(cl) <= (cl)->size); \ })) \ #define cl_emit_with_prepacked(cl, packet, prepacked, name) \ @@ -253,9 +254,10 @@ cl_get_emit_space(struct v3d_cl_out **cl, size_t size) _loop_terminate = NULL; \ })) \ -#define cl_emit_prepacked_sized(cl, packet, size) do { \ - memcpy((cl)->next, packet, size); \ - cl_advance(&(cl)->next, size); \ +#define cl_emit_prepacked_sized(cl, packet, psize) do { \ + memcpy((cl)->next, packet, psize); \ + cl_advance(&(cl)->next, psize); \ + assert(cl_offset(cl) <= (cl)->size); \ } while (0) #define cl_emit_prepacked(cl, packet) \ diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3dx_cl.c similarity index 65% rename from src/gallium/drivers/v3d/v3d_cl.c rename to src/gallium/drivers/v3d/v3dx_cl.c index d8ee4ffc206..6e767dca6ff 100644 --- a/src/gallium/drivers/v3d/v3d_cl.c +++ b/src/gallium/drivers/v3d/v3dx_cl.c @@ -24,14 +24,21 @@ #include "util/u_math.h" #include "util/ralloc.h" #include "v3d_context.h" -/* We don't expect that the packets we use in this file change across across - * hw versions, so we just explicitly set the V3D_VERSION and include - * v3dx_pack here - */ -#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" +/* The Control List Executor (CLE) pre-fetches V3D_CLE_READAHEAD bytes from + * the Control List buffer. The usage of these last bytes should be avoided or + * the CLE would pre-fetch the data after the end of the CL buffer, reporting + * the kernel "MMU error from client CLE". + */ +#if V3D_VERSION == 42 +#define V3D_CLE_READAHEAD 256 +#endif +#if V3D_VERSION >= 71 +#define V3D_CLE_READAHEAD 1024 +#endif + void v3d_init_cl(struct v3d_job *job, struct v3d_cl *cl) { @@ -63,14 +70,25 @@ v3d_cl_ensure_space(struct v3d_cl *cl, uint32_t space, uint32_t alignment) void v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t space) { - if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + if (cl_offset(cl) + space <= cl->size) return; - struct v3d_bo *new_bo = v3d_bo_alloc(cl->job->v3d->screen, space, "CL"); - assert(space <= new_bo->size); + /* The last V3D_CLE_READAHEAD bytes of the buffer are unusable, so we + * need to take them into account when allocating a new BO for the + * CL. We have to be sure that we have room for a BRANCH packet so we + * can always chain a next BO if needed. We will need to increase + * cl->size by the packet length before calling cl_summit to use this + * reserved space. + */ + uint32_t unusable_size = V3D_CLE_READAHEAD + cl_packet_length(BRANCH); + struct v3d_bo *new_bo = v3d_bo_alloc(cl->job->v3d->screen, + space + unusable_size, "CL"); + assert(space + unusable_size <= new_bo->size); /* Chain to the new BO from the old one. */ if (cl->bo) { + cl->size += cl_packet_length(BRANCH); + assert(cl->size + V3D_CLE_READAHEAD <= cl->bo->size); cl_emit(cl, BRANCH, branch) { branch.address = cl_address(new_bo, 0); } @@ -82,7 +100,11 @@ v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t space) cl->bo = new_bo; cl->base = v3d_bo_map(cl->bo); - cl->size = cl->bo->size; + /* Take only into account the usable size of the BO to guarantee that + * we never write in the last bytes of the CL buffer because of the + * readahead of the CLE + */ + cl->size = cl->bo->size - unusable_size; cl->next = cl->base; }