diff --git a/docs/gallium/screen.rst b/docs/gallium/screen.rst index da8e7748692..b8f3c2fcf54 100644 --- a/docs/gallium/screen.rst +++ b/docs/gallium/screen.rst @@ -622,6 +622,7 @@ The integer capabilities: * ``PIPE_CAP_SUPPORTED_PRIM_MODES``: A bitmask of the ``pipe_prim_type`` enum values that the driver can natively support. * ``PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART``: A bitmask of the ``pipe_prim_type`` enum values that the driver can natively support for primitive restart. Only useful if ``PIPE_CAP_PRIMITIVE_RESTART`` is also exported. * ``PIPE_CAP_PREFER_BACK_BUFFER_REUSE``: Only applies to DRI_PRIME. If 1, the driver prefers that DRI3 tries to use the same back buffer each frame. If 0, this means DRI3 will at least use 2 back buffers and ping-pong between them to allow the tiled->linear copy to run in parallel. +* ``PIPE_CAP_DRAW_VERTEX_STATE``: Driver supports ``pipe_screen::create_vertex_state/vertex_state_destroy`` and ``pipe_context::draw_vertex_state``. Only used by display lists and designed to serve vbo_save. .. 
_pipe_capf: diff --git a/src/gallium/auxiliary/driver_noop/noop_pipe.c b/src/gallium/auxiliary/driver_noop/noop_pipe.c index 6cb17d7dc5e..4f0746e5650 100644 --- a/src/gallium/auxiliary/driver_noop/noop_pipe.c +++ b/src/gallium/auxiliary/driver_noop/noop_pipe.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/format/u_format.h" +#include "util/u_helpers.h" #include "util/u_upload_mgr.h" #include "util/u_threaded_context.h" #include "noop_public.h" @@ -673,6 +674,32 @@ static void noop_query_dmabuf_modifiers(struct pipe_screen *screen, external_only, count); } +static struct pipe_vertex_state * +noop_create_vertex_state(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask) +{ + struct pipe_vertex_state *state = CALLOC_STRUCT(pipe_vertex_state); + + if (!state) + return NULL; + + util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, + full_velem_mask, state); + return state; +} + +static void noop_vertex_state_destroy(struct pipe_screen *screen, + struct pipe_vertex_state *state) +{ + pipe_vertex_buffer_unreference(&state->input.vbuffer); + pipe_resource_reference(&state->input.indexbuf, NULL); + FREE(state); +} + struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) { struct noop_pipe_screen *noop_screen; @@ -722,6 +749,8 @@ struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) screen->get_device_uuid = noop_get_device_uuid; screen->query_dmabuf_modifiers = noop_query_dmabuf_modifiers; screen->resource_create_with_modifiers = noop_resource_create_with_modifiers; + screen->create_vertex_state = noop_create_vertex_state; + screen->vertex_state_destroy = noop_vertex_state_destroy; slab_create_parent(&noop_screen->pool_transfers, sizeof(struct pipe_transfer), 64); diff --git a/src/gallium/auxiliary/driver_noop/noop_state.c 
b/src/gallium/auxiliary/driver_noop/noop_state.c index f3313cd805c..56036e22ede 100644 --- a/src/gallium/auxiliary/driver_noop/noop_state.c +++ b/src/gallium/auxiliary/driver_noop/noop_state.c @@ -38,6 +38,15 @@ static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info { } +static void noop_draw_vertex_state(struct pipe_context *ctx, + struct pipe_vertex_state *state, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ +} + static void noop_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { @@ -459,6 +468,7 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->draw_vertex_state = noop_draw_vertex_state; ctx->launch_grid = noop_launch_grid; ctx->create_stream_output_target = noop_create_stream_output_target; ctx->stream_output_target_destroy = noop_stream_output_target_destroy; diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c index 0c358a06b21..dd415b9909f 100644 --- a/src/gallium/auxiliary/util/u_helpers.c +++ b/src/gallium/auxiliary/util/u_helpers.c @@ -496,3 +496,25 @@ util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize, uint3 } return false; } + +void +util_init_pipe_vertex_state(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct pipe_vertex_state *state) +{ + assert(num_elements == util_bitcount(full_velem_mask)); + + pipe_reference_init(&state->reference, 1); + state->screen = screen; + + pipe_vertex_buffer_reference(&state->input.vbuffer, buffer); + pipe_resource_reference(&state->input.indexbuf, indexbuf); + state->input.num_elements = num_elements; + for 
(unsigned i = 0; i < num_elements; i++) + state->input.elements[i] = elements[i]; + state->input.full_velem_mask = full_velem_mask; +} diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h index f08f44dad99..9246d306ed8 100644 --- a/src/gallium/auxiliary/util/u_helpers.h +++ b/src/gallium/auxiliary/util/u_helpers.h @@ -121,6 +121,15 @@ void util_throttle_memory_usage(struct pipe_context *pipe, bool util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize, uint32_t *clamped); +void +util_init_pipe_vertex_state(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct pipe_vertex_state *state); + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 7e040138ec2..1f1215c7ff0 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -230,6 +230,18 @@ pipe_so_target_reference(struct pipe_stream_output_target **dst, *dst = src; } +static inline void +pipe_vertex_state_reference(struct pipe_vertex_state **dst, + struct pipe_vertex_state *src) +{ + struct pipe_vertex_state *old_dst = *dst; + + if (pipe_reference(old_dst ? &old_dst->reference : NULL, + src ? 
&src->reference : NULL)) + old_dst->screen->vertex_state_destroy(old_dst->screen, old_dst); + *dst = src; +} + static inline void pipe_vertex_buffer_unreference(struct pipe_vertex_buffer *dst) { diff --git a/src/gallium/auxiliary/util/u_prim.c b/src/gallium/auxiliary/util/u_prim.c index 9646a639ea7..a84d0e71e7c 100644 --- a/src/gallium/auxiliary/util/u_prim.c +++ b/src/gallium/auxiliary/util/u_prim.c @@ -37,6 +37,9 @@ u_prim_name(enum pipe_prim_type prim) struct pipe_draw_info info; STATIC_ASSERT(sizeof(info.mode) == 1); + struct pipe_draw_vertex_state_info dvs_info; + STATIC_ASSERT(sizeof(dvs_info.mode) == 1); + static const struct debug_named_value names[] = { DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c index eb6be76b228..eba554600fd 100644 --- a/src/gallium/auxiliary/util/u_screen.c +++ b/src/gallium/auxiliary/util/u_screen.c @@ -472,6 +472,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 1; case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: + case PIPE_CAP_DRAW_VERTEX_STATE: return 0; default: diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f5775e7470e..bedc9261bc7 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -51,6 +51,7 @@ struct pipe_device_reset_callback; struct pipe_draw_info; struct pipe_draw_indirect_info; struct pipe_draw_start_count_bias; +struct pipe_draw_vertex_state_info; struct pipe_grid_info; struct pipe_fence_handle; struct pipe_framebuffer_state; @@ -71,6 +72,7 @@ struct pipe_surface; struct pipe_transfer; struct pipe_vertex_buffer; struct pipe_vertex_element; +struct pipe_vertex_state; struct pipe_video_buffer; struct pipe_video_codec; struct pipe_viewport_state; @@ -142,6 +144,46 @@ struct pipe_context { const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, unsigned 
num_draws); + + /** + * Multi draw for display lists. + * + * For more information, see pipe_vertex_state and + * pipe_draw_vertex_state_info. + * + * Explanation of partial_velem_mask: + * + * 1. pipe_vertex_state::input::elements have a monotonic logical index + * determined by pipe_vertex_state::input::full_velem_mask, specifically, + * the position of the i-th bit set is the logical index of the i-th + * vertex element, up to 31. + * + * 2. partial_velem_mask (the draw_vertex_state parameter) is a subset of + * full_velem_mask where the bits set determine which vertex elements + * should be bound contiguously. The vertex elements corresponding to + * the bits not set in partial_velem_mask should be ignored. + * + * Those two allow creating pipe_vertex_state that has more vertex + * attributes than the vertex shader has inputs. The idea is that + * pipe_vertex_state can be used with any vertex shader that has the same + * number of inputs and same logical indices or less. This may sound like + * an overly complicated way to bind a subset of vertex elements, but it + * actually simplifies everything else: + * + * - In st/mesa, full_velem_mask is exactly the mask of enabled vertex + * attributes (VERT_ATTRIB_x) in the display list VAO, while + * partial_velem_mask is exactly the inputs_read mask of the vertex + * shader (also VERT_ATTRIB_x). + * + * - In the driver, some bit ops and popcnt are needed to assemble vertex + * elements very quickly. 
+ */ + void (*draw_vertex_state)(struct pipe_context *ctx, + struct pipe_vertex_state *state, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws); /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0bc713db69b..123c2b4d0f6 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -994,6 +994,7 @@ enum pipe_cap PIPE_CAP_SUPPORTED_PRIM_MODES, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART, PIPE_CAP_PREFER_BACK_BUFFER_REUSE, + PIPE_CAP_DRAW_VERTEX_STATE, PIPE_CAP_LAST, /* XXX do not add caps after PIPE_CAP_LAST! */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 4421bee5b0f..9176bf2b586 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -58,9 +58,23 @@ struct pipe_surface; struct pipe_transfer; struct pipe_box; struct pipe_memory_info; +struct pipe_vertex_buffer; +struct pipe_vertex_element; +struct pipe_vertex_state; struct disk_cache; struct driOptionCache; struct u_transfer_helper; +struct pipe_screen; + +typedef struct pipe_vertex_state * + (*pipe_create_vertex_state_func)(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask); +typedef void (*pipe_vertex_state_destroy_func)(struct pipe_screen *screen, + struct pipe_vertex_state *); /** * Gallium screen/adapter context. Basically everything @@ -604,6 +618,13 @@ struct pipe_screen { unsigned int (*get_dmabuf_modifier_planes)(struct pipe_screen *screen, uint64_t modifier, enum pipe_format format); + + /** + * Vertex state CSO functions for precomputing vertex and index buffer + * states for display lists. 
+ */ + pipe_create_vertex_state_func create_vertex_state; + pipe_vertex_state_destroy_func vertex_state_destroy; }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 63833a8b19f..cc600e0c762 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -708,6 +708,39 @@ struct pipe_vertex_element unsigned instance_divisor; }; +/** + * Opaque refcounted constant state object encapsulating a vertex buffer, + * index buffer, and vertex elements. Used by display lists to bind those + * states and pass buffer references quickly. + * + * The state contains 1 index buffer, 0 or 1 vertex buffer, and 0 or more + * vertex elements. + * + * Constraints on the buffers to get the fastest codepath: + * - All buffer contents are considered immutable and read-only after + * initialization. This implies the following things. + * - No place is required to track whether these buffers are busy. + * - All CPU mappings of these buffers can be forced to UNSYNCHRONIZED by + * both drivers and common code unconditionally. + * - Buffer invalidation can be skipped by both drivers and common code + * unconditionally. + */ +struct pipe_vertex_state { + struct pipe_reference reference; + struct pipe_screen *screen; + + /* The following structure is used as a key for util_vertex_state_cache + * to deduplicate identical state objects and thus enable more + * opportunities for draw merging. + */ + struct { + struct pipe_resource *indexbuf; + struct pipe_vertex_buffer vbuffer; + unsigned num_elements; + struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; + uint32_t full_velem_mask; + } input; +}; struct pipe_draw_indirect_info { @@ -766,6 +799,25 @@ struct pipe_draw_start_count_bias { int index_bias; /**< a bias to be added to each index */ }; +/** + * Draw vertex state description. 
It's translated to pipe_draw_info as follows: + * - mode comes from this structure + * - index_size is 4 + * - instance_count is 1 + * - index.resource comes from pipe_vertex_state + * - everything else is 0 + */ +struct pipe_draw_vertex_state_info { +#if defined(__GNUC__) + /* sizeof(mode) == 1 because it's a packed enum. */ + enum pipe_prim_type mode; /**< the mode of the primitive */ +#else + /* sizeof(mode) == 1 is required by draw merging in u_threaded_context. */ + uint8_t mode; /**< the mode of the primitive */ +#endif + bool take_vertex_state_ownership; /**< for skipping reference counting */ +}; + /** * Information to describe a draw_vbo call. */