v3dv: add a buffer to image copy path using a texel buffer

This is much faster than the blit fallback (which requires uploading
the linear buffer to a tiled image) and the CPU path.

A simple stress test involving 100 buffer to image copies of a
single layer image with 10 mipmap levels provides the following
results:

Path           | Recording Time | Execution Time |
-------------------------------------------------|
Texel Buffer   |     2.954s     |     0.137s     |
-------------------------------------------------|
Blit           |    10.732s     |     0.148s     |
-------------------------------------------------|
CPU            |     0.002s     |     1.453s     |
-------------------------------------------------|

So, generally speaking, this texel buffer copy path is the fastest
of the paths that can do partial copies. However, the CPU path might
provide better results in cases where command buffer recording time is
important to overall performance. This is probably the reason why
the CPU path seems to provide slightly better results for vkQuake2.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7651>
This commit is contained in:
Iago Toral Quiroga
2020-11-12 10:43:54 +01:00
parent 6304c08818
commit ba69c36ada
4 changed files with 931 additions and 128 deletions

View File

@@ -1327,6 +1327,7 @@ init_device_meta(struct v3dv_device *device)
mtx_init(&device->meta.mtx, mtx_plain);
v3dv_meta_clear_init(device);
v3dv_meta_blit_init(device);
v3dv_meta_texel_buffer_copy_init(device);
}
static void
@@ -1335,6 +1336,7 @@ destroy_device_meta(struct v3dv_device *device)
mtx_destroy(&device->meta.mtx);
v3dv_meta_clear_finish(device);
v3dv_meta_blit_finish(device);
v3dv_meta_texel_buffer_copy_finish(device);
}
VkResult

View File

@@ -505,6 +505,16 @@ buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format
return flags;
}
/**
 * Reports whether a buffer format supports every requested feature.
 *
 * Looks up the v3dv_format for vk_format, obtains the buffer feature flags
 * for it, and returns true only when no bit set in features is absent from
 * that supported set. An empty features mask therefore yields true.
 */
bool
v3dv_buffer_format_supports_features(VkFormat vk_format,
                                     VkFormatFeatureFlags features)
{
   const VkFormatFeatureFlags available =
      buffer_format_features(vk_format, v3dv_get_format(vk_format));

   /* Requested bits that the format does not provide. */
   const VkFormatFeatureFlags missing = features & ~available;

   return missing == 0;
}
void
v3dv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,
VkFormat format,

File diff suppressed because it is too large Load Diff

View File

@@ -174,6 +174,9 @@ void v3dv_meta_clear_finish(struct v3dv_device *device);
void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
struct v3dv_app_info {
const char *app_name;
uint32_t app_version;
@@ -246,7 +249,8 @@ struct v3dv_queue {
struct v3dv_job *noop_job;
};
/* Sizes, in bytes, of the key arrays embedded in the meta pipeline structs:
 * four 32-bit words for blit pipelines and one 32-bit word for texel buffer
 * copy pipelines.
 */
#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (1 * sizeof(uint32_t))
struct v3dv_meta_color_clear_pipeline {
VkPipeline pipeline;
@@ -267,6 +271,13 @@ struct v3dv_meta_blit_pipeline {
uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};
/* Pipeline objects used by the texel buffer copy meta path, together with
 * the key bytes identifying the entry. The device keeps these in its
 * meta.texel_buffer_copy caches (one hash table each for 1d, 2d and 3d).
 */
struct v3dv_meta_texel_buffer_copy_pipeline {
VkPipeline pipeline;
VkRenderPass pass;
/* NOTE(review): presumably a variant of 'pass' that skips loading the
 * existing attachment contents -- confirm against the code that creates it.
 */
VkRenderPass pass_no_load;
/* Key storage; V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE bytes long. */
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};
struct v3dv_pipeline_cache_stats {
uint32_t miss;
uint32_t hit;
@@ -322,6 +333,11 @@ struct v3dv_device {
VkPipelineLayout playout;
struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
} blit;
struct {
VkDescriptorSetLayout dslayout;
VkPipelineLayout playout;
struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
} texel_buffer_copy;
} meta;
struct v3dv_bo_cache {
@@ -1166,6 +1182,10 @@ struct v3dv_cmd_buffer {
/* The current descriptor pool for blit sources */
VkDescriptorPool dspool;
} blit;
struct {
/* The current descriptor pool for texel buffer copy sources */
VkDescriptorPool dspool;
} texel_buffer_copy;
} meta;
/* List of jobs in the command buffer. For primary command buffers it
@@ -1784,6 +1804,8 @@ void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *typ
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
uint32_t tex_format);
bool v3dv_buffer_format_supports_features(VkFormat vk_format,
VkFormatFeatureFlags features);
bool v3dv_format_supports_tlb_resolve(const struct v3dv_format *format);
uint32_t v3d_utile_width(int cpp);