pvr: Add support to process transfer and blit cmds
Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Co-authored-by: Matt Coster <matt.coster@imgtec.com> Co-authored-by: Sarah Walker <sarah.walker@imgtec.com> Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Signed-off-by: Matt Coster <matt.coster@imgtec.com> Signed-off-by: Sarah Walker <sarah.walker@imgtec.com> Acked-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21550>
This commit is contained in:

committed by
Marge Bot

parent
1cdd0ccb37
commit
480bdff4b5
@@ -53,6 +53,8 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
|
||||
.has_num_clusters = true,
|
||||
.has_num_raster_pipes = true,
|
||||
.has_num_user_clip_planes = true,
|
||||
.has_pbe_filterable_f16 = true,
|
||||
.has_pbe_yuv = true,
|
||||
.has_slc_cache_line_size_bits = true,
|
||||
.has_slc_mcu_cache_controls = true,
|
||||
.has_tf_bicubic_filter = true,
|
||||
@@ -96,6 +98,7 @@ const struct pvr_device_enhancements pvr_device_enhancements_4_40_2_51 = {
|
||||
.has_ern35421 = true,
|
||||
.has_ern38020 = true,
|
||||
.has_ern38748 = true,
|
||||
.has_ern42064 = true,
|
||||
.has_ern42307 = true,
|
||||
};
|
||||
|
||||
@@ -126,6 +129,7 @@ const struct pvr_device_ident pvr_device_ident_33_V_11_3 = {
|
||||
const struct pvr_device_features pvr_device_features_33_V_11_3 = {
|
||||
.has_common_store_size_in_dwords = true,
|
||||
.has_compute = true,
|
||||
.has_ipf_creq_pf = true,
|
||||
.has_isp_max_tiles_in_flight = true,
|
||||
.has_isp_samples_per_pixel = true,
|
||||
.has_max_instances_per_pds_task = true,
|
||||
@@ -136,6 +140,8 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
|
||||
.has_num_raster_pipes = true,
|
||||
.has_num_user_clip_planes = true,
|
||||
.has_pbe2_in_xe = true,
|
||||
.has_pbe_filterable_f16 = true,
|
||||
.has_pbe_yuv = true,
|
||||
.has_roguexe = true,
|
||||
.has_screen_size8K = true,
|
||||
.has_simple_internal_parameter_format = true,
|
||||
@@ -205,6 +211,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
|
||||
.has_compute_overlap = true,
|
||||
.has_gpu_multicore_support = true,
|
||||
.has_gs_rta_support = true,
|
||||
.has_ipf_creq_pf = true,
|
||||
.has_isp_max_tiles_in_flight = true,
|
||||
.has_isp_samples_per_pixel = true,
|
||||
.has_max_instances_per_pds_task = true,
|
||||
@@ -216,6 +223,8 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
|
||||
.has_num_user_clip_planes = true,
|
||||
.has_paired_tiles = true,
|
||||
.has_pbe2_in_xe = true,
|
||||
.has_pbe_filterable_f16 = true,
|
||||
.has_pbe_yuv = true,
|
||||
.has_pds_ddmadt = true,
|
||||
.has_roguexe = true,
|
||||
.has_screen_size8K = true,
|
||||
|
@@ -257,6 +257,7 @@ struct pvr_device_features {
|
||||
bool has_eight_output_registers : 1;
|
||||
bool has_gpu_multicore_support : 1;
|
||||
bool has_gs_rta_support : 1;
|
||||
bool has_ipf_creq_pf : 1;
|
||||
bool has_isp_max_tiles_in_flight : 1;
|
||||
bool has_isp_samples_per_pixel : 1;
|
||||
bool has_max_instances_per_pds_task : 1;
|
||||
@@ -268,10 +269,13 @@ struct pvr_device_features {
|
||||
bool has_num_user_clip_planes : 1;
|
||||
bool has_paired_tiles : 1;
|
||||
bool has_pbe2_in_xe : 1;
|
||||
bool has_pbe_filterable_f16 : 1;
|
||||
bool has_pbe_yuv : 1;
|
||||
bool has_pds_ddmadt : 1;
|
||||
bool has_roguexe : 1;
|
||||
bool has_screen_size8K : 1;
|
||||
bool has_simple_internal_parameter_format : 1;
|
||||
bool has_simple_internal_parameter_format_v1 : 1;
|
||||
bool has_simple_internal_parameter_format_v2 : 1;
|
||||
bool has_simple_parameter_format_version : 1;
|
||||
bool has_slc_cache_line_size_bits : 1;
|
||||
@@ -327,6 +331,7 @@ struct pvr_device_enhancements {
|
||||
bool has_ern35421 : 1;
|
||||
bool has_ern38020 : 1;
|
||||
bool has_ern38748 : 1;
|
||||
bool has_ern42064 : 1;
|
||||
bool has_ern42307 : 1;
|
||||
bool has_ern45493 : 1;
|
||||
};
|
||||
|
@@ -27,9 +27,18 @@
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "pvr_types.h"
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
/**
 * Tests whether a device address is a multiple of \p alignment.
 *
 * \param addr       Device address to test.
 * \param alignment  Required alignment; asserted to be a non-zero power of
 *                   two.
 * \returns true when none of the address bits below \p alignment are set.
 */
static inline bool pvr_dev_addr_is_aligned(pvr_dev_addr_t addr,
                                           const uint32_t alignment)
{
   /* For a power-of-two alignment, (alignment - 1) selects exactly the bits
    * that must be clear in an aligned address.
    */
   const uintptr_t low_bits_mask = alignment - 1;

   assert(util_is_power_of_two_nonzero(alignment));

   return !((uintptr_t)(addr.addr) & low_bits_mask);
}
|
||||
|
||||
static inline bool ptr_is_aligned(const void *const ptr,
|
||||
const uint32_t alignment)
|
||||
{
|
||||
|
@@ -499,7 +499,7 @@ SOFTWARE.
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_AA" length="1">
|
||||
<field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE"/>
|
||||
<field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE" default="AA_NONE"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_CTL" length="1">
|
||||
|
@@ -26,6 +26,40 @@ SOFTWARE.
|
||||
<csbgen name="ROGUE" prefix="IPF">
|
||||
|
||||
<define name="TILE_SIZE_PIXELS" value="32"/>
|
||||
<define name="CONTROL_STREAM_SIZE_DWORDS" value="32"/>
|
||||
<define name="ISP_VERTEX_XY_BIAS_VALUE" value="4096"/>
|
||||
|
||||
<enum name="COMPRESSION_FORMAT">
|
||||
<value name="UNIQUE_1" value="0"/>
|
||||
<value name="ORIGIN_1_DELTA_1" value="1"/>
|
||||
<value name="ORIGIN_1_DELTA_2" value="2"/>
|
||||
<value name="ORIGIN_1_DELTA_3" value="3"/>
|
||||
<value name="ORIGIN_1_DELTA_4" value="4"/>
|
||||
<value name="ORIGIN_1_DELTA_5" value="5"/>
|
||||
<value name="ORIGIN_1_DELTA_6" value="6"/>
|
||||
<value name="ORIGIN_1_DELTA_7" value="7"/>
|
||||
<value name="RAW_BYTE" value="8"/>
|
||||
<value name="UNIQUE_2" value="9"/>
|
||||
<value name="ORIGIN_2_DELTA_1" value="10"/>
|
||||
<value name="ORIGIN_2_DELTA_2" value="11"/>
|
||||
<value name="ORIGIN_2_DELTA_3" value="12"/>
|
||||
<value name="ORIGIN_2_DELTA_4" value="13"/>
|
||||
<value name="ORIGIN_2_DELTA_5" value="14"/>
|
||||
<value name="ORIGIN_2_DELTA_6" value="15"/>
|
||||
</enum>
|
||||
|
||||
<enum name="CS_MASK_FMT">
|
||||
<value name="INDEX" value="0"/>
|
||||
<value name="BYTE" value="1"/>
|
||||
<value name="BIT" value="2"/>
|
||||
<value name="FULL" value="3"/>
|
||||
</enum>
|
||||
|
||||
<enum name="CS_TYPE">
|
||||
<value name="PRIM" value="0"/>
|
||||
<value name="LINK" value="2"/>
|
||||
<value name="TERM" value="3"/>
|
||||
</enum>
|
||||
|
||||
<struct name="SCISSOR_WORD_0" length="1">
|
||||
<field name="scw0_xmin" start="16" end="31" type="uint"/>
|
||||
@@ -37,4 +71,100 @@ SOFTWARE.
|
||||
<field name="scw1_ymax" start="0" end="15" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="CONTROL_STREAM" length="1">
|
||||
<field name="cs_type" start="30" end="31" type="CS_TYPE"/>
|
||||
<field name="cs_link" start="3" end="29" type="uint">
|
||||
<define name="UNIT_SIZE" value="128"/>
|
||||
</field>
|
||||
</struct>
|
||||
|
||||
<struct name="PRIMITIVE_FORMAT" length="1">
|
||||
<field name="cs_type" start="30" end="31" type="CS_TYPE"/>
|
||||
<field name="cs_isp_state_read" start="29" end="29" type="bool"/>
|
||||
<field name="cs_isp_state_size" start="26" end="28" type="uint"/>
|
||||
<field name="cs_prim_total" start="19" end="25" type="uint"/>
|
||||
<field name="cs_mask_fmt" start="17" end="18" type="CS_MASK_FMT"/>
|
||||
<field name="cs_prim_base_pres" start="16" end="16" type="bool"/>
|
||||
<field name="cs_prim_base_offset" start="0" end="15" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="PRIMITIVE_BASE" length="1">
|
||||
<field name="cs_prim_base" start="0" end="31" shift="2" type="address"/>
|
||||
</struct>
|
||||
|
||||
<struct name="COMPRESSION_SIZE_WORD" length="1">
|
||||
<field name="cs_isp_comp_table_size" start="27" end="31" type="uint"/>
|
||||
<field name="cs_tsp_comp_format_size" start="21" end="26" type="uint"/>
|
||||
<field name="cs_tsp_comp_table_size" start="9" end="18" type="uint"/>
|
||||
<field name="cs_tsp_comp_vertex_size" start="0" end="8" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_COMPRESSION_WORD_0" length="1">
|
||||
<field name="cf_isp_comp_fmt_z1" start="28" end="31" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_z0" start="24" end="27" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_y2" start="20" end="23" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_y1" start="16" end="19" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_y0" start="12" end="15" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_x2" start="8" end="11" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_x1" start="4" end="7" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_x0" start="0" end="3" type="COMPRESSION_FORMAT"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_COMPRESSION_WORD_1" length="1">
|
||||
<field name="vf_prim_msaa" start="16" end="16" type="bool"/>
|
||||
<field name="vf_prim_id_pres" start="15" end="15" type="bool"/>
|
||||
<field name="vf_vertex_clipped" start="14" end="14" type="bool"/>
|
||||
<field name="vf_vertex_total" start="8" end="13" type="uint"/>
|
||||
<field name="cf_isp_comp_fmt_z3" start="4" end="7" type="COMPRESSION_FORMAT"/>
|
||||
<field name="cf_isp_comp_fmt_z2" start="0" end="3" type="COMPRESSION_FORMAT"/>
|
||||
</struct>
|
||||
|
||||
<struct name="INDEX_DATA" length="1">
|
||||
<field name="ix_edge_flag1_ab" start="30" end="30" type="bool"/>
|
||||
<field name="ix_index1_0" start="24" end="29" type="uint"/>
|
||||
<field name="ix_bf_flag0" start="23" end="23" type="bool"/>
|
||||
<field name="ix_edge_flag0_ca" start="22" end="22" type="bool"/>
|
||||
<field name="ix_index0_2" start="16" end="21" type="uint"/>
|
||||
<field name="ix_edge_flag0_bc" start="14" end="14" type="bool"/>
|
||||
<field name="ix_index0_1" start="8" end="13" type="uint"/>
|
||||
<field name="ix_edge_flag0_ab" start="6" end="6" type="bool"/>
|
||||
<field name="ix_index0_0" start="0" end="5" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_XY" length="1">
|
||||
<field name="sign" start="23" end="23" type="bool"/>
|
||||
<field name="integer" start="8" end="22" type="uint"/>
|
||||
<field name="frac" start="0" end="7" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_WORD_0" length="1">
|
||||
<field name="y0" start="24" end="31" type="uint"/>
|
||||
<field name="x0" start="0" end="23" type="uint"/>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_WORD_1" length="1">
|
||||
<field name="z0" start="16" end="31" type="uint"/>
|
||||
<field name="y0" start="0" end="15" type="uint">
|
||||
<define name="SHIFT" value="8"/>
|
||||
</field>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_WORD_2" length="1">
|
||||
<field name="x1" start="16" end="31" type="uint"/>
|
||||
<field name="z0" start="0" end="15" type="uint">
|
||||
<define name="SHIFT" value="16"/>
|
||||
</field>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_WORD_3" length="1">
|
||||
<field name="y1" start="8" end="31" type="uint"/>
|
||||
<field name="x1" start="0" end="7" type="uint">
|
||||
<define name="SHIFT" value="16"/>
|
||||
</field>
|
||||
</struct>
|
||||
|
||||
<struct name="ISP_VERTEX_WORD_4" length="1">
|
||||
<field name="z1" start="0" end="31" type="uint"/>
|
||||
</struct>
|
||||
|
||||
</csbgen>
|
||||
|
@@ -125,6 +125,16 @@
|
||||
*/
|
||||
#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
|
||||
|
||||
/* Size of the image state in 64-bit units. */
|
||||
#define ROGUE_MAXIMUM_IMAGE_STATE_SIZE_IN_ULONGLONGS 2U
|
||||
|
||||
/* Size of the image state in dwords. The last 64-bit word is optional for
|
||||
* non-YUV textures.
|
||||
*/
|
||||
#define ROGUE_MAXIMUM_IMAGE_STATE_SIZE \
|
||||
(ROGUE_MAXIMUM_IMAGE_STATE_SIZE_IN_ULONGLONGS * \
|
||||
(sizeof(uint64_t) / sizeof(uint32_t)))
|
||||
|
||||
#define PVR_NUM_PBE_EMIT_REGS 8U
|
||||
|
||||
#endif /* ROGUE_HW_DEFS_H */
|
||||
|
@@ -57,6 +57,7 @@ pvr_files = files(
|
||||
'pvr_pass.c',
|
||||
'pvr_pipeline.c',
|
||||
'pvr_pipeline_cache.c',
|
||||
'pvr_transfer_frag_store.c',
|
||||
'pvr_query.c',
|
||||
'pvr_query_compute.c',
|
||||
'pvr_queue.c',
|
||||
|
@@ -433,22 +433,18 @@ void pvr_pds_pixel_shader_sa_initialize(
|
||||
* \param dest_offset Destination offset in the attribute.
|
||||
* \param dma_size The size of the DMA in words.
|
||||
* \param src_address Source address for the burst.
|
||||
* \param last Last DMA in program.
|
||||
* \param dev_info PVR device info structure.
|
||||
* \returns The number of DMA transfers required.
|
||||
*/
|
||||
|
||||
uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
|
||||
uint64_t *dma_address,
|
||||
uint32_t dest_offset,
|
||||
uint32_t dma_size,
|
||||
uint64_t src_address,
|
||||
bool last,
|
||||
const struct pvr_device_info *dev_info)
|
||||
{
|
||||
/* Simplified for MS2. */
|
||||
|
||||
/* Force to 1 DMA. */
|
||||
const uint32_t num_kicks = 1;
|
||||
|
||||
dma_control[0] = dma_size
|
||||
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
|
||||
dma_control[0] |= dest_offset
|
||||
@@ -457,12 +453,15 @@ uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
|
||||
dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
|
||||
|
||||
dma_address[0] = src_address;
|
||||
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
|
||||
dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
|
||||
}
|
||||
if (last)
|
||||
dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
|
||||
|
||||
return num_kicks;
|
||||
dma_address[0] = src_address;
|
||||
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
|
||||
dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
|
||||
|
||||
/* Force to 1 DMA. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* FIXME: use the csbgen interface and pvr_csb_pack.
|
||||
|
@@ -624,6 +624,7 @@ uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
|
||||
uint32_t dest_offset,
|
||||
uint32_t dma_size,
|
||||
uint64_t src_address,
|
||||
bool last,
|
||||
const struct pvr_device_info *dev_info);
|
||||
|
||||
void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include "pvr_clear.h"
|
||||
#include "pvr_csb.h"
|
||||
#include "pvr_formats.h"
|
||||
#include "pvr_job_transfer.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_shader_factory.h"
|
||||
#include "pvr_static_shaders.h"
|
||||
@@ -114,35 +115,161 @@ void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
||||
assert(!"Unimplemented");
|
||||
}
|
||||
|
||||
static struct pvr_transfer_cmd *
|
||||
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct pvr_transfer_cmd *transfer_cmd;
|
||||
|
||||
transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
|
||||
sizeof(*transfer_cmd),
|
||||
8U,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!transfer_cmd) {
|
||||
cmd_buffer->state.status =
|
||||
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* transfer_cmd->mapping_count is already set to zero. */
|
||||
transfer_cmd->filter = PVR_FILTER_POINT;
|
||||
transfer_cmd->resolve_op = PVR_RESOLVE_BLEND;
|
||||
transfer_cmd->addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
|
||||
transfer_cmd->cmd_buffer = cmd_buffer;
|
||||
|
||||
return transfer_cmd;
|
||||
}
|
||||
|
||||
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
|
||||
VkRect2D *rect,
|
||||
pvr_dev_addr_t dev_addr,
|
||||
VkDeviceSize offset,
|
||||
VkFormat vk_format,
|
||||
uint32_t width,
|
||||
uint32_t height)
|
||||
{
|
||||
surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
|
||||
surface->width = width;
|
||||
surface->height = height;
|
||||
surface->stride = width;
|
||||
surface->vk_format = vk_format;
|
||||
surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
|
||||
surface->sample_count = 1;
|
||||
|
||||
/* Initialize rectangle extent. Also, rectangle.offset should be set to
|
||||
* zero, as the offset is already adjusted in the device address above. We
|
||||
* don't explicitly set offset to zero as transfer_cmd is zero allocated.
|
||||
*/
|
||||
rect->extent.width = width;
|
||||
rect->extent.height = height;
|
||||
}
|
||||
|
||||
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
|
||||
pvr_dev_addr_t src_addr,
|
||||
VkDeviceSize src_offset,
|
||||
pvr_dev_addr_t dst_addr,
|
||||
VkDeviceSize dst_offset,
|
||||
VkDeviceSize size)
|
||||
{
|
||||
VkDeviceSize offset = 0;
|
||||
|
||||
while (offset < size) {
|
||||
VkDeviceSize remaining_size = size - offset;
|
||||
struct pvr_transfer_cmd *transfer_cmd;
|
||||
uint32_t texel_width;
|
||||
VkDeviceSize texels;
|
||||
VkFormat vk_format;
|
||||
VkResult result;
|
||||
uint32_t height;
|
||||
uint32_t width;
|
||||
|
||||
if (remaining_size >= 16U) {
|
||||
vk_format = VK_FORMAT_R32G32B32A32_UINT;
|
||||
texel_width = 16U;
|
||||
} else if (remaining_size >= 4U) {
|
||||
vk_format = VK_FORMAT_R32_UINT;
|
||||
texel_width = 4U;
|
||||
} else {
|
||||
vk_format = VK_FORMAT_R8_UINT;
|
||||
texel_width = 1U;
|
||||
}
|
||||
|
||||
texels = remaining_size / texel_width;
|
||||
|
||||
/* Try to do max-width rects, fall back to a 1-height rect for the
|
||||
* remainder.
|
||||
*/
|
||||
if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
|
||||
width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
|
||||
height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
|
||||
height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
|
||||
} else {
|
||||
width = texels;
|
||||
height = 1;
|
||||
}
|
||||
|
||||
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
||||
if (!transfer_cmd)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
if (!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL)) {
|
||||
pvr_setup_buffer_surface(&transfer_cmd->src,
|
||||
&transfer_cmd->mappings[0].src_rect,
|
||||
src_addr,
|
||||
offset + src_offset,
|
||||
vk_format,
|
||||
width,
|
||||
height);
|
||||
transfer_cmd->src_present = true;
|
||||
}
|
||||
|
||||
pvr_setup_buffer_surface(&transfer_cmd->dst,
|
||||
&transfer_cmd->scissor,
|
||||
dst_addr,
|
||||
offset + dst_offset,
|
||||
vk_format,
|
||||
width,
|
||||
height);
|
||||
|
||||
if (transfer_cmd->src_present)
|
||||
transfer_cmd->mappings[0].dst_rect = transfer_cmd->scissor;
|
||||
|
||||
transfer_cmd->mapping_count++;
|
||||
transfer_cmd->cmd_buffer = cmd_buffer;
|
||||
|
||||
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
||||
return result;
|
||||
}
|
||||
|
||||
offset += width * height * texel_width;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
|
||||
const VkCopyBufferInfo2 *pCopyBufferInfo)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
|
||||
PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
|
||||
const size_t regions_size =
|
||||
pCopyBufferInfo->regionCount * sizeof(*pCopyBufferInfo->pRegions);
|
||||
struct pvr_transfer_cmd *transfer_cmd;
|
||||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
|
||||
transfer_cmd = vk_alloc(&cmd_buffer->vk.pool->alloc,
|
||||
sizeof(*transfer_cmd) + regions_size,
|
||||
8U,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!transfer_cmd) {
|
||||
cmd_buffer->state.status =
|
||||
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
|
||||
VkResult result;
|
||||
|
||||
return;
|
||||
result =
|
||||
pvr_cmd_copy_buffer_region(cmd_buffer,
|
||||
src->dev_addr,
|
||||
pCopyBufferInfo->pRegions[i].srcOffset,
|
||||
dst->dev_addr,
|
||||
pCopyBufferInfo->pRegions[i].dstOffset,
|
||||
pCopyBufferInfo->pRegions[i].size);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
}
|
||||
|
||||
transfer_cmd->src = src;
|
||||
transfer_cmd->dst = dst;
|
||||
transfer_cmd->region_count = pCopyBufferInfo->regionCount;
|
||||
memcpy(transfer_cmd->regions, pCopyBufferInfo->pRegions, regions_size);
|
||||
|
||||
pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -32,6 +32,7 @@
|
||||
#include "hwdef/rogue_hw_defs.h"
|
||||
#include "hwdef/rogue_hw_utils.h"
|
||||
#include "pvr_bo.h"
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_csb.h"
|
||||
#include "pvr_csb_enum_helpers.h"
|
||||
#include "pvr_device_info.h"
|
||||
@@ -604,37 +605,6 @@ err_csb_finish:
|
||||
return result;
|
||||
}
|
||||
|
||||
struct pvr_combined_image_sampler_descriptor {
|
||||
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
|
||||
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
|
||||
union pvr_sampler_descriptor sampler;
|
||||
};
|
||||
|
||||
#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
|
||||
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
|
||||
(_size), \
|
||||
"Size of '" #_field_name "' in '" #_struct_type \
|
||||
"' differs from expected")
|
||||
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
|
||||
#undef CHECK_STRUCT_FIELD_SIZE
|
||||
|
||||
static VkResult pvr_setup_texture_state_words(
|
||||
struct pvr_device *device,
|
||||
struct pvr_combined_image_sampler_descriptor *descriptor,
|
||||
|
@@ -39,9 +39,11 @@
|
||||
* relevant for the driver/compiler interface (no Vulkan types).
|
||||
*/
|
||||
|
||||
#include "hwdef/rogue_hw_defs.h"
|
||||
#include "pvr_limits.h"
|
||||
#include "pvr_types.h"
|
||||
#include "util/list.h"
|
||||
#include "util/macros.h"
|
||||
#include "vk_object.h"
|
||||
#include "vk_sync.h"
|
||||
|
||||
@@ -146,6 +148,13 @@ enum pvr_stage_allocation {
|
||||
PVR_STAGE_ALLOCATION_COUNT
|
||||
};
|
||||
|
||||
enum pvr_filter {
|
||||
PVR_FILTER_DONTCARE, /* Any filtering mode is acceptable. */
|
||||
PVR_FILTER_POINT,
|
||||
PVR_FILTER_LINEAR,
|
||||
PVR_FILTER_BICUBIC,
|
||||
};
|
||||
|
||||
enum pvr_resolve_op {
|
||||
PVR_RESOLVE_BLEND,
|
||||
PVR_RESOLVE_MIN,
|
||||
@@ -202,6 +211,42 @@ union pvr_sampler_descriptor {
|
||||
} data;
|
||||
};
|
||||
|
||||
struct pvr_combined_image_sampler_descriptor {
|
||||
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
|
||||
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
|
||||
union pvr_sampler_descriptor sampler;
|
||||
};
|
||||
|
||||
#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
|
||||
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
|
||||
(_size), \
|
||||
"Size of '" #_field_name "' in '" #_struct_type \
|
||||
"' differs from expected")
|
||||
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
|
||||
#if 0
|
||||
/* TODO: Don't really want to include pvr_csb.h in here since this header is
|
||||
* shared with the compiler. Figure out a better place for these.
|
||||
*/
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
|
||||
image,
|
||||
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
|
||||
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
|
||||
sizeof(uint32_t));
|
||||
#endif
|
||||
|
||||
#undef CHECK_STRUCT_FIELD_SIZE
|
||||
|
||||
struct pvr_sampler {
|
||||
struct vk_object_base base;
|
||||
|
||||
|
@@ -1265,6 +1265,7 @@ static VkResult pvr_pds_idfwdf_programs_create_and_upload(
|
||||
0,
|
||||
shareds,
|
||||
shareds_buffer_addr.addr,
|
||||
false,
|
||||
dev_info);
|
||||
|
||||
/* DMA temp regs. */
|
||||
|
@@ -25,21 +25,25 @@
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include "hwdef/rogue_hw_utils.h"
|
||||
#include "pvr_bo.h"
|
||||
#include "pvr_cdm_load_sr.h"
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_csb.h"
|
||||
#include "pvr_job_context.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_transfer_frag_store.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_uscgen.h"
|
||||
#include "pvr_vdm_load_sr.h"
|
||||
#include "pvr_vdm_store_sr.h"
|
||||
#include "pvr_winsys.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/os_file.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_log.h"
|
||||
@@ -1259,15 +1263,30 @@ static void pvr_transfer_eot_shaders_fini(struct pvr_device *device,
|
||||
static VkResult pvr_transfer_ctx_shaders_init(struct pvr_device *device,
|
||||
struct pvr_transfer_ctx *ctx)
|
||||
{
|
||||
/* TODO: Setup USC fragments. */
|
||||
VkResult result;
|
||||
|
||||
return pvr_transfer_eot_shaders_init(device, ctx);
|
||||
result = pvr_transfer_frag_store_init(device, &ctx->frag_store);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_out;
|
||||
|
||||
result = pvr_transfer_eot_shaders_init(device, ctx);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_frag_store_fini;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
err_frag_store_fini:
|
||||
pvr_transfer_frag_store_fini(device, &ctx->frag_store);
|
||||
|
||||
err_out:
|
||||
return result;
|
||||
}
|
||||
|
||||
static void pvr_transfer_ctx_shaders_fini(struct pvr_device *device,
|
||||
struct pvr_transfer_ctx *ctx)
|
||||
{
|
||||
pvr_transfer_eot_shaders_fini(device, ctx);
|
||||
pvr_transfer_frag_store_fini(device, &ctx->frag_store);
|
||||
}
|
||||
|
||||
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
|
||||
|
@@ -24,8 +24,11 @@
|
||||
#ifndef PVR_JOB_CONTEXT_H
|
||||
#define PVR_JOB_CONTEXT_H
|
||||
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_transfer_frag_store.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_uscgen.h"
|
||||
#include "pvr_winsys.h"
|
||||
|
||||
/* Support PDS code/data loading/storing to the 'B' shared register state
|
||||
@@ -143,6 +146,8 @@ struct pvr_transfer_ctx {
|
||||
|
||||
struct pvr_winsys_transfer_ctx *ws_ctx;
|
||||
|
||||
struct pvr_transfer_frag_store frag_store;
|
||||
|
||||
struct pvr_bo *usc_eot_bos[PVR_TRANSFER_MAX_RENDER_TARGETS];
|
||||
|
||||
struct pvr_pds_upload pds_unitex_code[PVR_TRANSFER_MAX_TEXSTATE_DMA]
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -27,13 +27,23 @@
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
struct pvr_device;
|
||||
struct pvr_sub_cmd_transfer;
|
||||
struct pvr_transfer_ctx;
|
||||
struct vk_sync;
|
||||
|
||||
VkResult pvr_transfer_job_submit(struct pvr_device *device,
|
||||
struct pvr_transfer_ctx *ctx,
|
||||
/**
|
||||
* Destination pixels not covered by any of the destination rectangles but
|
||||
* inside the scissor are filled with the clear color.
|
||||
*/
|
||||
#define PVR_TRANSFER_CMD_FLAGS_FILL 0x00000800U
|
||||
/** If using TQ3D, route to fast2d. */
|
||||
#define PVR_TRANSFER_CMD_FLAGS_FAST2D 0x00200000U
|
||||
/** Merge a depth or stencil against a depth + stencil texture. */
|
||||
#define PVR_TRANSFER_CMD_FLAGS_DSMERGE 0x00000200U
|
||||
/** Valid if doing a DS merge with depth + stencil to depth + stencil. */
|
||||
#define PVR_TRANSFER_CMD_FLAGS_PICKD 0x00000400U
|
||||
|
||||
VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
|
||||
struct pvr_sub_cmd_transfer *sub_cmd,
|
||||
struct vk_sync *wait,
|
||||
struct vk_sync *signal_sync);
|
||||
|
@@ -58,6 +58,7 @@
|
||||
#include "util/macros.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_math.h"
|
||||
#include "vk_buffer.h"
|
||||
#include "vk_command_buffer.h"
|
||||
#include "vk_device.h"
|
||||
@@ -353,16 +354,118 @@ struct pvr_buffer_view {
|
||||
uint64_t texture_state[2];
|
||||
};
|
||||
|
||||
#define PVR_TRANSFER_MAX_CUSTOM_MAPPINGS 6U
|
||||
|
||||
/** A surface describes a source or destination for a transfer operation. */
|
||||
struct pvr_transfer_cmd_surface {
|
||||
pvr_dev_addr_t dev_addr;
|
||||
|
||||
/* Memory address for extra U/V planes. */
|
||||
pvr_dev_addr_t uv_address[2];
|
||||
|
||||
/* Surface width in texels. */
|
||||
uint32_t width;
|
||||
|
||||
/* Surface height in texels. */
|
||||
uint32_t height;
|
||||
|
||||
uint32_t depth;
|
||||
|
||||
/* Z position in a 3D texture. 0.0f <= z_position <= depth. */
|
||||
float z_position;
|
||||
|
||||
/* Stride in texels. */
|
||||
uint32_t stride;
|
||||
|
||||
VkFormat vk_format;
|
||||
|
||||
enum pvr_memlayout mem_layout;
|
||||
|
||||
uint32_t sample_count;
|
||||
};
|
||||
|
||||
struct pvr_rect_mapping {
|
||||
VkRect2D src_rect;
|
||||
VkRect2D dst_rect;
|
||||
};
|
||||
|
||||
/* Describes an Alpha-Transparency configuration - for Transfer Queue Use. */
|
||||
struct pvr_transfer_alpha {
|
||||
enum pvr_alpha_type type;
|
||||
/* Global alpha value. */
|
||||
uint32_t global;
|
||||
|
||||
/* Custom blend op for rgb. */
|
||||
uint32_t custom_rgb;
|
||||
/* Custom blend op for alpha. */
|
||||
uint32_t custom_alpha;
|
||||
/* Custom global alpha value for alpha output. */
|
||||
uint32_t global2;
|
||||
/* Custom multiplication of global and source alpha. */
|
||||
bool glob_src_mul;
|
||||
/* Custom zero source alpha transparency stage. */
|
||||
bool zero_src_a_trans;
|
||||
|
||||
/* Enable argb1555 alpha components. */
|
||||
bool alpha_components;
|
||||
/* Source alpha value when argb1555 alpha bit is 0. */
|
||||
uint32_t component0;
|
||||
/* Source alpha value when argb1555 alpha bit is 1. */
|
||||
uint32_t component1;
|
||||
};
|
||||
|
||||
struct pvr_transfer_blit {
|
||||
/* 16 bit rop4 (ie two 8 bit rop3's). */
|
||||
uint32_t rop_code;
|
||||
|
||||
/* Color key mask. */
|
||||
uint32_t color_mask;
|
||||
|
||||
/* Alpha blend. */
|
||||
struct pvr_transfer_alpha alpha;
|
||||
|
||||
VkOffset2D offset;
|
||||
};
|
||||
|
||||
/* A single transfer command; instances are chained through `link`.
 *
 * NOTE(review): `src` and `dst` are each declared twice in this span (once as
 * struct pvr_buffer pointers, once as struct pvr_transfer_cmd_surface), and
 * stray `|` lines separate the members. This looks like removed and added diff
 * lines rendered together - confirm against the real header before relying on
 * the member list.
 */
struct pvr_transfer_cmd {
|
||||
/* Node to link this cmd into the transfer_cmds list in
|
||||
* pvr_sub_cmd::transfer structure.
|
||||
*/
|
||||
struct list_head link;
|
||||
|
||||
struct pvr_buffer *src;
|
||||
struct pvr_buffer *dst;
|
||||
uint32_t region_count;
|
||||
VkBufferCopy2 regions[0];
|
||||
uint32_t flags;
|
||||
|
||||
struct pvr_transfer_cmd_surface src;
|
||||
bool src_present;
|
||||
|
||||
union fi clear_color[4];
|
||||
|
||||
struct pvr_transfer_cmd_surface dst;
|
||||
|
||||
VkRect2D scissor;
|
||||
|
||||
/* Presumably the number of valid elements in `mappings` - TODO confirm. */
uint32_t mapping_count;
|
||||
struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
|
||||
|
||||
/* In the case of a simple 1:1 copy, this setting does not affect the output
|
||||
* but will affect performance. Use clamp to edge when possible.
|
||||
*/
|
||||
/* This is of type enum PVRX(TEXSTATE_ADDRMODE). */
|
||||
int addr_mode;
|
||||
|
||||
/* Source filtering method. */
|
||||
enum pvr_filter filter;
|
||||
|
||||
/* MSAA resolve operation. */
|
||||
enum pvr_resolve_op resolve_op;
|
||||
|
||||
struct pvr_transfer_blit blit;
|
||||
|
||||
/* Pointer to cmd buffer this transfer cmd belongs to. This is mainly used
|
||||
* to link buffer objects allocated during job submission into
|
||||
* cmd_buffer::bo_list head.
|
||||
*/
|
||||
struct pvr_cmd_buffer *cmd_buffer;
|
||||
};
|
||||
|
||||
struct pvr_sub_cmd_gfx {
|
||||
|
@@ -361,8 +361,7 @@ static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
|
||||
return result;
|
||||
|
||||
result =
|
||||
pvr_transfer_job_submit(device,
|
||||
queue->transfer_ctx,
|
||||
pvr_transfer_job_submit(queue->transfer_ctx,
|
||||
sub_cmd,
|
||||
queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
|
||||
sync);
|
||||
|
@@ -218,7 +218,8 @@ pvr_pack_tex_state(struct pvr_device *device,
|
||||
if (iview_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
|
||||
array_layers /= 6;
|
||||
|
||||
word1.depth = array_layers - 1;
|
||||
if (array_layers > 0)
|
||||
word1.depth = array_layers - 1;
|
||||
}
|
||||
|
||||
word1.texaddr = PVR_DEV_ADDR_OFFSET(info->addr, info->offset);
|
||||
|
392
src/imagination/vulkan/pvr_transfer_frag_store.c
Normal file
392
src/imagination/vulkan/pvr_transfer_frag_store.c
Normal file
@@ -0,0 +1,392 @@
|
||||
/*
|
||||
* Copyright © 2023 Imagination Technologies Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
#include "hwdef/rogue_hw_utils.h"
|
||||
#include "pvr_bo.h"
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_device_info.h"
|
||||
#include "pvr_job_transfer.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_transfer_frag_store.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_uscgen.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_math.h"
|
||||
#include "vk_log.h"
|
||||
|
||||
#define PVR_TRANSFER_BYTE_UNWIND_MAX 16U
|
||||
|
||||
struct pvr_transfer_frag_store_entry_data {
|
||||
pvr_dev_addr_t kick_usc_pds_offset;
|
||||
struct pvr_bo *kick_usc_pds_upload;
|
||||
|
||||
struct pvr_bo *usc_upload;
|
||||
struct pvr_tq_frag_sh_reg_layout sh_reg_layout;
|
||||
};
|
||||
|
||||
/* Yields the data pointer of a hash table entry typed as frag store entry
 * data. The constness of the entry pointer is carried over to the result.
 */
#define to_pvr_entry_data(entry_ptr)                                          \
   _Generic((entry_ptr),                                                      \
            const struct hash_entry *:                                        \
               (const struct pvr_transfer_frag_store_entry_data *)(           \
                  (entry_ptr)->data),                                         \
            struct hash_entry *:                                              \
               (struct pvr_transfer_frag_store_entry_data *)(                 \
                  (entry_ptr)->data))
|
||||
|
||||
VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store)
|
||||
{
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
|
||||
*store = (struct pvr_transfer_frag_store){
|
||||
.max_multisample = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 1U),
|
||||
.hash_table = _mesa_hash_table_create_u32_keys(NULL),
|
||||
};
|
||||
|
||||
if (!store->hash_table)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns a key based on shader properties.
|
||||
*
|
||||
* Returns a unique key that can be used to uniquely identify a transfer
|
||||
* fragment shader based on the provided shader properties.
|
||||
*
|
||||
* Make sure that the non valid parts of shader_props are memset to 0. Otherwise
|
||||
* these bits might appear in the key as uninitialized data and might not
|
||||
* match a key for the same shader.
|
||||
*/
|
||||
static uint32_t pvr_transfer_frag_shader_key(
|
||||
uint32_t max_multisample,
|
||||
const struct pvr_tq_shader_properties *shader_props)
|
||||
{
|
||||
const struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
|
||||
uint32_t resolve_op_num = max_multisample + PVR_RESOLVE_SAMPLE0;
|
||||
|
||||
uint32_t num_layers_bits = util_logbase2_ceil(PVR_TRANSFER_MAX_LAYERS + 1U);
|
||||
uint32_t layer_float_bits = util_logbase2_ceil(PVR_INT_COORD_SET_FLOATS_NUM);
|
||||
uint32_t pixel_src_bits = util_logbase2_ceil(PVR_TRANSFER_PBE_PIXEL_SRC_NUM);
|
||||
uint32_t byte_unwind_bits = util_logbase2_ceil(PVR_TRANSFER_BYTE_UNWIND_MAX);
|
||||
uint32_t resolve_op_bits = util_logbase2_ceil(resolve_op_num);
|
||||
uint32_t sample_cnt_bits = util_last_bit(util_logbase2(max_multisample));
|
||||
uint32_t hash = 0U;
|
||||
|
||||
#if defined(DEBUG)
|
||||
uint32_t max_shift = 0U;
|
||||
# define shift_hash(hash, num) \
|
||||
do { \
|
||||
max_shift += (num); \
|
||||
assert(max_shift <= 32U); \
|
||||
\
|
||||
(hash) <<= (num); \
|
||||
} while (0U)
|
||||
#else
|
||||
# define shift_hash(hash, num) hash <<= (num)
|
||||
#endif
|
||||
|
||||
/* Hash layer info. */
|
||||
|
||||
shift_hash(hash, layer_float_bits);
|
||||
hash |= (uint32_t)shader_props->layer_props.layer_floats;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= layer->sample;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t) false;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t) false;
|
||||
|
||||
shift_hash(hash, pixel_src_bits);
|
||||
hash |= (uint32_t)layer->pbe_format;
|
||||
|
||||
shift_hash(hash, resolve_op_bits);
|
||||
hash |= (uint32_t)layer->resolve_op;
|
||||
|
||||
assert(util_is_power_of_two_nonzero(layer->sample_count));
|
||||
shift_hash(hash, sample_cnt_bits);
|
||||
hash |= (uint32_t)util_logbase2(layer->sample_count);
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t)layer->msaa;
|
||||
|
||||
shift_hash(hash, byte_unwind_bits);
|
||||
hash |= layer->byte_unwind;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t)layer->linear;
|
||||
|
||||
/* End layer info. */
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t)shader_props->full_rate;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t)shader_props->iterated;
|
||||
|
||||
shift_hash(hash, 1U);
|
||||
hash |= (uint32_t)shader_props->pick_component;
|
||||
|
||||
shift_hash(hash, num_layers_bits);
|
||||
/* Just 1 layer. */
|
||||
hash |= 1;
|
||||
|
||||
shift_hash(hash, 3U);
|
||||
hash |= shader_props->alpha_type;
|
||||
|
||||
#undef shift_hash
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
/* Converts an integer key into the pointer-sized key the hash table takes. */
#define to_hash_table_key(key_value) ((void *)(uintptr_t)(key_value))
|
||||
|
||||
static VkResult pvr_transfer_frag_store_entry_data_compile(
|
||||
struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store_entry_data *const entry_data,
|
||||
const struct pvr_tq_shader_properties *shader_props,
|
||||
uint32_t *const num_usc_temps_out)
|
||||
{
|
||||
const uint32_t image_desc_offset =
|
||||
offsetof(struct pvr_combined_image_sampler_descriptor, image) / 4;
|
||||
const uint32_t sampler_desc_offset =
|
||||
offsetof(struct pvr_combined_image_sampler_descriptor, sampler) / 4;
|
||||
|
||||
const uint32_t cache_line_size =
|
||||
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
||||
|
||||
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
|
||||
uint32_t next_free_sh_reg = 0;
|
||||
struct util_dynarray shader;
|
||||
VkResult result;
|
||||
|
||||
/* TODO: Allocate all combined image samplers if needed? Otherwise change the
|
||||
* array to a single descriptor.
|
||||
*/
|
||||
sh_reg_layout->combined_image_samplers.offsets[0].image =
|
||||
next_free_sh_reg + image_desc_offset;
|
||||
sh_reg_layout->combined_image_samplers.offsets[0].sampler =
|
||||
next_free_sh_reg + sampler_desc_offset;
|
||||
sh_reg_layout->combined_image_samplers.count = 1;
|
||||
next_free_sh_reg += sizeof(struct pvr_combined_image_sampler_descriptor) / 4;
|
||||
|
||||
/* TODO: Handle dynamic_const_regs used for PVR_INT_COORD_SET_FLOATS_{4,6}, Z
|
||||
* position, texel unwind, etc. when compiler adds support for them.
|
||||
*/
|
||||
sh_reg_layout->dynamic_consts.offset = next_free_sh_reg;
|
||||
sh_reg_layout->dynamic_consts.count = 0;
|
||||
|
||||
sh_reg_layout->driver_total = next_free_sh_reg;
|
||||
|
||||
pvr_uscgen_tq_frag(shader_props,
|
||||
&entry_data->sh_reg_layout,
|
||||
num_usc_temps_out,
|
||||
&shader);
|
||||
|
||||
result = pvr_gpu_upload_usc(device,
|
||||
util_dynarray_begin(&shader),
|
||||
util_dynarray_num_elements(&shader, uint8_t),
|
||||
cache_line_size,
|
||||
&entry_data->usc_upload);
|
||||
util_dynarray_fini(&shader);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult pvr_transfer_frag_store_entry_data_create(
|
||||
struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store,
|
||||
const struct pvr_tq_shader_properties *shader_props,
|
||||
const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
|
||||
{
|
||||
struct pvr_pds_kickusc_program kick_usc_pds_prog = { 0 };
|
||||
struct pvr_transfer_frag_store_entry_data *entry_data;
|
||||
pvr_dev_addr_t dev_addr;
|
||||
uint32_t num_usc_temps;
|
||||
VkResult result;
|
||||
|
||||
entry_data = ralloc(store->hash_table, __typeof__(*entry_data));
|
||||
if (!entry_data)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result = pvr_transfer_frag_store_entry_data_compile(device,
|
||||
entry_data,
|
||||
shader_props,
|
||||
&num_usc_temps);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_entry;
|
||||
|
||||
dev_addr = entry_data->usc_upload->vma->dev_addr;
|
||||
dev_addr.addr -= device->heaps.usc_heap->base_addr.addr;
|
||||
|
||||
pvr_pds_setup_doutu(&kick_usc_pds_prog.usc_task_control,
|
||||
dev_addr.addr,
|
||||
num_usc_temps,
|
||||
shader_props->full_rate
|
||||
? PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL)
|
||||
: PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
|
||||
false);
|
||||
|
||||
pvr_pds_kick_usc(&kick_usc_pds_prog, NULL, 0U, false, PDS_GENERATE_SIZES);
|
||||
|
||||
result = pvr_bo_alloc(
|
||||
device,
|
||||
device->heaps.pds_heap,
|
||||
(kick_usc_pds_prog.data_size + kick_usc_pds_prog.code_size) * 4,
|
||||
16,
|
||||
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
||||
&entry_data->kick_usc_pds_upload);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_usc_upload;
|
||||
|
||||
pvr_pds_kick_usc(&kick_usc_pds_prog,
|
||||
entry_data->kick_usc_pds_upload->bo->map,
|
||||
0U,
|
||||
false,
|
||||
PDS_GENERATE_CODEDATA_SEGMENTS);
|
||||
|
||||
dev_addr = entry_data->kick_usc_pds_upload->vma->dev_addr;
|
||||
dev_addr.addr -= device->heaps.pds_heap->base_addr.addr;
|
||||
entry_data->kick_usc_pds_offset = dev_addr;
|
||||
|
||||
*entry_data_out = entry_data;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
err_free_usc_upload:
|
||||
pvr_bo_free(device, entry_data->usc_upload);
|
||||
|
||||
err_free_entry:
|
||||
ralloc_free(entry_data);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void inline pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
|
||||
struct pvr_device *device,
|
||||
const struct pvr_transfer_frag_store_entry_data *entry_data)
|
||||
{
|
||||
pvr_bo_free(device, entry_data->kick_usc_pds_upload);
|
||||
pvr_bo_free(device, entry_data->usc_upload);
|
||||
}
|
||||
|
||||
static void inline pvr_transfer_frag_store_entry_data_destroy(
|
||||
struct pvr_device *device,
|
||||
const struct pvr_transfer_frag_store_entry_data *entry_data)
|
||||
{
|
||||
pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device,
|
||||
entry_data);
|
||||
/* Casting away the const :( */
|
||||
ralloc_free((void *)entry_data);
|
||||
}
|
||||
|
||||
static VkResult pvr_transfer_frag_store_get_entry(
|
||||
struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store,
|
||||
const struct pvr_tq_shader_properties *shader_props,
|
||||
const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
|
||||
{
|
||||
const uint32_t key =
|
||||
pvr_transfer_frag_shader_key(store->max_multisample, shader_props);
|
||||
const struct hash_entry *entry;
|
||||
VkResult result;
|
||||
|
||||
entry = _mesa_hash_table_search(store->hash_table, to_hash_table_key(key));
|
||||
if (!entry) {
|
||||
/* Init so that gcc stops complaining. */
|
||||
const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
|
||||
|
||||
result = pvr_transfer_frag_store_entry_data_create(device,
|
||||
store,
|
||||
shader_props,
|
||||
&entry_data);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
assert(entry_data);
|
||||
|
||||
entry = _mesa_hash_table_insert(store->hash_table,
|
||||
to_hash_table_key(key),
|
||||
(void *)entry_data);
|
||||
if (!entry) {
|
||||
pvr_transfer_frag_store_entry_data_destroy(device, entry_data);
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
}
|
||||
|
||||
*entry_data_out = to_pvr_entry_data(entry);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult pvr_transfer_frag_store_get_shader_info(
|
||||
struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store,
|
||||
const struct pvr_tq_shader_properties *shader_props,
|
||||
pvr_dev_addr_t *const pds_dev_addr_out,
|
||||
const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out)
|
||||
{
|
||||
/* Init so that gcc stops complaining. */
|
||||
const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
|
||||
VkResult result;
|
||||
|
||||
result = pvr_transfer_frag_store_get_entry(device,
|
||||
store,
|
||||
shader_props,
|
||||
&entry_data);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
*pds_dev_addr_out = entry_data->kick_usc_pds_offset;
|
||||
*reg_layout_out = &entry_data->sh_reg_layout;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void pvr_transfer_frag_store_fini(struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store)
|
||||
{
|
||||
hash_table_foreach_remove(store->hash_table, entry)
|
||||
{
|
||||
/* ralloc_free() in _mesa_hash_table_destroy() will free each entry's
|
||||
* memory so let's not waste extra time freeing them one by one and
|
||||
* unliking.
|
||||
*/
|
||||
pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
|
||||
device,
|
||||
to_pvr_entry_data(entry));
|
||||
}
|
||||
|
||||
_mesa_hash_table_destroy(store->hash_table, NULL);
|
||||
}
|
57
src/imagination/vulkan/pvr_transfer_frag_store.h
Normal file
57
src/imagination/vulkan/pvr_transfer_frag_store.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright © 2023 Imagination Technologies Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef PVR_TRANSFER_FRAG_STORE_H
|
||||
#define PVR_TRANSFER_FRAG_STORE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
#include "pvr_device_info.h"
|
||||
#include "pvr_uscgen.h"
|
||||
#include "pvr_types.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
struct pvr_device;
|
||||
|
||||
/* Cache of transfer fragment shaders. */
struct pvr_transfer_frag_store {
   /* Set by pvr_transfer_frag_store_init() and used when building shader
    * keys.
    */
   uint32_t max_multisample;

   /* Maps the keys produced by pvr_transfer_frag_shader_key() to
    * pvr_transfer_frag_store_entry_data entries.
    */
   struct hash_table *hash_table;
};
|
||||
|
||||
VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store);
|
||||
void pvr_transfer_frag_store_fini(struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store);
|
||||
|
||||
VkResult pvr_transfer_frag_store_get_shader_info(
|
||||
struct pvr_device *device,
|
||||
struct pvr_transfer_frag_store *store,
|
||||
const struct pvr_tq_shader_properties *shader_props,
|
||||
pvr_dev_addr_t *const pds_dev_addr_out,
|
||||
const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out);
|
||||
|
||||
#endif /* PVR_TRANSFER_FRAG_STORE_H */
|
@@ -88,4 +88,45 @@ static inline bool vk_format_is_normalized(VkFormat vk_format)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
vk_format_get_common_color_channel_count(VkFormat src_format,
|
||||
VkFormat dst_format)
|
||||
{
|
||||
const struct util_format_description *dst_desc =
|
||||
vk_format_description(dst_format);
|
||||
const struct util_format_description *src_desc =
|
||||
vk_format_description(src_format);
|
||||
uint32_t count = 0;
|
||||
|
||||
/* Check if destination format is alpha only and source format has alpha
|
||||
* channel.
|
||||
*/
|
||||
if (util_format_is_alpha(vk_format_to_pipe_format(dst_format))) {
|
||||
count = 1;
|
||||
} else if (dst_desc->nr_channels <= src_desc->nr_channels) {
|
||||
for (uint32_t i = 0; i < dst_desc->nr_channels; i++) {
|
||||
enum pipe_swizzle swizzle = dst_desc->swizzle[i];
|
||||
|
||||
if (swizzle > PIPE_SWIZZLE_W)
|
||||
continue;
|
||||
|
||||
for (uint32_t j = 0; j < src_desc->nr_channels; j++) {
|
||||
if (src_desc->swizzle[j] == swizzle) {
|
||||
count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
count = dst_desc->nr_channels;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Tells whether the format is alpha-only, as reported by
 * util_format_is_alpha() for the matching pipe format.
 */
static inline bool vk_format_is_alpha(VkFormat format)
{
   const enum pipe_format pipe_fmt = vk_format_to_pipe_format(format);

   return util_format_is_alpha(pipe_fmt);
}
|
||||
|
||||
#endif /* VK_FORMAT_H */
|
||||
|
@@ -258,6 +258,7 @@ struct pvr_winsys_transfer_regs {
|
||||
uint32_t event_pixel_pds_code;
|
||||
uint32_t event_pixel_pds_data;
|
||||
uint32_t event_pixel_pds_info;
|
||||
uint32_t frag_screen;
|
||||
uint32_t isp_aa;
|
||||
uint32_t isp_bgobjvals;
|
||||
uint32_t isp_ctl;
|
||||
|
Reference in New Issue
Block a user