pvr: Add support to process transfer and blit cmds

Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Co-authored-by: Matt Coster <matt.coster@imgtec.com>
Co-authored-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21550>
This commit is contained in:
Rajnesh Kanwal
2022-05-17 17:19:31 +01:00
committed by Marge Bot
parent 1cdd0ccb37
commit 480bdff4b5
24 changed files with 5907 additions and 125 deletions

View File

@@ -53,6 +53,8 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_num_clusters = true,
.has_num_raster_pipes = true,
.has_num_user_clip_planes = true,
.has_pbe_filterable_f16 = true,
.has_pbe_yuv = true,
.has_slc_cache_line_size_bits = true,
.has_slc_mcu_cache_controls = true,
.has_tf_bicubic_filter = true,
@@ -96,6 +98,7 @@ const struct pvr_device_enhancements pvr_device_enhancements_4_40_2_51 = {
.has_ern35421 = true,
.has_ern38020 = true,
.has_ern38748 = true,
.has_ern42064 = true,
.has_ern42307 = true,
};
@@ -126,6 +129,7 @@ const struct pvr_device_ident pvr_device_ident_33_V_11_3 = {
const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_common_store_size_in_dwords = true,
.has_compute = true,
.has_ipf_creq_pf = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
.has_max_instances_per_pds_task = true,
@@ -136,6 +140,8 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_num_raster_pipes = true,
.has_num_user_clip_planes = true,
.has_pbe2_in_xe = true,
.has_pbe_filterable_f16 = true,
.has_pbe_yuv = true,
.has_roguexe = true,
.has_screen_size8K = true,
.has_simple_internal_parameter_format = true,
@@ -205,6 +211,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_compute_overlap = true,
.has_gpu_multicore_support = true,
.has_gs_rta_support = true,
.has_ipf_creq_pf = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
.has_max_instances_per_pds_task = true,
@@ -216,6 +223,8 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_num_user_clip_planes = true,
.has_paired_tiles = true,
.has_pbe2_in_xe = true,
.has_pbe_filterable_f16 = true,
.has_pbe_yuv = true,
.has_pds_ddmadt = true,
.has_roguexe = true,
.has_screen_size8K = true,

View File

@@ -257,6 +257,7 @@ struct pvr_device_features {
bool has_eight_output_registers : 1;
bool has_gpu_multicore_support : 1;
bool has_gs_rta_support : 1;
bool has_ipf_creq_pf : 1;
bool has_isp_max_tiles_in_flight : 1;
bool has_isp_samples_per_pixel : 1;
bool has_max_instances_per_pds_task : 1;
@@ -268,10 +269,13 @@ struct pvr_device_features {
bool has_num_user_clip_planes : 1;
bool has_paired_tiles : 1;
bool has_pbe2_in_xe : 1;
bool has_pbe_filterable_f16 : 1;
bool has_pbe_yuv : 1;
bool has_pds_ddmadt : 1;
bool has_roguexe : 1;
bool has_screen_size8K : 1;
bool has_simple_internal_parameter_format : 1;
bool has_simple_internal_parameter_format_v1 : 1;
bool has_simple_internal_parameter_format_v2 : 1;
bool has_simple_parameter_format_version : 1;
bool has_slc_cache_line_size_bits : 1;
@@ -327,6 +331,7 @@ struct pvr_device_enhancements {
bool has_ern35421 : 1;
bool has_ern38020 : 1;
bool has_ern38748 : 1;
bool has_ern42064 : 1;
bool has_ern42307 : 1;
bool has_ern45493 : 1;
};

View File

@@ -27,9 +27,18 @@
#include <assert.h>
#include <stdint.h>
#include "pvr_types.h"
#include "util/bitscan.h"
#include "util/macros.h"
/**
 * Checks whether a device address is a multiple of \p alignment.
 *
 * \param addr      Device address to test.
 * \param alignment Required alignment; asserted to be a non-zero power of two.
 * \returns true if none of the address bits below \p alignment are set.
 */
static inline bool pvr_dev_addr_is_aligned(pvr_dev_addr_t addr,
                                           const uint32_t alignment)
{
   /* For a power of two, alignment - 1 is a mask of all the lower bits. */
   const uint32_t low_bits_mask = alignment - 1;

   assert(util_is_power_of_two_nonzero(alignment));

   return !((uintptr_t)(addr.addr) & low_bits_mask);
}
static inline bool ptr_is_aligned(const void *const ptr,
const uint32_t alignment)
{

View File

@@ -499,7 +499,7 @@ SOFTWARE.
</struct>
<struct name="ISP_AA" length="1">
<field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE"/>
<field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE" default="AA_NONE"/>
</struct>
<struct name="ISP_CTL" length="1">

View File

@@ -26,6 +26,40 @@ SOFTWARE.
<csbgen name="ROGUE" prefix="IPF">
<define name="TILE_SIZE_PIXELS" value="32"/>
<define name="CONTROL_STREAM_SIZE_DWORDS" value="32"/>
<define name="ISP_VERTEX_XY_BIAS_VALUE" value="4096"/>
<enum name="COMPRESSION_FORMAT">
<value name="UNIQUE_1" value="0"/>
<value name="ORIGIN_1_DELTA_1" value="1"/>
<value name="ORIGIN_1_DELTA_2" value="2"/>
<value name="ORIGIN_1_DELTA_3" value="3"/>
<value name="ORIGIN_1_DELTA_4" value="4"/>
<value name="ORIGIN_1_DELTA_5" value="5"/>
<value name="ORIGIN_1_DELTA_6" value="6"/>
<value name="ORIGIN_1_DELTA_7" value="7"/>
<value name="RAW_BYTE" value="8"/>
<value name="UNIQUE_2" value="9"/>
<value name="ORIGIN_2_DELTA_1" value="10"/>
<value name="ORIGIN_2_DELTA_2" value="11"/>
<value name="ORIGIN_2_DELTA_3" value="12"/>
<value name="ORIGIN_2_DELTA_4" value="13"/>
<value name="ORIGIN_2_DELTA_5" value="14"/>
<value name="ORIGIN_2_DELTA_6" value="15"/>
</enum>
<enum name="CS_MASK_FMT">
<value name="INDEX" value="0"/>
<value name="BYTE" value="1"/>
<value name="BIT" value="2"/>
<value name="FULL" value="3"/>
</enum>
<enum name="CS_TYPE">
<value name="PRIM" value="0"/>
<value name="LINK" value="2"/>
<value name="TERM" value="3"/>
</enum>
<struct name="SCISSOR_WORD_0" length="1">
<field name="scw0_xmin" start="16" end="31" type="uint"/>
@@ -37,4 +71,100 @@ SOFTWARE.
<field name="scw1_ymax" start="0" end="15" type="uint"/>
</struct>
<struct name="CONTROL_STREAM" length="1">
<field name="cs_type" start="30" end="31" type="CS_TYPE"/>
<field name="cs_link" start="3" end="29" type="uint">
<define name="UNIT_SIZE" value="128"/>
</field>
</struct>
<struct name="PRIMITIVE_FORMAT" length="1">
<field name="cs_type" start="30" end="31" type="CS_TYPE"/>
<field name="cs_isp_state_read" start="29" end="29" type="bool"/>
<field name="cs_isp_state_size" start="26" end="28" type="uint"/>
<field name="cs_prim_total" start="19" end="25" type="uint"/>
<field name="cs_mask_fmt" start="17" end="18" type="CS_MASK_FMT"/>
<field name="cs_prim_base_pres" start="16" end="16" type="bool"/>
<field name="cs_prim_base_offset" start="0" end="15" type="uint"/>
</struct>
<struct name="PRIMITIVE_BASE" length="1">
<field name="cs_prim_base" start="0" end="31" shift="2" type="address"/>
</struct>
<struct name="COMPRESSION_SIZE_WORD" length="1">
<field name="cs_isp_comp_table_size" start="27" end="31" type="uint"/>
<field name="cs_tsp_comp_format_size" start="21" end="26" type="uint"/>
<field name="cs_tsp_comp_table_size" start="9" end="18" type="uint"/>
<field name="cs_tsp_comp_vertex_size" start="0" end="8" type="uint"/>
</struct>
<struct name="ISP_COMPRESSION_WORD_0" length="1">
<field name="cf_isp_comp_fmt_z1" start="28" end="31" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_z0" start="24" end="27" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_y2" start="20" end="23" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_y1" start="16" end="19" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_y0" start="12" end="15" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_x2" start="8" end="11" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_x1" start="4" end="7" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_x0" start="0" end="3" type="COMPRESSION_FORMAT"/>
</struct>
<struct name="ISP_COMPRESSION_WORD_1" length="1">
<field name="vf_prim_msaa" start="16" end="16" type="bool"/>
<field name="vf_prim_id_pres" start="15" end="15" type="bool"/>
<field name="vf_vertex_clipped" start="14" end="14" type="bool"/>
<field name="vf_vertex_total" start="8" end="13" type="uint"/>
<field name="cf_isp_comp_fmt_z3" start="4" end="7" type="COMPRESSION_FORMAT"/>
<field name="cf_isp_comp_fmt_z2" start="0" end="3" type="COMPRESSION_FORMAT"/>
</struct>
<struct name="INDEX_DATA" length="1">
<field name="ix_edge_flag1_ab" start="30" end="30" type="bool"/>
<field name="ix_index1_0" start="24" end="29" type="uint"/>
<field name="ix_bf_flag0" start="23" end="23" type="bool"/>
<field name="ix_edge_flag0_ca" start="22" end="22" type="bool"/>
<field name="ix_index0_2" start="16" end="21" type="uint"/>
<field name="ix_edge_flag0_bc" start="14" end="14" type="bool"/>
<field name="ix_index0_1" start="8" end="13" type="uint"/>
<field name="ix_edge_flag0_ab" start="6" end="6" type="bool"/>
<field name="ix_index0_0" start="0" end="5" type="uint"/>
</struct>
<struct name="ISP_VERTEX_XY" length="1">
<field name="sign" start="23" end="23" type="bool"/>
<field name="integer" start="8" end="22" type="uint"/>
<field name="frac" start="0" end="7" type="uint"/>
</struct>
<struct name="ISP_VERTEX_WORD_0" length="1">
<field name="y0" start="24" end="31" type="uint"/>
<field name="x0" start="0" end="23" type="uint"/>
</struct>
<struct name="ISP_VERTEX_WORD_1" length="1">
<field name="z0" start="16" end="31" type="uint"/>
<field name="y0" start="0" end="15" type="uint">
<define name="SHIFT" value="8"/>
</field>
</struct>
<struct name="ISP_VERTEX_WORD_2" length="1">
<field name="x1" start="16" end="31" type="uint"/>
<field name="z0" start="0" end="15" type="uint">
<define name="SHIFT" value="16"/>
</field>
</struct>
<struct name="ISP_VERTEX_WORD_3" length="1">
<field name="y1" start="8" end="31" type="uint"/>
<field name="x1" start="0" end="7" type="uint">
<define name="SHIFT" value="16"/>
</field>
</struct>
<struct name="ISP_VERTEX_WORD_4" length="1">
<field name="z1" start="0" end="31" type="uint"/>
</struct>
</csbgen>

View File

@@ -125,6 +125,16 @@
*/
#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
/* Size of the image state in 64-bit units. */
#define ROGUE_MAXIMUM_IMAGE_STATE_SIZE_IN_ULONGLONGS 2U
/* Size of the image state in dwords. The last 64-bit word is optional for
* non-YUV textures.
*/
#define ROGUE_MAXIMUM_IMAGE_STATE_SIZE \
(ROGUE_MAXIMUM_IMAGE_STATE_SIZE_IN_ULONGLONGS * \
(sizeof(uint64_t) / sizeof(uint32_t)))
#define PVR_NUM_PBE_EMIT_REGS 8U
#endif /* ROGUE_HW_DEFS_H */

View File

@@ -57,6 +57,7 @@ pvr_files = files(
'pvr_pass.c',
'pvr_pipeline.c',
'pvr_pipeline_cache.c',
'pvr_transfer_frag_store.c',
'pvr_query.c',
'pvr_query_compute.c',
'pvr_queue.c',

View File

@@ -433,22 +433,18 @@ void pvr_pds_pixel_shader_sa_initialize(
* \param dest_offset Destination offset in the attribute.
* \param dma_size The size of the DMA in words.
* \param src_address Source address for the burst.
* \param last Last DMA in program.
* \param dev_info PVR device info structure.
* \returns The number of DMA transfers required.
*/
uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
uint64_t *dma_address,
uint32_t dest_offset,
uint32_t dma_size,
uint64_t src_address,
bool last,
const struct pvr_device_info *dev_info)
{
/* Simplified for MS2. */
/* Force to 1 DMA. */
const uint32_t num_kicks = 1;
dma_control[0] = dma_size
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
dma_control[0] |= dest_offset
@@ -457,12 +453,15 @@ uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
dma_address[0] = src_address;
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
}
if (last)
dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
return num_kicks;
dma_address[0] = src_address;
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
/* Force to 1 DMA. */
return 1;
}
/* FIXME: use the csbgen interface and pvr_csb_pack.

View File

@@ -624,6 +624,7 @@ uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
uint32_t dest_offset,
uint32_t dma_size,
uint64_t src_address,
bool last,
const struct pvr_device_info *dev_info);
void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,

View File

@@ -28,6 +28,7 @@
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_formats.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "pvr_shader_factory.h"
#include "pvr_static_shaders.h"
@@ -114,35 +115,161 @@ void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
assert(!"Unimplemented");
}
/**
 * Allocates a zero-initialized transfer command from the command buffer's
 * pool allocator and fills in its non-zero defaults.
 *
 * On allocation failure the error is recorded in cmd_buffer->state.status.
 *
 * \param cmd_buffer Command buffer the transfer command is associated with.
 * \returns The new transfer command, or NULL if the allocation failed.
 */
static struct pvr_transfer_cmd *
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_transfer_cmd *const cmd =
      vk_zalloc(&cmd_buffer->vk.pool->alloc,
                sizeof(*cmd),
                8U,
                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   if (cmd == NULL) {
      cmd_buffer->state.status =
         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   /* Every other field, mapping_count included, stays zero from vk_zalloc(). */
   cmd->cmd_buffer = cmd_buffer;
   cmd->addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   cmd->resolve_op = PVR_RESOLVE_BLEND;
   cmd->filter = PVR_FILTER_POINT;

   return cmd;
}
/**
 * Describes a region of a buffer as a linear, single-sampled transfer surface.
 *
 * \param surface   Surface description to fill in.
 * \param rect      Rectangle whose extent is set to \p width x \p height. Its
 *                  offset is not written here; callers rely on it already
 *                  being zero because the transfer command is zero allocated.
 * \param dev_addr  Base device address of the buffer.
 * \param offset    Offset added to \p dev_addr to reach the surface.
 * \param vk_format Format used to interpret the buffer contents.
 * \param width     Surface width in texels; also used as the stride.
 * \param height    Surface height in texels.
 */
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
                                     VkRect2D *rect,
                                     pvr_dev_addr_t dev_addr,
                                     VkDeviceSize offset,
                                     VkFormat vk_format,
                                     uint32_t width,
                                     uint32_t height)
{
   const VkExtent2D extent = { .width = width, .height = height };

   surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
   surface->vk_format = vk_format;
   surface->sample_count = 1;

   /* The offset is folded into the address, so the rectangle itself starts
    * at the origin of the surface.
    */
   surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
   surface->width = extent.width;
   surface->height = extent.height;
   surface->stride = extent.width;

   rect->extent = extent;
}
/**
 * Records the transfer commands needed to copy \p size bytes between two
 * buffers.
 *
 * The region is split into rectangles of at most
 * PVR_MAX_TRANSFER_SIZE_IN_TEXELS x PVR_MAX_TRANSFER_SIZE_IN_TEXELS texels.
 * Each loop iteration picks the widest texel format that still fits in the
 * remaining size (16, 4 or 1 byte per texel), emits one transfer command for
 * one rectangle and advances by the number of bytes that rectangle covers.
 *
 * \param cmd_buffer Command buffer to record into.
 * \param src_addr   Base device address of the source buffer.
 * \param src_offset Byte offset of the region in the source buffer.
 * \param dst_addr   Base device address of the destination buffer.
 * \param dst_offset Byte offset of the region in the destination buffer.
 * \param size       Number of bytes to copy.
 * \returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if a transfer command
 *          could not be allocated, or the error returned by
 *          pvr_cmd_buffer_add_transfer_cmd().
 */
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
                                           pvr_dev_addr_t src_addr,
                                           VkDeviceSize src_offset,
                                           pvr_dev_addr_t dst_addr,
                                           VkDeviceSize dst_offset,
                                           VkDeviceSize size)
{
   VkDeviceSize offset = 0;

   while (offset < size) {
      VkDeviceSize remaining_size = size - offset;
      struct pvr_transfer_cmd *transfer_cmd;
      uint32_t texel_width;
      VkDeviceSize texels;
      VkFormat vk_format;
      VkResult result;
      uint32_t height;
      uint32_t width;

      if (remaining_size >= 16U) {
         vk_format = VK_FORMAT_R32G32B32A32_UINT;
         texel_width = 16U;
      } else if (remaining_size >= 4U) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else {
         vk_format = VK_FORMAT_R8_UINT;
         texel_width = 1U;
      }

      /* Always >= 1 given the format selection above, so every iteration
       * makes forward progress.
       */
      texels = remaining_size / texel_width;

      /* Try to do max-width rects, fall back to a 1-height rect for the
       * remainder.
       */
      if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
         width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
      } else {
         width = texels;
         height = 1;
      }

      /* Sets cmd_buffer->state.status on failure and already stores
       * cmd_buffer in the returned command.
       */
      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* flags is zero straight after allocation, so for now the source
       * surface is always set up here.
       */
      if (!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL)) {
         pvr_setup_buffer_surface(&transfer_cmd->src,
                                  &transfer_cmd->mappings[0].src_rect,
                                  src_addr,
                                  offset + src_offset,
                                  vk_format,
                                  width,
                                  height);
         transfer_cmd->src_present = true;
      }

      pvr_setup_buffer_surface(&transfer_cmd->dst,
                               &transfer_cmd->scissor,
                               dst_addr,
                               offset + dst_offset,
                               vk_format,
                               width,
                               height);

      /* 1:1 copy: the destination rectangle is the whole scissor. */
      if (transfer_cmd->src_present)
         transfer_cmd->mappings[0].dst_rect = transfer_cmd->scissor;

      transfer_cmd->mapping_count++;

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }

      /* Widen before multiplying: the operands are 32-bit, and a wrapped
       * product would stall or corrupt the 64-bit byte offset.
       */
      offset += (VkDeviceSize)width * height * texel_width;
   }

   return VK_SUCCESS;
}
void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2 *pCopyBufferInfo)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
const size_t regions_size =
pCopyBufferInfo->regionCount * sizeof(*pCopyBufferInfo->pRegions);
struct pvr_transfer_cmd *transfer_cmd;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
transfer_cmd = vk_alloc(&cmd_buffer->vk.pool->alloc,
sizeof(*transfer_cmd) + regions_size,
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!transfer_cmd) {
cmd_buffer->state.status =
vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
VkResult result;
return;
result =
pvr_cmd_copy_buffer_region(cmd_buffer,
src->dev_addr,
pCopyBufferInfo->pRegions[i].srcOffset,
dst->dev_addr,
pCopyBufferInfo->pRegions[i].dstOffset,
pCopyBufferInfo->pRegions[i].size);
if (result != VK_SUCCESS)
return;
}
transfer_cmd->src = src;
transfer_cmd->dst = dst;
transfer_cmd->region_count = pCopyBufferInfo->regionCount;
memcpy(transfer_cmd->regions, pCopyBufferInfo->pRegions, regions_size);
pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
}
/**

View File

@@ -32,6 +32,7 @@
#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_common.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_device_info.h"
@@ -604,37 +605,6 @@ err_csb_finish:
return result;
}
struct pvr_combined_image_sampler_descriptor {
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
union pvr_sampler_descriptor sampler;
};
#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
(_size), \
"Size of '" #_field_name "' in '" #_struct_type \
"' differs from expected")
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
sizeof(uint32_t));
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
sizeof(uint32_t));
#undef CHECK_STRUCT_FIELD_SIZE
static VkResult pvr_setup_texture_state_words(
struct pvr_device *device,
struct pvr_combined_image_sampler_descriptor *descriptor,

View File

@@ -39,9 +39,11 @@
* relevant for the driver/compiler interface (no Vulkan types).
*/
#include "hwdef/rogue_hw_defs.h"
#include "pvr_limits.h"
#include "pvr_types.h"
#include "util/list.h"
#include "util/macros.h"
#include "vk_object.h"
#include "vk_sync.h"
@@ -146,6 +148,13 @@ enum pvr_stage_allocation {
PVR_STAGE_ALLOCATION_COUNT
};
/* Source filtering method selectable for a transfer command (stored in
 * pvr_transfer_cmd::filter).
 */
enum pvr_filter {
PVR_FILTER_DONTCARE, /* Any filtering mode is acceptable. */
PVR_FILTER_POINT,
PVR_FILTER_LINEAR,
PVR_FILTER_BICUBIC,
};
enum pvr_resolve_op {
PVR_RESOLVE_BLEND,
PVR_RESOLVE_MIN,
@@ -202,6 +211,42 @@ union pvr_sampler_descriptor {
} data;
};
struct pvr_combined_image_sampler_descriptor {
/* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
union pvr_sampler_descriptor sampler;
};
#define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
(_size), \
"Size of '" #_field_name "' in '" #_struct_type \
"' differs from expected")
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
PVR_IMAGE_DESCRIPTOR_SIZE * sizeof(uint32_t));
#if 0
/* TODO: Don't really want to include pvr_csb.h in here since this header is
* shared with the compiler. Figure out a better place for these.
*/
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
sizeof(uint32_t));
CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
image,
(pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
sizeof(uint32_t));
#endif
#undef CHECK_STRUCT_FIELD_SIZE
struct pvr_sampler {
struct vk_object_base base;

View File

@@ -1265,6 +1265,7 @@ static VkResult pvr_pds_idfwdf_programs_create_and_upload(
0,
shareds,
shareds_buffer_addr.addr,
false,
dev_info);
/* DMA temp regs. */

View File

@@ -25,21 +25,25 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <vulkan/vulkan.h>
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_cdm_load_sr.h"
#include "pvr_common.h"
#include "pvr_csb.h"
#include "pvr_job_context.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_vdm_load_sr.h"
#include "pvr_vdm_store_sr.h"
#include "pvr_winsys.h"
#include "util/macros.h"
#include "util/os_file.h"
#include "util/u_dynarray.h"
#include "vk_alloc.h"
#include "vk_log.h"
@@ -1259,15 +1263,30 @@ static void pvr_transfer_eot_shaders_fini(struct pvr_device *device,
static VkResult pvr_transfer_ctx_shaders_init(struct pvr_device *device,
struct pvr_transfer_ctx *ctx)
{
/* TODO: Setup USC fragments. */
VkResult result;
return pvr_transfer_eot_shaders_init(device, ctx);
result = pvr_transfer_frag_store_init(device, &ctx->frag_store);
if (result != VK_SUCCESS)
goto err_out;
result = pvr_transfer_eot_shaders_init(device, ctx);
if (result != VK_SUCCESS)
goto err_frag_store_fini;
return VK_SUCCESS;
err_frag_store_fini:
pvr_transfer_frag_store_fini(device, &ctx->frag_store);
err_out:
return result;
}
/* Releases the shader resources held by a transfer context: first the EOT
 * shaders, then the fragment shader store in ctx->frag_store.
 */
static void pvr_transfer_ctx_shaders_fini(struct pvr_device *device,
struct pvr_transfer_ctx *ctx)
{
pvr_transfer_eot_shaders_fini(device, ctx);
pvr_transfer_frag_store_fini(device, &ctx->frag_store);
}
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,

View File

@@ -24,8 +24,11 @@
#ifndef PVR_JOB_CONTEXT_H
#define PVR_JOB_CONTEXT_H
#include "pvr_common.h"
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_winsys.h"
/* Support PDS code/data loading/storing to the 'B' shared register state
@@ -143,6 +146,8 @@ struct pvr_transfer_ctx {
struct pvr_winsys_transfer_ctx *ws_ctx;
struct pvr_transfer_frag_store frag_store;
struct pvr_bo *usc_eot_bos[PVR_TRANSFER_MAX_RENDER_TARGETS];
struct pvr_pds_upload pds_unitex_code[PVR_TRANSFER_MAX_TEXSTATE_DMA]

File diff suppressed because it is too large Load Diff

View File

@@ -27,13 +27,23 @@
#include <stdint.h>
#include <vulkan/vulkan.h>
struct pvr_device;
struct pvr_sub_cmd_transfer;
struct pvr_transfer_ctx;
struct vk_sync;
VkResult pvr_transfer_job_submit(struct pvr_device *device,
struct pvr_transfer_ctx *ctx,
/**
* Destination pixels not covered by any of the destination rectangles but
* inside the scissor are filled with the clear color.
*/
#define PVR_TRANSFER_CMD_FLAGS_FILL 0x00000800U
/** If using TQ3D, route to fast2d. */
#define PVR_TRANSFER_CMD_FLAGS_FAST2D 0x00200000U
/** Merge a depth or stencil against a depth + stencil texture. */
#define PVR_TRANSFER_CMD_FLAGS_DSMERGE 0x00000200U
/** Valid if doing a DS merge with depth + stencil to depth + stencil. */
#define PVR_TRANSFER_CMD_FLAGS_PICKD 0x00000400U
VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
struct pvr_sub_cmd_transfer *sub_cmd,
struct vk_sync *wait,
struct vk_sync *signal_sync);

View File

@@ -58,6 +58,7 @@
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_device.h"
@@ -353,16 +354,118 @@ struct pvr_buffer_view {
uint64_t texture_state[2];
};
#define PVR_TRANSFER_MAX_CUSTOM_MAPPINGS 6U
/** A surface describes a source or destination for a transfer operation. */
struct pvr_transfer_cmd_surface {
/* Device address of the surface. */
pvr_dev_addr_t dev_addr;
/* Memory address for extra U/V planes. */
pvr_dev_addr_t uv_address[2];
/* Surface width in texels. */
uint32_t width;
/* Surface height in texels. */
uint32_t height;
/* NOTE(review): presumably the surface depth for 3D surfaces; it is the
 * upper bound of z_position below - confirm.
 */
uint32_t depth;
/* Z position in a 3D texture. 0.0f <= z_position <= depth. */
float z_position;
/* Stride in texels. */
uint32_t stride;
/* Vulkan format of the surface. */
VkFormat vk_format;
/* Memory layout of the surface. */
enum pvr_memlayout mem_layout;
/* Sample count of the surface. */
uint32_t sample_count;
};
/* Pairs a rectangle on the source surface with a rectangle on the destination
 * surface of a transfer command.
 */
struct pvr_rect_mapping {
VkRect2D src_rect;
VkRect2D dst_rect;
};
/* Describes an Alpha-Transparency configuration - for Transfer Queue Use. */
struct pvr_transfer_alpha {
enum pvr_alpha_type type;
/* Global alpha value. */
uint32_t global;
/* Custom blend op for rgb. */
uint32_t custom_rgb;
/* Custom blend op for alpha. */
uint32_t custom_alpha;
/* Custom global alpha value for alpha output. */
uint32_t global2;
/* Custom multiplication of global and source alpha. */
bool glob_src_mul;
/* Custom zero source alpha transparency stage. */
bool zero_src_a_trans;
/* Enable argb1555 alpha components. */
bool alpha_components;
/* Source alpha value when argb1555 alpha bit is 0. */
uint32_t component0;
/* Source alpha value when argb1555 alpha bit is 1. */
uint32_t component1;
};
struct pvr_transfer_blit {
/* 16 bit rop4 (ie two 8 bit rop3's). */
uint32_t rop_code;
/* Color key mask. */
uint32_t color_mask;
/* Alpha blend. */
struct pvr_transfer_alpha alpha;
VkOffset2D offset;
};
struct pvr_transfer_cmd {
/* Node to link this cmd into the transfer_cmds list in
* pvr_sub_cmd::transfer structure.
*/
struct list_head link;
struct pvr_buffer *src;
struct pvr_buffer *dst;
uint32_t region_count;
VkBufferCopy2 regions[0];
uint32_t flags;
struct pvr_transfer_cmd_surface src;
bool src_present;
union fi clear_color[4];
struct pvr_transfer_cmd_surface dst;
VkRect2D scissor;
uint32_t mapping_count;
struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
/* In the case of a simple 1:1 copy, this setting does not affect the output
* but will affect performance. Use clamp to edge when possible.
*/
/* This is of type enum PVRX(TEXSTATE_ADDRMODE). */
int addr_mode;
/* Source filtering method. */
enum pvr_filter filter;
/* MSAA resolve operation. */
enum pvr_resolve_op resolve_op;
struct pvr_transfer_blit blit;
/* Pointer to cmd buffer this transfer cmd belongs to. This is mainly used
* to link buffer objects allocated during job submission into
* cmd_buffer::bo_list head.
*/
struct pvr_cmd_buffer *cmd_buffer;
};
struct pvr_sub_cmd_gfx {

View File

@@ -361,8 +361,7 @@ static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
return result;
result =
pvr_transfer_job_submit(device,
queue->transfer_ctx,
pvr_transfer_job_submit(queue->transfer_ctx,
sub_cmd,
queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
sync);

View File

@@ -218,7 +218,8 @@ pvr_pack_tex_state(struct pvr_device *device,
if (iview_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
array_layers /= 6;
word1.depth = array_layers - 1;
if (array_layers > 0)
word1.depth = array_layers - 1;
}
word1.texaddr = PVR_DEV_ADDR_OFFSET(info->addr, info->offset);

View File

@@ -0,0 +1,392 @@
/*
* Copyright © 2023 Imagination Technologies Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan_core.h>
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_common.h"
#include "pvr_device_info.h"
#include "pvr_job_transfer.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_log.h"
#define PVR_TRANSFER_BYTE_UNWIND_MAX 16U
/* Data stored behind a hash table entry of the transfer fragment shader store
 * (see to_pvr_entry_data()).
 */
struct pvr_transfer_frag_store_entry_data {
/* NOTE(review): presumably the PDS offset of the program that kicks the USC
 * shader - confirm against the code that fills it in.
 */
pvr_dev_addr_t kick_usc_pds_offset;
struct pvr_bo *kick_usc_pds_upload;
/* Buffer object holding the uploaded USC fragment shader binary. */
struct pvr_bo *usc_upload;
/* Shared register layout the fragment shader was generated with. */
struct pvr_tq_frag_sh_reg_layout sh_reg_layout;
};
/* Casts the data pointer of a hash table entry to the store's entry data
 * type. _Generic selects the cast from the type of _entry so that a const
 * entry yields a pointer to const entry data; any other argument type fails to
 * compile.
 */
#define to_pvr_entry_data(_entry) \
_Generic((_entry), \
struct hash_entry *: (struct pvr_transfer_frag_store_entry_data *)((_entry)->data), \
const struct hash_entry *: (const struct pvr_transfer_frag_store_entry_data *)((_entry)->data))
/**
 * Initializes a transfer fragment shader store.
 *
 * The store is overwritten with its maximum multisample count (taken from the
 * device's max_multisample feature value, defaulting to 1) and a freshly
 * created hash table with 32-bit keys.
 *
 * \param device Device the store belongs to.
 * \param store  Store to initialize.
 * \returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the hash table could
 *          not be created.
 */
VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
                                      struct pvr_transfer_frag_store *store)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct hash_table *const shader_table =
      _mesa_hash_table_create_u32_keys(NULL);

   /* The store is written even on failure, with a NULL hash table. */
   *store = (struct pvr_transfer_frag_store){
      .max_multisample = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 1U),
      .hash_table = shader_table,
   };

   if (shader_table != NULL)
      return VK_SUCCESS;

   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
/**
* \brief Returns a key based on shader properties.
*
* Returns a unique key that can be used to uniquely identify a transfer
* fragment shader based on the provided shader properties.
*
* The key is built by packing the properties into a 32-bit word: for each
* field the accumulated key is shifted left by the field's width and the
* field's value is OR-ed into the low bits.
*
* Make sure that the non valid parts of shader_props are memset to 0. Otherwise
* these bits might appear in the key as uninitialized data and might not
* match a key for the same shader.
*
* \param max_multisample Maximum sample count; sizes the resolve op and
*                        sample count fields.
* \param shader_props    Shader properties to build the key from.
* \returns The 32-bit key.
*/
static uint32_t pvr_transfer_frag_shader_key(
uint32_t max_multisample,
const struct pvr_tq_shader_properties *shader_props)
{
const struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
/* Field widths, in bits, of the values packed into the key. */
uint32_t resolve_op_num = max_multisample + PVR_RESOLVE_SAMPLE0;
uint32_t num_layers_bits = util_logbase2_ceil(PVR_TRANSFER_MAX_LAYERS + 1U);
uint32_t layer_float_bits = util_logbase2_ceil(PVR_INT_COORD_SET_FLOATS_NUM);
uint32_t pixel_src_bits = util_logbase2_ceil(PVR_TRANSFER_PBE_PIXEL_SRC_NUM);
uint32_t byte_unwind_bits = util_logbase2_ceil(PVR_TRANSFER_BYTE_UNWIND_MAX);
uint32_t resolve_op_bits = util_logbase2_ceil(resolve_op_num);
uint32_t sample_cnt_bits = util_last_bit(util_logbase2(max_multisample));
uint32_t hash = 0U;
/* shift_hash() makes room for the next field. In DEBUG builds it also keeps a
 * running total of the shifts and asserts that the key still fits in 32 bits.
 */
#if defined(DEBUG)
uint32_t max_shift = 0U;
# define shift_hash(hash, num) \
do { \
max_shift += (num); \
assert(max_shift <= 32U); \
\
(hash) <<= (num); \
} while (0U)
#else
# define shift_hash(hash, num) hash <<= (num)
#endif
/* Hash layer info. */
shift_hash(hash, layer_float_bits);
hash |= (uint32_t)shader_props->layer_props.layer_floats;
shift_hash(hash, 1U);
hash |= layer->sample;
/* Two single-bit fields that are always written as false.
 * NOTE(review): presumably placeholders for properties not supported yet -
 * confirm.
 */
shift_hash(hash, 1U);
hash |= (uint32_t) false;
shift_hash(hash, 1U);
hash |= (uint32_t) false;
shift_hash(hash, pixel_src_bits);
hash |= (uint32_t)layer->pbe_format;
shift_hash(hash, resolve_op_bits);
hash |= (uint32_t)layer->resolve_op;
/* The sample count is stored as its log2, so it must be a power of two. */
assert(util_is_power_of_two_nonzero(layer->sample_count));
shift_hash(hash, sample_cnt_bits);
hash |= (uint32_t)util_logbase2(layer->sample_count);
shift_hash(hash, 1U);
hash |= (uint32_t)layer->msaa;
shift_hash(hash, byte_unwind_bits);
hash |= layer->byte_unwind;
shift_hash(hash, 1U);
hash |= (uint32_t)layer->linear;
/* End layer info. */
shift_hash(hash, 1U);
hash |= (uint32_t)shader_props->full_rate;
shift_hash(hash, 1U);
hash |= (uint32_t)shader_props->iterated;
shift_hash(hash, 1U);
hash |= (uint32_t)shader_props->pick_component;
shift_hash(hash, num_layers_bits);
/* Just 1 layer. */
hash |= 1;
/* The alpha type is given a fixed 3-bit field. */
shift_hash(hash, 3U);
hash |= shader_props->alpha_type;
#undef shift_hash
return hash;
}
/* Converts an integer key, via uintptr_t, into the void * form passed as the
 * key argument to the hash table search and insert calls in this file.
 */
#define to_hash_table_key(_key) ((void *)(uintptr_t)(_key))
static VkResult pvr_transfer_frag_store_entry_data_compile(
struct pvr_device *device,
struct pvr_transfer_frag_store_entry_data *const entry_data,
const struct pvr_tq_shader_properties *shader_props,
uint32_t *const num_usc_temps_out)
{
const uint32_t image_desc_offset =
offsetof(struct pvr_combined_image_sampler_descriptor, image) / 4;
const uint32_t sampler_desc_offset =
offsetof(struct pvr_combined_image_sampler_descriptor, sampler) / 4;
const uint32_t cache_line_size =
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
uint32_t next_free_sh_reg = 0;
struct util_dynarray shader;
VkResult result;
/* TODO: Allocate all combined image samplers if needed? Otherwise change the
* array to a single descriptor.
*/
sh_reg_layout->combined_image_samplers.offsets[0].image =
next_free_sh_reg + image_desc_offset;
sh_reg_layout->combined_image_samplers.offsets[0].sampler =
next_free_sh_reg + sampler_desc_offset;
sh_reg_layout->combined_image_samplers.count = 1;
next_free_sh_reg += sizeof(struct pvr_combined_image_sampler_descriptor) / 4;
/* TODO: Handle dynamic_const_regs used for PVR_INT_COORD_SET_FLOATS_{4,6}, Z
* position, texel unwind, etc. when compiler adds support for them.
*/
sh_reg_layout->dynamic_consts.offset = next_free_sh_reg;
sh_reg_layout->dynamic_consts.count = 0;
sh_reg_layout->driver_total = next_free_sh_reg;
pvr_uscgen_tq_frag(shader_props,
&entry_data->sh_reg_layout,
num_usc_temps_out,
&shader);
result = pvr_gpu_upload_usc(device,
util_dynarray_begin(&shader),
util_dynarray_num_elements(&shader, uint8_t),
cache_line_size,
&entry_data->usc_upload);
util_dynarray_fini(&shader);
if (result != VK_SUCCESS)
return result;
return VK_SUCCESS;
}
static VkResult pvr_transfer_frag_store_entry_data_create(
struct pvr_device *device,
struct pvr_transfer_frag_store *store,
const struct pvr_tq_shader_properties *shader_props,
const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
{
struct pvr_pds_kickusc_program kick_usc_pds_prog = { 0 };
struct pvr_transfer_frag_store_entry_data *entry_data;
pvr_dev_addr_t dev_addr;
uint32_t num_usc_temps;
VkResult result;
entry_data = ralloc(store->hash_table, __typeof__(*entry_data));
if (!entry_data)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
result = pvr_transfer_frag_store_entry_data_compile(device,
entry_data,
shader_props,
&num_usc_temps);
if (result != VK_SUCCESS)
goto err_free_entry;
dev_addr = entry_data->usc_upload->vma->dev_addr;
dev_addr.addr -= device->heaps.usc_heap->base_addr.addr;
pvr_pds_setup_doutu(&kick_usc_pds_prog.usc_task_control,
dev_addr.addr,
num_usc_temps,
shader_props->full_rate
? PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL)
: PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
false);
pvr_pds_kick_usc(&kick_usc_pds_prog, NULL, 0U, false, PDS_GENERATE_SIZES);
result = pvr_bo_alloc(
device,
device->heaps.pds_heap,
(kick_usc_pds_prog.data_size + kick_usc_pds_prog.code_size) * 4,
16,
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
&entry_data->kick_usc_pds_upload);
if (result != VK_SUCCESS)
goto err_free_usc_upload;
pvr_pds_kick_usc(&kick_usc_pds_prog,
entry_data->kick_usc_pds_upload->bo->map,
0U,
false,
PDS_GENERATE_CODEDATA_SEGMENTS);
dev_addr = entry_data->kick_usc_pds_upload->vma->dev_addr;
dev_addr.addr -= device->heaps.pds_heap->base_addr.addr;
entry_data->kick_usc_pds_offset = dev_addr;
*entry_data_out = entry_data;
return VK_SUCCESS;
err_free_usc_upload:
pvr_bo_free(device, entry_data->usc_upload);
err_free_entry:
ralloc_free(entry_data);
return result;
}
/**
 * Releases the two device buffers held by a store entry (the PDS kick program
 * first, then the USC shader upload) without freeing the entry's own host
 * allocation.
 *
 * \param device     Device the buffers were allocated from.
 * \param entry_data Entry whose buffers are released.
 */
static inline void pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   pvr_bo_free(device, entry_data->kick_usc_pds_upload);
   pvr_bo_free(device, entry_data->usc_upload);
}
/**
 * Destroys a store entry completely: its device buffers are released and then
 * the entry's host memory is handed back with ralloc_free().
 *
 * \param device     Device the entry's buffers were allocated from.
 * \param entry_data Entry to destroy; must not be used afterwards.
 */
static inline void pvr_transfer_frag_store_entry_data_destroy(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   /* ralloc_free() takes a non-const pointer, so the const has to be cast
    * away here.
    */
   void *const entry_mem = (void *)entry_data;

   pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device,
                                                             entry_data);
   ralloc_free(entry_mem);
}
/**
 * Looks up the store entry matching the given shader properties, creating and
 * inserting a new one when the store does not have it yet.
 *
 * \param device         Device used if a new entry has to be created.
 * \param store          Store to search and, on a miss, extend.
 * \param shader_props   Properties identifying the shader.
 * \param entry_data_out Receives the entry's data on success.
 * \return VK_SUCCESS, the error from entry creation, or
 *         VK_ERROR_OUT_OF_HOST_MEMORY if the hash table insertion failed (the
 *         freshly created entry is destroyed in that case).
 */
static VkResult pvr_transfer_frag_store_get_entry(
   struct pvr_device *device,
   struct pvr_transfer_frag_store *store,
   const struct pvr_tq_shader_properties *shader_props,
   const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
{
   const uint32_t shader_key =
      pvr_transfer_frag_shader_key(store->max_multisample, shader_props);
   /* Init so that gcc stops complaining. */
   const struct pvr_transfer_frag_store_entry_data *created = NULL;
   const struct hash_entry *found;
   VkResult result;

   found = _mesa_hash_table_search(store->hash_table,
                                   to_hash_table_key(shader_key));
   if (found) {
      /* Already in the store: hand back the existing entry. */
      *entry_data_out = to_pvr_entry_data(found);
      return VK_SUCCESS;
   }

   result = pvr_transfer_frag_store_entry_data_create(device,
                                                      store,
                                                      shader_props,
                                                      &created);
   if (result != VK_SUCCESS)
      return result;

   assert(created);

   found = _mesa_hash_table_insert(store->hash_table,
                                   to_hash_table_key(shader_key),
                                   (void *)created);
   if (!found) {
      pvr_transfer_frag_store_entry_data_destroy(device, created);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *entry_data_out = to_pvr_entry_data(found);

   return VK_SUCCESS;
}
/**
 * Returns the PDS program offset and shared register layout of the fragment
 * shader matching the given properties, building the shader first if the store
 * does not contain it yet.
 *
 * \param device           Device used if the shader has to be created.
 * \param store            Store to look the shader up in.
 * \param shader_props     Properties identifying the shader.
 * \param pds_dev_addr_out Receives the entry's kick_usc_pds_offset.
 * \param reg_layout_out   Receives a pointer to the entry's sh_reg_layout.
 * \return VK_SUCCESS or the error from the lookup; the outputs are only
 *         written on success.
 */
VkResult pvr_transfer_frag_store_get_shader_info(
   struct pvr_device *device,
   struct pvr_transfer_frag_store *store,
   const struct pvr_tq_shader_properties *shader_props,
   pvr_dev_addr_t *const pds_dev_addr_out,
   const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out)
{
   /* Init so that gcc stops complaining. */
   const struct pvr_transfer_frag_store_entry_data *shader_entry = NULL;
   const VkResult result = pvr_transfer_frag_store_get_entry(device,
                                                             store,
                                                             shader_props,
                                                             &shader_entry);

   if (result == VK_SUCCESS) {
      *pds_dev_addr_out = shader_entry->kick_usc_pds_offset;
      *reg_layout_out = &shader_entry->sh_reg_layout;
   }

   return result;
}
/**
 * Tears down a transfer fragment shader store: releases the device buffers of
 * every entry and then destroys the hash table.
 *
 * \param device Device the entries' buffers were allocated from.
 * \param store  Store to tear down.
 */
void pvr_transfer_frag_store_fini(struct pvr_device *device,
                                  struct pvr_transfer_frag_store *store)
{
   struct hash_table *const table = store->hash_table;

   hash_table_foreach_remove(table, entry)
   {
      const struct pvr_transfer_frag_store_entry_data *const data =
         to_pvr_entry_data(entry);

      /* ralloc_free() in _mesa_hash_table_destroy() will free each entry's
       * memory so let's not waste extra time freeing them one by one and
       * unlinking.
       */
      pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device, data);
   }

   _mesa_hash_table_destroy(table, NULL);
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright © 2023 Imagination Technologies Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef PVR_TRANSFER_FRAG_STORE_H
#define PVR_TRANSFER_FRAG_STORE_H
#include <stdint.h>
#include <vulkan/vulkan_core.h>
#include "pvr_device_info.h"
#include "pvr_uscgen.h"
#include "pvr_types.h"
#include "util/hash_table.h"
struct pvr_device;
/* Store of transfer fragment shaders. Lookups search the hash table by a key
 * derived from the requested shader properties, and a shader that is not found
 * is created and inserted.
 */
struct pvr_transfer_frag_store {
/* Passed to pvr_transfer_frag_shader_key() when building lookup keys, where
 * it sizes the resolve-op and sample-count bit fields.
 */
uint32_t max_multisample;
/* Hash table mapping keys, produced by pvr_transfer_frag_shader_key(), to
 * pvr_transfer_frag_store_entry_data entries.
 */
struct hash_table *hash_table;
};
/* NOTE(review): presumably sets up `store` (its hash table and
 * max_multisample) for use with `device`; the definition is not visible from
 * this header, so confirm against the implementation.
 */
VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
struct pvr_transfer_frag_store *store);
/* Releases the device buffers held by every entry in `store` and destroys the
 * store's hash table.
 */
void pvr_transfer_frag_store_fini(struct pvr_device *device,
struct pvr_transfer_frag_store *store);
/* Looks up the fragment shader matching `shader_props` in `store`, creating
 * it if it is not there yet.
 *
 * On success, `pds_dev_addr_out` receives the entry's PDS kick program offset
 * and `reg_layout_out` a pointer to the entry's shared register layout. On
 * failure the error is returned and neither output is written.
 */
VkResult pvr_transfer_frag_store_get_shader_info(
struct pvr_device *device,
struct pvr_transfer_frag_store *store,
const struct pvr_tq_shader_properties *shader_props,
pvr_dev_addr_t *const pds_dev_addr_out,
const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out);
#endif /* PVR_TRANSFER_FRAG_STORE_H */

View File

@@ -88,4 +88,45 @@ static inline bool vk_format_is_normalized(VkFormat vk_format)
return true;
}
/**
 * Counts the color channels that a source and a destination format have in
 * common.
 *
 * - If the destination is an alpha-only format the result is 1. Only the
 *   destination is inspected for this; the source format is not checked for
 *   an alpha channel.
 * - If the destination has more channels than the source, the result is the
 *   destination's channel count.
 * - Otherwise each of the destination's first nr_channels swizzle entries
 *   that selects X, Y, Z or W is counted once if the same swizzle value
 *   appears among the source's first nr_channels swizzle entries.
 *
 * \param src_format Source format.
 * \param dst_format Destination format.
 * \return Number of common channels as described above.
 */
static inline uint32_t
vk_format_get_common_color_channel_count(VkFormat src_format,
                                         VkFormat dst_format)
{
   const struct util_format_description *dst_desc =
      vk_format_description(dst_format);
   const struct util_format_description *src_desc =
      vk_format_description(src_format);
   uint32_t common = 0;

   if (util_format_is_alpha(vk_format_to_pipe_format(dst_format)))
      return 1;

   if (dst_desc->nr_channels > src_desc->nr_channels)
      return dst_desc->nr_channels;

   for (uint32_t d = 0; d < dst_desc->nr_channels; d++) {
      const enum pipe_swizzle wanted = dst_desc->swizzle[d];

      /* Swizzle values past W do not select a channel; skip them. */
      if (wanted <= PIPE_SWIZZLE_W) {
         for (uint32_t s = 0; s < src_desc->nr_channels; s++) {
            if (src_desc->swizzle[s] == wanted) {
               common++;
               break;
            }
         }
      }
   }

   return common;
}
/**
 * Reports whether a Vulkan format is alpha-only, by converting it to the
 * corresponding pipe format and asking util_format_is_alpha().
 */
static inline bool vk_format_is_alpha(VkFormat format)
{
   const enum pipe_format pipe_fmt = vk_format_to_pipe_format(format);

   return util_format_is_alpha(pipe_fmt);
}
#endif /* VK_FORMAT_H */

View File

@@ -258,6 +258,7 @@ struct pvr_winsys_transfer_regs {
uint32_t event_pixel_pds_code;
uint32_t event_pixel_pds_data;
uint32_t event_pixel_pds_info;
uint32_t frag_screen;
uint32_t isp_aa;
uint32_t isp_bgobjvals;
uint32_t isp_ctl;