pvr: Minor refactor of pvr_device.c
Moving a few functions further up here to prepare for the next commit; should make the diffs a lot nicer. No (intentional) functional changes. Signed-off-by: Matt Coster <matt.coster@imgtec.com> Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25326>
This commit is contained in:
@@ -247,6 +247,409 @@ static void pvr_physical_device_get_supported_features(
|
||||
};
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
|
||||
{
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
unsigned build_id_len;
|
||||
uint8_t sha1[20];
|
||||
uint64_t bvnc;
|
||||
|
||||
const struct build_id_note *note =
|
||||
build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
|
||||
if (!note) {
|
||||
return vk_errorf(pdevice,
|
||||
VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Failed to find build-id");
|
||||
}
|
||||
|
||||
build_id_len = build_id_length(note);
|
||||
if (build_id_len < 20) {
|
||||
return vk_errorf(pdevice,
|
||||
VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Build-id too short. It needs to be a SHA");
|
||||
}
|
||||
|
||||
bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);
|
||||
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
|
||||
_mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct pvr_descriptor_limits {
|
||||
uint32_t max_per_stage_resources;
|
||||
uint32_t max_per_stage_samplers;
|
||||
uint32_t max_per_stage_uniform_buffers;
|
||||
uint32_t max_per_stage_storage_buffers;
|
||||
uint32_t max_per_stage_sampled_images;
|
||||
uint32_t max_per_stage_storage_images;
|
||||
uint32_t max_per_stage_input_attachments;
|
||||
};
|
||||
|
||||
static const struct pvr_descriptor_limits *
|
||||
pvr_get_physical_device_descriptor_limits(
|
||||
const struct pvr_device_info *dev_info,
|
||||
const struct pvr_device_runtime_info *dev_runtime_info)
|
||||
{
|
||||
enum pvr_descriptor_cs_level {
|
||||
/* clang-format off */
|
||||
CS4096, /* 6XT and some XE cores with large CS. */
|
||||
CS2560, /* Mid range Rogue XE cores. */
|
||||
CS2048, /* Low end Rogue XE cores. */
|
||||
CS1536, /* Ultra-low-end 9XEP. */
|
||||
CS680, /* lower limits for older devices. */
|
||||
CS408, /* 7XE. */
|
||||
/* clang-format on */
|
||||
};
|
||||
|
||||
static const struct pvr_descriptor_limits descriptor_limits[] = {
|
||||
[CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
|
||||
[CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, },
|
||||
[CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, },
|
||||
[CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, },
|
||||
[CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, },
|
||||
[CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, },
|
||||
};
|
||||
|
||||
const uint32_t common_size =
|
||||
pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
|
||||
dev_runtime_info,
|
||||
UINT32_MAX,
|
||||
1);
|
||||
enum pvr_descriptor_cs_level cs_level;
|
||||
|
||||
if (common_size >= 2048) {
|
||||
cs_level = CS2048;
|
||||
} else if (common_size >= 1526) {
|
||||
cs_level = CS1536;
|
||||
} else if (common_size >= 680) {
|
||||
cs_level = CS680;
|
||||
} else if (common_size >= 408) {
|
||||
cs_level = CS408;
|
||||
} else {
|
||||
mesa_loge("This core appears to have a very limited amount of shared "
|
||||
"register space and may not meet the Vulkan spec limits.");
|
||||
abort();
|
||||
}
|
||||
|
||||
return &descriptor_limits[cs_level];
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_1(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan11Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_2(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan12Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
|
||||
|
||||
/* VK_KHR_driver_properties */
|
||||
p->driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA;
|
||||
memset(p->driverName, 0, sizeof(p->driverName));
|
||||
snprintf(p->driverName,
|
||||
VK_MAX_DRIVER_NAME_SIZE,
|
||||
"Imagination open-source Mesa driver");
|
||||
memset(p->driverInfo, 0, sizeof(p->driverInfo));
|
||||
snprintf(p->driverInfo,
|
||||
VK_MAX_DRIVER_INFO_SIZE,
|
||||
("Mesa " PACKAGE_VERSION MESA_GIT_SHA1));
|
||||
p->conformanceVersion = (VkConformanceVersion){
|
||||
.major = 1,
|
||||
.minor = 3,
|
||||
.subminor = 4,
|
||||
.patch = 1,
|
||||
};
|
||||
|
||||
/* VK_KHR_timeline_semaphore */
|
||||
p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_3(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan13Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
|
||||
}
|
||||
|
||||
void pvr_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties *pProperties)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
|
||||
const struct pvr_device_info *const dev_info = &pdevice->dev_info;
|
||||
|
||||
const struct pvr_descriptor_limits *descriptor_limits =
|
||||
pvr_get_physical_device_descriptor_limits(dev_info,
|
||||
&pdevice->dev_runtime_info);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t max_multisample =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t uvs_pba_entries =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t num_user_clip_planes =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);
|
||||
|
||||
const uint32_t sub_pixel_precision =
|
||||
PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;
|
||||
|
||||
const uint32_t max_render_size = rogue_get_render_size_max(dev_info);
|
||||
|
||||
const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
|
||||
|
||||
const uint32_t max_user_vertex_components =
|
||||
((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
|
||||
|
||||
/* The workgroup invocations are limited by the case where we have a compute
|
||||
* barrier - each slot has a fixed number of invocations, the whole workgroup
|
||||
* may need to span multiple slots. As each slot will WAIT at the barrier
|
||||
* until the last invocation completes, all have to be schedulable at the
|
||||
* same time.
|
||||
*
|
||||
* Typically all Rogue cores have 16 slots. Some of the smallest cores are
|
||||
* reduced to 14.
|
||||
*
|
||||
* The compute barrier slot exhaustion scenario can be tested with:
|
||||
* dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
|
||||
* .atomicwrite*guard*comp
|
||||
*/
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t max_instances_per_pds_task =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);
|
||||
|
||||
const uint32_t max_compute_work_group_invocations =
|
||||
(usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
|
||||
|
||||
VkPhysicalDeviceLimits limits = {
|
||||
.maxImageDimension1D = max_render_size,
|
||||
.maxImageDimension2D = max_render_size,
|
||||
.maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
|
||||
.maxImageDimensionCube = max_render_size,
|
||||
.maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
|
||||
.maxTexelBufferElements = 64U * 1024U,
|
||||
.maxUniformBufferRange = 128U * 1024U * 1024U,
|
||||
.maxStorageBufferRange = 128U * 1024U * 1024U,
|
||||
.maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
|
||||
.maxMemoryAllocationCount = UINT32_MAX,
|
||||
.maxSamplerAllocationCount = UINT32_MAX,
|
||||
.bufferImageGranularity = 1U,
|
||||
.sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
|
||||
|
||||
/* Maximum number of descriptor sets that can be bound at the same time.
|
||||
*/
|
||||
.maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
|
||||
|
||||
.maxPerStageResources = descriptor_limits->max_per_stage_resources,
|
||||
.maxPerStageDescriptorSamplers =
|
||||
descriptor_limits->max_per_stage_samplers,
|
||||
.maxPerStageDescriptorUniformBuffers =
|
||||
descriptor_limits->max_per_stage_uniform_buffers,
|
||||
.maxPerStageDescriptorStorageBuffers =
|
||||
descriptor_limits->max_per_stage_storage_buffers,
|
||||
.maxPerStageDescriptorSampledImages =
|
||||
descriptor_limits->max_per_stage_sampled_images,
|
||||
.maxPerStageDescriptorStorageImages =
|
||||
descriptor_limits->max_per_stage_storage_images,
|
||||
.maxPerStageDescriptorInputAttachments =
|
||||
descriptor_limits->max_per_stage_input_attachments,
|
||||
|
||||
.maxDescriptorSetSamplers = 256U,
|
||||
.maxDescriptorSetUniformBuffers = 256U,
|
||||
.maxDescriptorSetUniformBuffersDynamic =
|
||||
PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
|
||||
.maxDescriptorSetStorageBuffers = 256U,
|
||||
.maxDescriptorSetStorageBuffersDynamic =
|
||||
PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
|
||||
.maxDescriptorSetSampledImages = 256U,
|
||||
.maxDescriptorSetStorageImages = 256U,
|
||||
.maxDescriptorSetInputAttachments = 256U,
|
||||
|
||||
/* Vertex Shader Limits */
|
||||
.maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
|
||||
.maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
|
||||
.maxVertexInputAttributeOffset = 0xFFFF,
|
||||
.maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
|
||||
.maxVertexOutputComponents = max_user_vertex_components,
|
||||
|
||||
/* Tessellation Limits */
|
||||
.maxTessellationGenerationLevel = 0,
|
||||
.maxTessellationPatchSize = 0,
|
||||
.maxTessellationControlPerVertexInputComponents = 0,
|
||||
.maxTessellationControlPerVertexOutputComponents = 0,
|
||||
.maxTessellationControlPerPatchOutputComponents = 0,
|
||||
.maxTessellationControlTotalOutputComponents = 0,
|
||||
.maxTessellationEvaluationInputComponents = 0,
|
||||
.maxTessellationEvaluationOutputComponents = 0,
|
||||
|
||||
/* Geometry Shader Limits */
|
||||
.maxGeometryShaderInvocations = 0,
|
||||
.maxGeometryInputComponents = 0,
|
||||
.maxGeometryOutputComponents = 0,
|
||||
.maxGeometryOutputVertices = 0,
|
||||
.maxGeometryTotalOutputComponents = 0,
|
||||
|
||||
/* Fragment Shader Limits */
|
||||
.maxFragmentInputComponents = max_user_vertex_components,
|
||||
.maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
|
||||
.maxFragmentDualSrcAttachments = 0,
|
||||
.maxFragmentCombinedOutputResources =
|
||||
descriptor_limits->max_per_stage_storage_buffers +
|
||||
descriptor_limits->max_per_stage_storage_images +
|
||||
PVR_MAX_COLOR_ATTACHMENTS,
|
||||
|
||||
/* Compute Shader Limits */
|
||||
.maxComputeSharedMemorySize = 16U * 1024U,
|
||||
.maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
|
||||
.maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
|
||||
.maxComputeWorkGroupSize = { max_compute_work_group_invocations,
|
||||
max_compute_work_group_invocations,
|
||||
64U },
|
||||
|
||||
/* Rasterization Limits */
|
||||
.subPixelPrecisionBits = sub_pixel_precision,
|
||||
.subTexelPrecisionBits = 8U,
|
||||
.mipmapPrecisionBits = 8U,
|
||||
|
||||
.maxDrawIndexedIndexValue = UINT32_MAX,
|
||||
.maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
|
||||
.maxSamplerLodBias = 16.0f,
|
||||
.maxSamplerAnisotropy = 1.0f,
|
||||
.maxViewports = PVR_MAX_VIEWPORTS,
|
||||
|
||||
.maxViewportDimensions[0] = max_render_size,
|
||||
.maxViewportDimensions[1] = max_render_size,
|
||||
.viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
|
||||
.viewportBoundsRange[1] = 2U * max_render_size,
|
||||
|
||||
.viewportSubPixelBits = 0,
|
||||
.minMemoryMapAlignment = 64U,
|
||||
.minTexelBufferOffsetAlignment = 16U,
|
||||
.minUniformBufferOffsetAlignment = 4U,
|
||||
.minStorageBufferOffsetAlignment = 4U,
|
||||
|
||||
.minTexelOffset = -8,
|
||||
.maxTexelOffset = 7U,
|
||||
.minTexelGatherOffset = -8,
|
||||
.maxTexelGatherOffset = 7,
|
||||
.minInterpolationOffset = -0.5,
|
||||
.maxInterpolationOffset = 0.5,
|
||||
.subPixelInterpolationOffsetBits = 4U,
|
||||
|
||||
.maxFramebufferWidth = max_render_size,
|
||||
.maxFramebufferHeight = max_render_size,
|
||||
.maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,
|
||||
|
||||
.framebufferColorSampleCounts = max_sample_bits,
|
||||
.framebufferDepthSampleCounts = max_sample_bits,
|
||||
.framebufferStencilSampleCounts = max_sample_bits,
|
||||
.framebufferNoAttachmentsSampleCounts = max_sample_bits,
|
||||
.maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
|
||||
.sampledImageColorSampleCounts = max_sample_bits,
|
||||
.sampledImageIntegerSampleCounts = max_sample_bits,
|
||||
.sampledImageDepthSampleCounts = max_sample_bits,
|
||||
.sampledImageStencilSampleCounts = max_sample_bits,
|
||||
.storageImageSampleCounts = max_sample_bits,
|
||||
.maxSampleMaskWords = 1U,
|
||||
.timestampComputeAndGraphics = false,
|
||||
.timestampPeriod = 0.0f,
|
||||
.maxClipDistances = num_user_clip_planes,
|
||||
.maxCullDistances = num_user_clip_planes,
|
||||
.maxCombinedClipAndCullDistances = num_user_clip_planes,
|
||||
.discreteQueuePriorities = 2U,
|
||||
.pointSizeRange[0] = 1.0f,
|
||||
.pointSizeRange[1] = 511.0f,
|
||||
.pointSizeGranularity = 0.0625f,
|
||||
.lineWidthRange[0] = 1.0f / 16.0f,
|
||||
.lineWidthRange[1] = 16.0f,
|
||||
.lineWidthGranularity = 1.0f / 16.0f,
|
||||
.strictLines = false,
|
||||
.standardSampleLocations = true,
|
||||
.optimalBufferCopyOffsetAlignment = 4U,
|
||||
.optimalBufferCopyRowPitchAlignment = 4U,
|
||||
.nonCoherentAtomSize = 1U,
|
||||
};
|
||||
|
||||
*pProperties = (VkPhysicalDeviceProperties){
|
||||
.apiVersion = PVR_API_VERSION,
|
||||
.driverVersion = vk_get_driver_version(),
|
||||
.vendorID = VK_VENDOR_ID_IMAGINATION,
|
||||
.deviceID = dev_info->ident.device_id,
|
||||
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
|
||||
.limits = limits,
|
||||
.sparseProperties = { 0 },
|
||||
};
|
||||
|
||||
snprintf(pProperties->deviceName,
|
||||
sizeof(pProperties->deviceName),
|
||||
"%s",
|
||||
pdevice->name);
|
||||
|
||||
memcpy(pProperties->pipelineCacheUUID,
|
||||
pdevice->pipeline_cache_uuid,
|
||||
VK_UUID_SIZE);
|
||||
}
|
||||
|
||||
void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties2 *pProperties)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
|
||||
|
||||
VkPhysicalDeviceVulkan11Properties core_1_1 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceVulkan12Properties core_1_2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceVulkan13Properties core_1_3 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
|
||||
};
|
||||
|
||||
pvr_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
|
||||
pvr_get_physical_device_properties_1_1(pdevice, &core_1_1);
|
||||
pvr_get_physical_device_properties_1_2(pdevice, &core_1_2);
|
||||
pvr_get_physical_device_properties_1_3(pdevice, &core_1_3);
|
||||
|
||||
vk_foreach_struct (ext, pProperties->pNext) {
|
||||
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
|
||||
continue;
|
||||
|
||||
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
|
||||
continue;
|
||||
|
||||
if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
|
||||
continue;
|
||||
|
||||
switch (ext->sType) {
|
||||
default: {
|
||||
pvr_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
|
||||
{
|
||||
*pApiVersion = PVR_API_VERSION;
|
||||
@@ -309,40 +712,6 @@ void pvr_DestroyInstance(VkInstance _instance,
|
||||
vk_free(&instance->vk.alloc, instance);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
|
||||
{
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
unsigned build_id_len;
|
||||
uint8_t sha1[20];
|
||||
uint64_t bvnc;
|
||||
|
||||
const struct build_id_note *note =
|
||||
build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
|
||||
if (!note) {
|
||||
return vk_errorf(pdevice,
|
||||
VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Failed to find build-id");
|
||||
}
|
||||
|
||||
build_id_len = build_id_length(note);
|
||||
if (build_id_len < 20) {
|
||||
return vk_errorf(pdevice,
|
||||
VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Build-id too short. It needs to be a SHA");
|
||||
}
|
||||
|
||||
bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);
|
||||
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
|
||||
_mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static uint64_t pvr_compute_heap_size(void)
|
||||
{
|
||||
/* Query the total ram from the system */
|
||||
@@ -857,375 +1226,6 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
|
||||
return MIN2(num_tile_in_flight, max_tiles_in_flight);
|
||||
}
|
||||
|
||||
struct pvr_descriptor_limits {
|
||||
uint32_t max_per_stage_resources;
|
||||
uint32_t max_per_stage_samplers;
|
||||
uint32_t max_per_stage_uniform_buffers;
|
||||
uint32_t max_per_stage_storage_buffers;
|
||||
uint32_t max_per_stage_sampled_images;
|
||||
uint32_t max_per_stage_storage_images;
|
||||
uint32_t max_per_stage_input_attachments;
|
||||
};
|
||||
|
||||
static const struct pvr_descriptor_limits *
|
||||
pvr_get_physical_device_descriptor_limits(
|
||||
const struct pvr_device_info *dev_info,
|
||||
const struct pvr_device_runtime_info *dev_runtime_info)
|
||||
{
|
||||
enum pvr_descriptor_cs_level {
|
||||
/* clang-format off */
|
||||
CS4096, /* 6XT and some XE cores with large CS. */
|
||||
CS2560, /* Mid range Rogue XE cores. */
|
||||
CS2048, /* Low end Rogue XE cores. */
|
||||
CS1536, /* Ultra-low-end 9XEP. */
|
||||
CS680, /* lower limits for older devices. */
|
||||
CS408, /* 7XE. */
|
||||
/* clang-format on */
|
||||
};
|
||||
|
||||
static const struct pvr_descriptor_limits descriptor_limits[] = {
|
||||
[CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
|
||||
[CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, },
|
||||
[CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, },
|
||||
[CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, },
|
||||
[CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, },
|
||||
[CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, },
|
||||
};
|
||||
|
||||
const uint32_t common_size =
|
||||
pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
|
||||
dev_runtime_info,
|
||||
UINT32_MAX,
|
||||
1);
|
||||
enum pvr_descriptor_cs_level cs_level;
|
||||
|
||||
if (common_size >= 2048) {
|
||||
cs_level = CS2048;
|
||||
} else if (common_size >= 1526) {
|
||||
cs_level = CS1536;
|
||||
} else if (common_size >= 680) {
|
||||
cs_level = CS680;
|
||||
} else if (common_size >= 408) {
|
||||
cs_level = CS408;
|
||||
} else {
|
||||
mesa_loge("This core appears to have a very limited amount of shared "
|
||||
"register space and may not meet the Vulkan spec limits.");
|
||||
abort();
|
||||
}
|
||||
|
||||
return &descriptor_limits[cs_level];
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_1(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan11Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_2(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan12Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
|
||||
|
||||
/* VK_KHR_driver_properties */
|
||||
p->driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA;
|
||||
memset(p->driverName, 0, sizeof(p->driverName));
|
||||
snprintf(p->driverName,
|
||||
VK_MAX_DRIVER_NAME_SIZE,
|
||||
"Imagination open-source Mesa driver");
|
||||
memset(p->driverInfo, 0, sizeof(p->driverInfo));
|
||||
snprintf(p->driverInfo,
|
||||
VK_MAX_DRIVER_INFO_SIZE,
|
||||
("Mesa " PACKAGE_VERSION MESA_GIT_SHA1));
|
||||
p->conformanceVersion = (VkConformanceVersion){
|
||||
.major = 1,
|
||||
.minor = 3,
|
||||
.subminor = 4,
|
||||
.patch = 1,
|
||||
};
|
||||
|
||||
/* VK_KHR_timeline_semaphore */
|
||||
p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_get_physical_device_properties_1_3(struct pvr_physical_device *pdevice,
|
||||
VkPhysicalDeviceVulkan13Properties *p)
|
||||
{
|
||||
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
|
||||
}
|
||||
|
||||
void pvr_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties *pProperties)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
|
||||
const struct pvr_device_info *const dev_info = &pdevice->dev_info;
|
||||
|
||||
const struct pvr_descriptor_limits *descriptor_limits =
|
||||
pvr_get_physical_device_descriptor_limits(dev_info,
|
||||
&pdevice->dev_runtime_info);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t max_multisample =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t uvs_pba_entries =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t num_user_clip_planes =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);
|
||||
|
||||
const uint32_t sub_pixel_precision =
|
||||
PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;
|
||||
|
||||
const uint32_t max_render_size = rogue_get_render_size_max(dev_info);
|
||||
|
||||
const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
|
||||
|
||||
const uint32_t max_user_vertex_components =
|
||||
((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
|
||||
|
||||
/* The workgroup invocations are limited by the case where we have a compute
|
||||
* barrier - each slot has a fixed number of invocations, the whole workgroup
|
||||
* may need to span multiple slots. As each slot will WAIT at the barrier
|
||||
* until the last invocation completes, all have to be schedulable at the
|
||||
* same time.
|
||||
*
|
||||
* Typically all Rogue cores have 16 slots. Some of the smallest cores are
|
||||
* reduced to 14.
|
||||
*
|
||||
* The compute barrier slot exhaustion scenario can be tested with:
|
||||
* dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
|
||||
* .atomicwrite*guard*comp
|
||||
*/
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);
|
||||
|
||||
/* Default value based on the minimum value found in all existing cores. */
|
||||
const uint32_t max_instances_per_pds_task =
|
||||
PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);
|
||||
|
||||
const uint32_t max_compute_work_group_invocations =
|
||||
(usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
|
||||
|
||||
VkPhysicalDeviceLimits limits = {
|
||||
.maxImageDimension1D = max_render_size,
|
||||
.maxImageDimension2D = max_render_size,
|
||||
.maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
|
||||
.maxImageDimensionCube = max_render_size,
|
||||
.maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
|
||||
.maxTexelBufferElements = 64U * 1024U,
|
||||
.maxUniformBufferRange = 128U * 1024U * 1024U,
|
||||
.maxStorageBufferRange = 128U * 1024U * 1024U,
|
||||
.maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
|
||||
.maxMemoryAllocationCount = UINT32_MAX,
|
||||
.maxSamplerAllocationCount = UINT32_MAX,
|
||||
.bufferImageGranularity = 1U,
|
||||
.sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
|
||||
|
||||
/* Maximum number of descriptor sets that can be bound at the same time.
|
||||
*/
|
||||
.maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
|
||||
|
||||
.maxPerStageResources = descriptor_limits->max_per_stage_resources,
|
||||
.maxPerStageDescriptorSamplers =
|
||||
descriptor_limits->max_per_stage_samplers,
|
||||
.maxPerStageDescriptorUniformBuffers =
|
||||
descriptor_limits->max_per_stage_uniform_buffers,
|
||||
.maxPerStageDescriptorStorageBuffers =
|
||||
descriptor_limits->max_per_stage_storage_buffers,
|
||||
.maxPerStageDescriptorSampledImages =
|
||||
descriptor_limits->max_per_stage_sampled_images,
|
||||
.maxPerStageDescriptorStorageImages =
|
||||
descriptor_limits->max_per_stage_storage_images,
|
||||
.maxPerStageDescriptorInputAttachments =
|
||||
descriptor_limits->max_per_stage_input_attachments,
|
||||
|
||||
.maxDescriptorSetSamplers = 256U,
|
||||
.maxDescriptorSetUniformBuffers = 256U,
|
||||
.maxDescriptorSetUniformBuffersDynamic =
|
||||
PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
|
||||
.maxDescriptorSetStorageBuffers = 256U,
|
||||
.maxDescriptorSetStorageBuffersDynamic =
|
||||
PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
|
||||
.maxDescriptorSetSampledImages = 256U,
|
||||
.maxDescriptorSetStorageImages = 256U,
|
||||
.maxDescriptorSetInputAttachments = 256U,
|
||||
|
||||
/* Vertex Shader Limits */
|
||||
.maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
|
||||
.maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
|
||||
.maxVertexInputAttributeOffset = 0xFFFF,
|
||||
.maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
|
||||
.maxVertexOutputComponents = max_user_vertex_components,
|
||||
|
||||
/* Tessellation Limits */
|
||||
.maxTessellationGenerationLevel = 0,
|
||||
.maxTessellationPatchSize = 0,
|
||||
.maxTessellationControlPerVertexInputComponents = 0,
|
||||
.maxTessellationControlPerVertexOutputComponents = 0,
|
||||
.maxTessellationControlPerPatchOutputComponents = 0,
|
||||
.maxTessellationControlTotalOutputComponents = 0,
|
||||
.maxTessellationEvaluationInputComponents = 0,
|
||||
.maxTessellationEvaluationOutputComponents = 0,
|
||||
|
||||
/* Geometry Shader Limits */
|
||||
.maxGeometryShaderInvocations = 0,
|
||||
.maxGeometryInputComponents = 0,
|
||||
.maxGeometryOutputComponents = 0,
|
||||
.maxGeometryOutputVertices = 0,
|
||||
.maxGeometryTotalOutputComponents = 0,
|
||||
|
||||
/* Fragment Shader Limits */
|
||||
.maxFragmentInputComponents = max_user_vertex_components,
|
||||
.maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
|
||||
.maxFragmentDualSrcAttachments = 0,
|
||||
.maxFragmentCombinedOutputResources =
|
||||
descriptor_limits->max_per_stage_storage_buffers +
|
||||
descriptor_limits->max_per_stage_storage_images +
|
||||
PVR_MAX_COLOR_ATTACHMENTS,
|
||||
|
||||
/* Compute Shader Limits */
|
||||
.maxComputeSharedMemorySize = 16U * 1024U,
|
||||
.maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
|
||||
.maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
|
||||
.maxComputeWorkGroupSize = { max_compute_work_group_invocations,
|
||||
max_compute_work_group_invocations,
|
||||
64U },
|
||||
|
||||
/* Rasterization Limits */
|
||||
.subPixelPrecisionBits = sub_pixel_precision,
|
||||
.subTexelPrecisionBits = 8U,
|
||||
.mipmapPrecisionBits = 8U,
|
||||
|
||||
.maxDrawIndexedIndexValue = UINT32_MAX,
|
||||
.maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
|
||||
.maxSamplerLodBias = 16.0f,
|
||||
.maxSamplerAnisotropy = 1.0f,
|
||||
.maxViewports = PVR_MAX_VIEWPORTS,
|
||||
|
||||
.maxViewportDimensions[0] = max_render_size,
|
||||
.maxViewportDimensions[1] = max_render_size,
|
||||
.viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
|
||||
.viewportBoundsRange[1] = 2U * max_render_size,
|
||||
|
||||
.viewportSubPixelBits = 0,
|
||||
.minMemoryMapAlignment = 64U,
|
||||
.minTexelBufferOffsetAlignment = 16U,
|
||||
.minUniformBufferOffsetAlignment = 4U,
|
||||
.minStorageBufferOffsetAlignment = 4U,
|
||||
|
||||
.minTexelOffset = -8,
|
||||
.maxTexelOffset = 7U,
|
||||
.minTexelGatherOffset = -8,
|
||||
.maxTexelGatherOffset = 7,
|
||||
.minInterpolationOffset = -0.5,
|
||||
.maxInterpolationOffset = 0.5,
|
||||
.subPixelInterpolationOffsetBits = 4U,
|
||||
|
||||
.maxFramebufferWidth = max_render_size,
|
||||
.maxFramebufferHeight = max_render_size,
|
||||
.maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,
|
||||
|
||||
.framebufferColorSampleCounts = max_sample_bits,
|
||||
.framebufferDepthSampleCounts = max_sample_bits,
|
||||
.framebufferStencilSampleCounts = max_sample_bits,
|
||||
.framebufferNoAttachmentsSampleCounts = max_sample_bits,
|
||||
.maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
|
||||
.sampledImageColorSampleCounts = max_sample_bits,
|
||||
.sampledImageIntegerSampleCounts = max_sample_bits,
|
||||
.sampledImageDepthSampleCounts = max_sample_bits,
|
||||
.sampledImageStencilSampleCounts = max_sample_bits,
|
||||
.storageImageSampleCounts = max_sample_bits,
|
||||
.maxSampleMaskWords = 1U,
|
||||
.timestampComputeAndGraphics = false,
|
||||
.timestampPeriod = 0.0f,
|
||||
.maxClipDistances = num_user_clip_planes,
|
||||
.maxCullDistances = num_user_clip_planes,
|
||||
.maxCombinedClipAndCullDistances = num_user_clip_planes,
|
||||
.discreteQueuePriorities = 2U,
|
||||
.pointSizeRange[0] = 1.0f,
|
||||
.pointSizeRange[1] = 511.0f,
|
||||
.pointSizeGranularity = 0.0625f,
|
||||
.lineWidthRange[0] = 1.0f / 16.0f,
|
||||
.lineWidthRange[1] = 16.0f,
|
||||
.lineWidthGranularity = 1.0f / 16.0f,
|
||||
.strictLines = false,
|
||||
.standardSampleLocations = true,
|
||||
.optimalBufferCopyOffsetAlignment = 4U,
|
||||
.optimalBufferCopyRowPitchAlignment = 4U,
|
||||
.nonCoherentAtomSize = 1U,
|
||||
};
|
||||
|
||||
*pProperties = (VkPhysicalDeviceProperties){
|
||||
.apiVersion = PVR_API_VERSION,
|
||||
.driverVersion = vk_get_driver_version(),
|
||||
.vendorID = VK_VENDOR_ID_IMAGINATION,
|
||||
.deviceID = dev_info->ident.device_id,
|
||||
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
|
||||
.limits = limits,
|
||||
.sparseProperties = { 0 },
|
||||
};
|
||||
|
||||
snprintf(pProperties->deviceName,
|
||||
sizeof(pProperties->deviceName),
|
||||
"%s",
|
||||
pdevice->name);
|
||||
|
||||
memcpy(pProperties->pipelineCacheUUID,
|
||||
pdevice->pipeline_cache_uuid,
|
||||
VK_UUID_SIZE);
|
||||
}
|
||||
|
||||
void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties2 *pProperties)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
|
||||
|
||||
VkPhysicalDeviceVulkan11Properties core_1_1 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceVulkan12Properties core_1_2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceVulkan13Properties core_1_3 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
|
||||
};
|
||||
|
||||
pvr_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
|
||||
pvr_get_physical_device_properties_1_1(pdevice, &core_1_1);
|
||||
pvr_get_physical_device_properties_1_2(pdevice, &core_1_2);
|
||||
pvr_get_physical_device_properties_1_3(pdevice, &core_1_3);
|
||||
|
||||
vk_foreach_struct (ext, pProperties->pNext) {
|
||||
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
|
||||
continue;
|
||||
|
||||
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
|
||||
continue;
|
||||
|
||||
if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
|
||||
continue;
|
||||
|
||||
switch (ext->sType) {
|
||||
default: {
|
||||
pvr_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const static VkQueueFamilyProperties pvr_queue_family_properties = {
|
||||
.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
|
||||
VK_QUEUE_TRANSFER_BIT,
|
||||
|
Reference in New Issue
Block a user