anv: Implement VK_KHR_zero_initialize_workgroup_memory
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8708>
This commit is contained in:

committed by
Marge Bot

parent
a2414ada87
commit
d49b0fa72f
@@ -15,3 +15,4 @@ Sparse memory support on RADV
|
||||
Rapid packed math (16bit-vectorization) on RADV
|
||||
VK_KHR_workgroup_memory_explicit_layout on Intel, RADV
|
||||
DRM format modifiers for AMD.
|
||||
VK_KHR_zero_initialize_workgroup_memory on Intel
|
||||
|
@@ -1451,6 +1451,13 @@ void anv_GetPhysicalDeviceFeatures2(
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
|
||||
VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
|
||||
(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
|
||||
features->shaderZeroInitializeWorkgroupMemory = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
anv_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
|
@@ -122,6 +122,7 @@ EXTENSIONS = [
|
||||
Extension('VK_KHR_workgroup_memory_explicit_layout', 1, True),
|
||||
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
|
||||
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
|
||||
Extension('VK_KHR_zero_initialize_workgroup_memory', 1, True),
|
||||
Extension('VK_EXT_4444_formats', 1, True),
|
||||
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
||||
Extension('VK_EXT_buffer_device_address', 2, 'device->has_a64_buffer_access'),
|
||||
|
@@ -1748,6 +1748,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
||||
|
||||
NIR_PASS_V(stage.nir, nir_lower_explicit_io,
|
||||
nir_var_mem_shared, nir_address_format_32bit_offset);
|
||||
|
||||
if (stage.nir->info.cs.zero_initialize_shared_memory &&
|
||||
stage.nir->info.cs.shared_size > 0) {
|
||||
/* The effective Shared Local Memory size is at least 1024 bytes and
|
||||
* is always rounded to a power of two, so it is OK to align the size
|
||||
* used by the shader to chunk_size -- which does simplify the logic.
|
||||
*/
|
||||
const unsigned chunk_size = 16;
|
||||
const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
|
||||
assert(shared_size <=
|
||||
calculate_gen_slm_size(compiler->devinfo->gen, stage.nir->info.cs.shared_size));
|
||||
|
||||
NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
|
||||
shared_size, chunk_size);
|
||||
}
|
||||
|
||||
NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
|
||||
|
||||
stage.num_stats = 1;
|
||||
|
Reference in New Issue
Block a user