diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 81c7f81a135..b17db63c782 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -15,3 +15,4 @@ Sparse memory support on RADV Rapid packed math (16bit-vectorization) on RADV VK_KHR_workgroup_memory_explicit_layout on Intel, RADV DRM format modifiers for AMD. +VK_KHR_zero_initialize_workgroup_memory on Intel diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 037641ae690..356d5f8ed55 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1451,6 +1451,13 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: { + VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features = + (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext; + features->shaderZeroInitializeWorkgroupMemory = true; + break; + } + default: anv_debug_ignored_stype(ext->sType); break; diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index f4b1b6346a9..8ddb8f98686 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -122,6 +122,7 @@ EXTENSIONS = [ Extension('VK_KHR_workgroup_memory_explicit_layout', 1, True), Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), + Extension('VK_KHR_zero_initialize_workgroup_memory', 1, True), Extension('VK_EXT_4444_formats', 1, True), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), Extension('VK_EXT_buffer_device_address', 2, 'device->has_a64_buffer_access'), diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 78c8bf033f6..49e67b3002d 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1748,6 +1748,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, NIR_PASS_V(stage.nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); + + if (stage.nir->info.cs.zero_initialize_shared_memory && + stage.nir->info.cs.shared_size > 0) { + /* The effective Shared Local Memory size is at least 1024 bytes and + * is always rounded to a power of two, so it is OK to align the size + * used by the shader to chunk_size -- which does simplify the logic. + */ + const unsigned chunk_size = 16; + const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size); + assert(shared_size <= + calculate_gen_slm_size(compiler->devinfo->gen, stage.nir->info.cs.shared_size)); + + NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory, + shared_size, chunk_size); + } + NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics); stage.num_stats = 1;