anv: Implement VK_KHR_zero_initialize_workgroup_memory

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8708>
This commit is contained in:
Caio Marcelo de Oliveira Filho
2020-06-24 22:15:28 -07:00
committed by Marge Bot
parent a2414ada87
commit d49b0fa72f
4 changed files with 25 additions and 0 deletions

View File

@@ -15,3 +15,4 @@ Sparse memory support on RADV
Rapid packed math (16bit-vectorization) on RADV
VK_KHR_workgroup_memory_explicit_layout on Intel, RADV
DRM format modifiers for AMD.
VK_KHR_zero_initialize_workgroup_memory on Intel

View File

@@ -1451,6 +1451,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
features->shaderZeroInitializeWorkgroupMemory = true;
break;
}
default:
anv_debug_ignored_stype(ext->sType);
break;

View File

@@ -122,6 +122,7 @@ EXTENSIONS = [
Extension('VK_KHR_workgroup_memory_explicit_layout', 1, True),
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_zero_initialize_workgroup_memory', 1, True),
Extension('VK_EXT_4444_formats', 1, True),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_buffer_device_address', 2, 'device->has_a64_buffer_access'),

View File

@@ -1748,6 +1748,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
NIR_PASS_V(stage.nir, nir_lower_explicit_io,
nir_var_mem_shared, nir_address_format_32bit_offset);
if (stage.nir->info.cs.zero_initialize_shared_memory &&
stage.nir->info.cs.shared_size > 0) {
/* The effective Shared Local Memory size is at least 1024 bytes and
* is always rounded to a power of two, so aligning the size used by
* the shader to chunk_size stays within the effective allocation (see
* the assert below) and keeps the zero-initialization logic simple.
*/
const unsigned chunk_size = 16;
const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
assert(shared_size <=
calculate_gen_slm_size(compiler->devinfo->gen, stage.nir->info.cs.shared_size));
NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
shared_size, chunk_size);
}
NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
stage.num_stats = 1;