anv: Implement VK_KHR_zero_initialize_workgroup_memory
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8708>
This commit is contained in:

committed by
Marge Bot

parent
a2414ada87
commit
d49b0fa72f
@@ -15,3 +15,4 @@ Sparse memory support on RADV
|
|||||||
Rapid packed math (16bit-vectorization) on RADV
|
Rapid packed math (16bit-vectorization) on RADV
|
||||||
VK_KHR_workgroup_memory_explicit_layout on Intel, RADV
|
VK_KHR_workgroup_memory_explicit_layout on Intel, RADV
|
||||||
DRM format modifiers for AMD.
|
DRM format modifiers for AMD.
|
||||||
|
VK_KHR_zero_initialize_workgroup_memory on Intel
|
||||||
|
@@ -1451,6 +1451,13 @@ void anv_GetPhysicalDeviceFeatures2(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
|
||||||
|
VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
|
||||||
|
(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
|
||||||
|
features->shaderZeroInitializeWorkgroupMemory = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
anv_debug_ignored_stype(ext->sType);
|
anv_debug_ignored_stype(ext->sType);
|
||||||
break;
|
break;
|
||||||
|
@@ -122,6 +122,7 @@ EXTENSIONS = [
|
|||||||
Extension('VK_KHR_workgroup_memory_explicit_layout', 1, True),
|
Extension('VK_KHR_workgroup_memory_explicit_layout', 1, True),
|
||||||
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
|
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
|
||||||
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
|
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
|
||||||
|
Extension('VK_KHR_zero_initialize_workgroup_memory', 1, True),
|
||||||
Extension('VK_EXT_4444_formats', 1, True),
|
Extension('VK_EXT_4444_formats', 1, True),
|
||||||
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
||||||
Extension('VK_EXT_buffer_device_address', 2, 'device->has_a64_buffer_access'),
|
Extension('VK_EXT_buffer_device_address', 2, 'device->has_a64_buffer_access'),
|
||||||
|
@@ -1748,6 +1748,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
|
|
||||||
NIR_PASS_V(stage.nir, nir_lower_explicit_io,
|
NIR_PASS_V(stage.nir, nir_lower_explicit_io,
|
||||||
nir_var_mem_shared, nir_address_format_32bit_offset);
|
nir_var_mem_shared, nir_address_format_32bit_offset);
|
||||||
|
|
||||||
|
if (stage.nir->info.cs.zero_initialize_shared_memory &&
|
||||||
|
stage.nir->info.cs.shared_size > 0) {
|
||||||
|
/* The effective Shared Local Memory size is at least 1024 bytes and
|
||||||
|
* is always rounded to a power of two, so it is OK to align the size
|
||||||
|
* used by the shader to chunk_size -- which does simplify the logic.
|
||||||
|
*/
|
||||||
|
const unsigned chunk_size = 16;
|
||||||
|
const unsigned shared_size = ALIGN(stage.nir->info.cs.shared_size, chunk_size);
|
||||||
|
assert(shared_size <=
|
||||||
|
calculate_gen_slm_size(compiler->devinfo->gen, stage.nir->info.cs.shared_size));
|
||||||
|
|
||||||
|
NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
|
||||||
|
shared_size, chunk_size);
|
||||||
|
}
|
||||||
|
|
||||||
NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
|
NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
|
||||||
|
|
||||||
stage.num_stats = 1;
|
stage.num_stats = 1;
|
||||||
|
Reference in New Issue
Block a user