From 1fd8b466672b76ad206af8aac6002e8a658db9fb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 20 Nov 2020 16:14:26 +0000 Subject: [PATCH] nir,spirv: add sparse image loads Signed-off-by: Rhys Perry Reviewed-by: Jason Ekstrand Part-of: --- src/amd/vulkan/radv_shader_info.c | 1 + src/compiler/nir/nir.c | 1 + src/compiler/nir/nir_divergence_analysis.c | 3 ++ src/compiler/nir/nir_intrinsics.py | 1 + .../nir/nir_lower_input_attachments.c | 18 ++++++++--- src/compiler/nir/nir_lower_memory_model.c | 1 + .../nir/nir_lower_non_uniform_access.c | 3 ++ src/compiler/nir/nir_opt_access.c | 15 +++++++--- src/compiler/spirv/spirv_to_nir.c | 30 +++++++++++++++++-- 9 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 24c6ed015bc..af4184fde86 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -187,6 +187,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr)); break; case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_deref_atomic_imin: diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 9dc2c52ee02..934b3887365 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2458,6 +2458,7 @@ nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, : nir_intrinsic_image_##op; \ break; CASE(load) + CASE(sparse_load) CASE(store) CASE(atomic_add) CASE(atomic_imin) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index f148c09e678..f3d71eac173 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -250,6 +250,9 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_image_load: case nir_intrinsic_image_deref_load: case nir_intrinsic_bindless_image_load: + case nir_intrinsic_image_sparse_load: + case nir_intrinsic_image_deref_sparse_load: + case nir_intrinsic_bindless_image_sparse_load: is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent; break; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index e9912d10279..4e66f4b8b60 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -520,6 +520,7 @@ def image(name, src_comp=[], extra_indices=[], **kwargs): indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS] + extra_indices, **kwargs) image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) +image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) image("store", src_comp=[4, 1, 0, 1], extra_indices=[SRC_TYPE]) image("atomic_add", src_comp=[4, 1, 1], dest_comp=1) image("atomic_imin", src_comp=[4, 1, 1], dest_comp=1) diff --git a/src/compiler/nir/nir_lower_input_attachments.c b/src/compiler/nir/nir_lower_input_attachments.c index 3a3500a0bcd..f18aac0df0d 100644 --- a/src/compiler/nir/nir_lower_input_attachments.c +++ b/src/compiler/nir/nir_lower_input_attachments.c @@ -123,6 +123,7 @@ try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load, } tex->is_array = true; tex->is_shadow = false; + tex->is_sparse = load->intrinsic == nir_intrinsic_image_deref_sparse_load; tex->texture_index = 0; tex->sampler_index = 0; @@ -145,11 +146,19 @@ try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load, tex->texture_non_uniform = nir_intrinsic_access(load) & ACCESS_NON_UNIFORM; - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex), 32, NULL); nir_builder_instr_insert(&b, &tex->instr); - nir_ssa_def_rewrite_uses(&load->dest.ssa, - nir_src_for_ssa(&tex->dest.ssa)); + if (tex->is_sparse) { + unsigned load_result_size = load->dest.ssa.num_components - 1; + nir_ssa_def *res = nir_channels( + &b, &tex->dest.ssa, BITFIELD_MASK(load_result_size) | 0x10); + + nir_ssa_def_rewrite_uses(&load->dest.ssa, nir_src_for_ssa(res)); + } else { + nir_ssa_def_rewrite_uses(&load->dest.ssa, + nir_src_for_ssa(&tex->dest.ssa)); + } return true; } @@ -208,7 +217,8 @@ nir_lower_input_attachments(nir_shader *shader, case nir_instr_type_intrinsic: { nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr); - if (load->intrinsic == nir_intrinsic_image_deref_load) { + if (load->intrinsic == nir_intrinsic_image_deref_load || + load->intrinsic == nir_intrinsic_image_deref_sparse_load) { progress |= try_lower_input_load(function->impl, load, options); } diff --git a/src/compiler/nir/nir_lower_memory_model.c b/src/compiler/nir/nir_lower_memory_model.c index e29d97f3242..293126e0a2b 100644 --- a/src/compiler/nir/nir_lower_memory_model.c +++ b/src/compiler/nir/nir_lower_memory_model.c @@ -36,6 +36,7 @@ get_intrinsic_info(nir_intrinsic_instr *intrin, nir_variable_mode *modes, { switch (intrin->intrinsic) { case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: *modes = nir_src_as_deref(intrin->src[0])->modes; *reads = true; break; diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 08dcaaae497..12082a24272 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -235,6 +235,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, break; case nir_intrinsic_image_load: + case nir_intrinsic_image_sparse_load: case nir_intrinsic_image_store: case nir_intrinsic_image_atomic_add: case nir_intrinsic_image_atomic_imin: @@ -250,6 +251,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_image_size: case nir_intrinsic_image_samples: case nir_intrinsic_bindless_image_load: + case nir_intrinsic_bindless_image_sparse_load: case nir_intrinsic_bindless_image_store: case nir_intrinsic_bindless_image_atomic_add: case nir_intrinsic_bindless_image_atomic_imin: @@ -265,6 +267,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_bindless_image_size: case nir_intrinsic_bindless_image_samples: case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_deref_atomic_umin: diff --git a/src/compiler/nir/nir_opt_access.c b/src/compiler/nir/nir_opt_access.c index 89b44c2234b..72664b5ec6a 100644 --- a/src/compiler/nir/nir_opt_access.c +++ b/src/compiler/nir/nir_opt_access.c @@ -83,6 +83,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_deref_atomic_imin: case nir_intrinsic_image_deref_atomic_umin: @@ -96,7 +97,8 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) case nir_intrinsic_image_deref_atomic_fadd: var = nir_intrinsic_get_var(instr, 0); read = instr->intrinsic != nir_intrinsic_image_deref_store; - write = instr->intrinsic != nir_intrinsic_image_deref_load; + write = instr->intrinsic != nir_intrinsic_image_deref_load && + instr->intrinsic != nir_intrinsic_image_deref_sparse_load; /* In OpenGL, buffer images use normal buffer objects, whereas other * image types use textures which cannot alias with buffer objects. @@ -119,6 +121,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_store: + case nir_intrinsic_bindless_image_sparse_load: case nir_intrinsic_bindless_image_atomic_add: case nir_intrinsic_bindless_image_atomic_imin: case nir_intrinsic_bindless_image_atomic_umin: @@ -131,7 +134,8 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) case nir_intrinsic_bindless_image_atomic_comp_swap: case nir_intrinsic_bindless_image_atomic_fadd: read = instr->intrinsic != nir_intrinsic_bindless_image_store; - write = instr->intrinsic != nir_intrinsic_bindless_image_load; + write = instr->intrinsic != nir_intrinsic_bindless_image_load && + instr->intrinsic != nir_intrinsic_bindless_image_sparse_load; if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF) { state->buffers_read |= read; @@ -218,7 +222,8 @@ update_access(struct access_state *state, nir_intrinsic_instr *instr, bool is_im bool is_memory_writeonly = access & ACCESS_NON_READABLE; if (instr->intrinsic != nir_intrinsic_bindless_image_load && - instr->intrinsic != nir_intrinsic_bindless_image_store) { + instr->intrinsic != nir_intrinsic_bindless_image_store && + instr->intrinsic != nir_intrinsic_bindless_image_sparse_load) { const nir_variable *var = nir_get_binding_variable( state->shader, nir_chase_binding(instr->src[0])); is_memory_readonly |= var && (var->data.access & ACCESS_NON_WRITEABLE); @@ -246,6 +251,7 @@ process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_store: + case nir_intrinsic_bindless_image_sparse_load: return update_access(state, instr, true, nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF); @@ -258,7 +264,8 @@ process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) } case nir_intrinsic_image_deref_load: - case nir_intrinsic_image_deref_store: { + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_sparse_load: { nir_variable *var = nir_intrinsic_get_var(instr, 0); bool is_buffer = diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 1462549f08c..37dd1ca3cb6 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -3150,7 +3150,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, image.lod = NULL; break; - case SpvOpImageRead: { + case SpvOpImageRead: + case SpvOpImageSparseRead: { res_val = vtn_untyped_value(b, w[3]); image.image = vtn_get_image(b, w[3], &access); image.coord = get_image_coord(b, w[4]); @@ -3243,6 +3244,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, OP(ImageQuerySize, size) OP(ImageQuerySizeLod, size) OP(ImageRead, load) + OP(ImageSparseRead, sparse_load) OP(ImageWrite, store) OP(AtomicLoad, load) OP(AtomicStore, store) @@ -3316,6 +3318,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, break; case SpvOpAtomicLoad: case SpvOpImageRead: + case SpvOpImageSparseRead: /* Only OpImageRead can support a lod parameter if * SPV_AMD_shader_image_load_store_lod is used but the current NIR * intrinsics definition for atomics requires us to set it for @@ -3377,8 +3380,17 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { struct vtn_type *type = vtn_get_type(b, w[1]); + struct vtn_type *struct_type = NULL; + if (opcode == SpvOpImageSparseRead) { + vtn_assert(glsl_type_is_struct_or_ifc(type->type)); + struct_type = type; + type = struct_type->members[1]; + } unsigned dest_components = glsl_get_vector_elements(type->type); + if (opcode == SpvOpImageSparseRead) + dest_components++; + if (nir_intrinsic_infos[op].dest_components == 0) intrin->num_components = dest_components; @@ -3392,9 +3404,20 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (nir_intrinsic_dest_components(intrin) != dest_components) result = nir_channels(&b->nb, result, (1 << dest_components) - 1); - vtn_push_nir_ssa(b, w[2], result); + if (opcode == SpvOpImageSparseRead) { + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type); + unsigned res_type_size = glsl_get_vector_elements(type->type); + dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size); + if (intrin->dest.ssa.bit_size != 32) + dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def); + dest->elems[1]->def = nir_channels(&b->nb, result, + BITFIELD_MASK(res_type_size)); + vtn_push_ssa_value(b, w[2], dest); + } else { + vtn_push_nir_ssa(b, w[2], result); + } - if (opcode == SpvOpImageRead) + if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead) nir_intrinsic_set_dest_type(intrin, nir_get_nir_type_for_glsl_type(type->type)); } else { nir_builder_instr_insert(&b->nb, &intrin->instr); @@ -5281,6 +5304,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpImageRead: + case SpvOpImageSparseRead: case SpvOpImageWrite: case SpvOpImageTexelPointer: case SpvOpImageQueryFormat: