From 820c50ada3f9563a0900f89a2252465b64bdac7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 27 Apr 2023 23:02:28 -0400 Subject: [PATCH] nir: rename ACCESS_STREAM_CACHE_POLICY -> ACCESS_NON_TEMPORAL and document Reviewed-by: Rob Clark Part-of: --- src/amd/common/ac_nir.c | 10 +++++----- src/amd/common/ac_nir_lower_esgs_io_to_mem.c | 2 +- src/amd/common/ac_nir_lower_ngg.c | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 4 ++-- src/amd/llvm/ac_nir_to_llvm.c | 4 ++-- src/compiler/nir/nir_print.c | 6 +++--- src/compiler/shader_enums.h | 7 +++++-- src/compiler/spirv/spirv_to_nir.c | 6 +++--- src/compiler/spirv/vtn_variables.c | 2 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 2 +- src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 2 +- 12 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index b4afb897ae9..a2201a08c8c 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -524,7 +524,7 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info, nir_ssa_def *zero = nir_imm_int(b, 0); nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero, .base = output->offset, .write_mask = mask, - .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY); + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); } nir_pop_if(b, NULL); @@ -581,7 +581,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, outputs.data[i][j] = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero, .base = offset, - .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY); + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); /* clamp legacy color output */ if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 || @@ -607,7 +607,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, nir_ssa_def *data = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero, .base = offset, - 
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY); + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); if (has_lo_16bit) outputs.data_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data); @@ -944,7 +944,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in nir_ssa_def *data = nir_u2uN(b, output, 32); nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0), - .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY | + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL | ACCESS_IS_SWIZZLED_AMD, .base = base, /* For ACO to not reorder this store around EmitVertex/EndPrimitve */ @@ -988,7 +988,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi), gsvs_ring, voffset, soffset, nir_imm_int(b, 0), - .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY | + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL | ACCESS_IS_SWIZZLED_AMD, /* For ACO to not reorder this store around EmitVertex/EndPrimitve */ .memory_modes = nir_var_shader_out); diff --git a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c index bb490d5f391..53471f3c14c 100644 --- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c @@ -113,7 +113,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero, .base = start_byte, .memory_modes = nir_var_shader_out, .access = ACCESS_COHERENT | - (slc ? ACCESS_STREAM_CACHE_POLICY : 0) | + (slc ? ACCESS_NON_TEMPORAL : 0) | (swizzled ? 
ACCESS_IS_SWIZZLED_AMD : 0)); start_byte += store_bytes; diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 9ae0983ccf3..0414f4d49ac 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -2017,7 +2017,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info, vtx_buffer_offsets[out->buffer], zero, zero, .base = out->offset, - .access = ACCESS_STREAM_CACHE_POLICY); + .access = ACCESS_NON_TEMPORAL); } } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c9b78c520d6..7bb3a2491b9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7092,7 +7092,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin) Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp(); bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT; - bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY; + bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL; unsigned const_offset = nir_intrinsic_base(intrin); unsigned elem_size_bytes = intrin->dest.ssa.bit_size / 8u; @@ -7164,7 +7164,7 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin) Temp idx = idxen ? 
as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp(); bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT; - bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY; + bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL; unsigned const_offset = nir_intrinsic_base(intrin); unsigned write_mask = nir_intrinsic_write_mask(intrin); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 3a3c7d5c950..000033b4e7a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1822,7 +1822,7 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qual cache_policy |= ac_glc; } - if (access & ACCESS_STREAM_CACHE_POLICY) + if (access & ACCESS_NON_TEMPORAL) cache_policy |= ac_slc | ac_glc; return cache_policy; @@ -4005,7 +4005,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD; bool reorder = nir_intrinsic_can_reorder(instr); bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT; - bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY; + bool slc = nir_intrinsic_access(instr) & ACCESS_NON_TEMPORAL; bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD; enum ac_image_cache_policy cache_policy = 0; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index edab512121e..3c00171da04 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -650,12 +650,12 @@ print_var_decl(nir_variable *var, print_state *state) const char *const ronly = (access & ACCESS_NON_WRITEABLE) ? "readonly " : ""; const char *const wonly = (access & ACCESS_NON_READABLE) ? "writeonly " : ""; const char *const reorder = (access & ACCESS_CAN_REORDER) ? "reorderable " : ""; - const char *const stream_cache_policy = (access & ACCESS_STREAM_CACHE_POLICY) ? 
- "stream-cache-policy " : ""; + const char *const non_temporal = (access & ACCESS_NON_TEMPORAL) ? + "non-temporal " : ""; const char *const include_helpers = (access & ACCESS_INCLUDE_HELPERS) ? "include-helpers " : ""; fprintf(fp, "%s%s%s%s%s%s%s%s", coher, volat, restr, ronly, wonly, reorder, - stream_cache_policy, include_helpers); + non_temporal, include_helpers); if (glsl_get_base_type(glsl_without_array(var->type)) == GLSL_TYPE_IMAGE) { fprintf(fp, "%s ", util_format_short_name(var->data.image.format)); diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 2dfca1d7651..d86df6c59f0 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -1026,8 +1026,11 @@ enum gl_access_qualifier */ ACCESS_CAN_REORDER = (1 << 6), - /** Use as little cache space as possible. */ - ACCESS_STREAM_CACHE_POLICY = (1 << 7), + /** + * Hints that the accessed address is not likely to be accessed again + * in the near future. This reduces data retention in caches. + */ + ACCESS_NON_TEMPORAL = (1 << 7), /** Execute instruction also in helpers. 
*/ ACCESS_INCLUDE_HELPERS = (1 << 8), diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 5997e861f3b..3dbb77f5bff 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -3113,7 +3113,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access); if (operands & SpvImageOperandsNontemporalMask) - access |= ACCESS_STREAM_CACHE_POLICY; + access |= ACCESS_NON_TEMPORAL; if (sampler && b->options->force_tex_non_uniform) access |= ACCESS_NON_UNIFORM; @@ -3370,7 +3370,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (operands & SpvImageOperandsVolatileTexelMask) access |= ACCESS_VOLATILE; if (operands & SpvImageOperandsNontemporalMask) - access |= ACCESS_STREAM_CACHE_POLICY; + access |= ACCESS_NON_TEMPORAL; break; } @@ -3412,7 +3412,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (operands & SpvImageOperandsVolatileTexelMask) access |= ACCESS_VOLATILE; if (operands & SpvImageOperandsNontemporalMask) - access |= ACCESS_STREAM_CACHE_POLICY; + access |= ACCESS_NON_TEMPORAL; break; } diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 8db61c8e61a..2a7ebcfb76f 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -2345,7 +2345,7 @@ spv_access_to_gl_access(SpvMemoryAccessMask access) if (access & SpvMemoryAccessVolatileMask) result |= ACCESS_VOLATILE; if (access & SpvMemoryAccessNontemporalMask) - result |= ACCESS_STREAM_CACHE_POLICY; + result |= ACCESS_NON_TEMPORAL; return result; } diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 579a62bcade..a8f5cff0c02 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1673,7 +1673,7 @@ get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst) if (tgsi_inst->Memory.Qualifier & 
TGSI_MEMORY_VOLATILE) access |= ACCESS_VOLATILE; if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY) - access |= ACCESS_STREAM_CACHE_POLICY; + access |= ACCESS_NON_TEMPORAL; return access; } diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index a4da2732fb2..f0ef2b0eadc 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -268,7 +268,7 @@ void *si_create_clear_buffer_rmw_cs(struct si_context *sctx) data = nir_ior(&b, data, nir_channel(&b, user_sgprs, 0)); nir_store_ssbo(&b, data, zero, address, - .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_STREAM_CACHE_POLICY : 0, + .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_NON_TEMPORAL : 0, .align_mul = 4); return create_shader_state(sctx, b.shader); diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 32f0934be41..b1626430e19 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -202,7 +202,7 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform); break; case ACCESS_CAN_REORDER: - case ACCESS_STREAM_CACHE_POLICY: + case ACCESS_NON_TEMPORAL: /* no equivalent */ break; default: