nir: rename ACCESS_STREAM_CACHE_POLICY -> ACCESS_NON_TEMPORAL and document
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22769>
This commit is contained in:
@@ -524,7 +524,7 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
|
|||||||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||||
nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
|
nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
|
||||||
.base = output->offset, .write_mask = mask,
|
.base = output->offset, .write_mask = mask,
|
||||||
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
|
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
@@ -581,7 +581,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
|||||||
outputs.data[i][j] =
|
outputs.data[i][j] =
|
||||||
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
||||||
.base = offset,
|
.base = offset,
|
||||||
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
|
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||||
|
|
||||||
/* clamp legacy color output */
|
/* clamp legacy color output */
|
||||||
if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
|
if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
|
||||||
@@ -607,7 +607,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
|||||||
nir_ssa_def *data =
|
nir_ssa_def *data =
|
||||||
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
||||||
.base = offset,
|
.base = offset,
|
||||||
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
|
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||||
|
|
||||||
if (has_lo_16bit)
|
if (has_lo_16bit)
|
||||||
outputs.data_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
|
outputs.data_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
|
||||||
@@ -944,7 +944,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
|
|||||||
nir_ssa_def *data = nir_u2uN(b, output, 32);
|
nir_ssa_def *data = nir_u2uN(b, output, 32);
|
||||||
|
|
||||||
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||||
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
|
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
|
||||||
ACCESS_IS_SWIZZLED_AMD,
|
ACCESS_IS_SWIZZLED_AMD,
|
||||||
.base = base,
|
.base = base,
|
||||||
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||||
@@ -988,7 +988,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
|
|||||||
|
|
||||||
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
|
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
|
||||||
gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||||
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
|
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
|
||||||
ACCESS_IS_SWIZZLED_AMD,
|
ACCESS_IS_SWIZZLED_AMD,
|
||||||
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||||
.memory_modes = nir_var_shader_out);
|
.memory_modes = nir_var_shader_out);
|
||||||
|
@@ -113,7 +113,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
|
|||||||
nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
|
nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
|
||||||
.base = start_byte, .memory_modes = nir_var_shader_out,
|
.base = start_byte, .memory_modes = nir_var_shader_out,
|
||||||
.access = ACCESS_COHERENT |
|
.access = ACCESS_COHERENT |
|
||||||
(slc ? ACCESS_STREAM_CACHE_POLICY : 0) |
|
(slc ? ACCESS_NON_TEMPORAL : 0) |
|
||||||
(swizzled ? ACCESS_IS_SWIZZLED_AMD : 0));
|
(swizzled ? ACCESS_IS_SWIZZLED_AMD : 0));
|
||||||
|
|
||||||
start_byte += store_bytes;
|
start_byte += store_bytes;
|
||||||
|
@@ -2017,7 +2017,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
|
|||||||
vtx_buffer_offsets[out->buffer],
|
vtx_buffer_offsets[out->buffer],
|
||||||
zero, zero,
|
zero, zero,
|
||||||
.base = out->offset,
|
.base = out->offset,
|
||||||
.access = ACCESS_STREAM_CACHE_POLICY);
|
.access = ACCESS_NON_TEMPORAL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -7092,7 +7092,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||||||
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
|
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
|
||||||
|
|
||||||
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
|
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
|
||||||
bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
|
bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
unsigned const_offset = nir_intrinsic_base(intrin);
|
unsigned const_offset = nir_intrinsic_base(intrin);
|
||||||
unsigned elem_size_bytes = intrin->dest.ssa.bit_size / 8u;
|
unsigned elem_size_bytes = intrin->dest.ssa.bit_size / 8u;
|
||||||
@@ -7164,7 +7164,7 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||||||
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp();
|
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp();
|
||||||
|
|
||||||
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
|
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
|
||||||
bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
|
bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
unsigned const_offset = nir_intrinsic_base(intrin);
|
unsigned const_offset = nir_intrinsic_base(intrin);
|
||||||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||||
|
@@ -1822,7 +1822,7 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qual
|
|||||||
cache_policy |= ac_glc;
|
cache_policy |= ac_glc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (access & ACCESS_STREAM_CACHE_POLICY)
|
if (access & ACCESS_NON_TEMPORAL)
|
||||||
cache_policy |= ac_slc | ac_glc;
|
cache_policy |= ac_slc | ac_glc;
|
||||||
|
|
||||||
return cache_policy;
|
return cache_policy;
|
||||||
@@ -4005,7 +4005,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||||||
bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
|
bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
|
||||||
bool reorder = nir_intrinsic_can_reorder(instr);
|
bool reorder = nir_intrinsic_can_reorder(instr);
|
||||||
bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
|
bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
|
||||||
bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
|
bool slc = nir_intrinsic_access(instr) & ACCESS_NON_TEMPORAL;
|
||||||
bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD;
|
bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD;
|
||||||
|
|
||||||
enum ac_image_cache_policy cache_policy = 0;
|
enum ac_image_cache_policy cache_policy = 0;
|
||||||
|
@@ -650,12 +650,12 @@ print_var_decl(nir_variable *var, print_state *state)
|
|||||||
const char *const ronly = (access & ACCESS_NON_WRITEABLE) ? "readonly " : "";
|
const char *const ronly = (access & ACCESS_NON_WRITEABLE) ? "readonly " : "";
|
||||||
const char *const wonly = (access & ACCESS_NON_READABLE) ? "writeonly " : "";
|
const char *const wonly = (access & ACCESS_NON_READABLE) ? "writeonly " : "";
|
||||||
const char *const reorder = (access & ACCESS_CAN_REORDER) ? "reorderable " : "";
|
const char *const reorder = (access & ACCESS_CAN_REORDER) ? "reorderable " : "";
|
||||||
const char *const stream_cache_policy = (access & ACCESS_STREAM_CACHE_POLICY) ?
|
const char *const non_temporal = (access & ACCESS_NON_TEMPORAL) ?
|
||||||
"stream-cache-policy " : "";
|
"non-temporal" : "";
|
||||||
const char *const include_helpers = (access & ACCESS_INCLUDE_HELPERS) ?
|
const char *const include_helpers = (access & ACCESS_INCLUDE_HELPERS) ?
|
||||||
"include-helpers " : "";
|
"include-helpers " : "";
|
||||||
fprintf(fp, "%s%s%s%s%s%s%s%s", coher, volat, restr, ronly, wonly, reorder,
|
fprintf(fp, "%s%s%s%s%s%s%s%s", coher, volat, restr, ronly, wonly, reorder,
|
||||||
stream_cache_policy, include_helpers);
|
non_temporal, include_helpers);
|
||||||
|
|
||||||
if (glsl_get_base_type(glsl_without_array(var->type)) == GLSL_TYPE_IMAGE) {
|
if (glsl_get_base_type(glsl_without_array(var->type)) == GLSL_TYPE_IMAGE) {
|
||||||
fprintf(fp, "%s ", util_format_short_name(var->data.image.format));
|
fprintf(fp, "%s ", util_format_short_name(var->data.image.format));
|
||||||
|
@@ -1026,8 +1026,11 @@ enum gl_access_qualifier
|
|||||||
*/
|
*/
|
||||||
ACCESS_CAN_REORDER = (1 << 6),
|
ACCESS_CAN_REORDER = (1 << 6),
|
||||||
|
|
||||||
/** Use as little cache space as possible. */
|
/**
|
||||||
ACCESS_STREAM_CACHE_POLICY = (1 << 7),
|
* Hints that the accessed address is not likely to be accessed again
|
||||||
|
* in the near future. This reduces data retention in caches.
|
||||||
|
*/
|
||||||
|
ACCESS_NON_TEMPORAL = (1 << 7),
|
||||||
|
|
||||||
/** Execute instruction also in helpers. */
|
/** Execute instruction also in helpers. */
|
||||||
ACCESS_INCLUDE_HELPERS = (1 << 8),
|
ACCESS_INCLUDE_HELPERS = (1 << 8),
|
||||||
|
@@ -3113,7 +3113,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
|||||||
vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
|
vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
|
||||||
|
|
||||||
if (operands & SpvImageOperandsNontemporalMask)
|
if (operands & SpvImageOperandsNontemporalMask)
|
||||||
access |= ACCESS_STREAM_CACHE_POLICY;
|
access |= ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
if (sampler && b->options->force_tex_non_uniform)
|
if (sampler && b->options->force_tex_non_uniform)
|
||||||
access |= ACCESS_NON_UNIFORM;
|
access |= ACCESS_NON_UNIFORM;
|
||||||
@@ -3370,7 +3370,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
|
|||||||
if (operands & SpvImageOperandsVolatileTexelMask)
|
if (operands & SpvImageOperandsVolatileTexelMask)
|
||||||
access |= ACCESS_VOLATILE;
|
access |= ACCESS_VOLATILE;
|
||||||
if (operands & SpvImageOperandsNontemporalMask)
|
if (operands & SpvImageOperandsNontemporalMask)
|
||||||
access |= ACCESS_STREAM_CACHE_POLICY;
|
access |= ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -3412,7 +3412,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
|
|||||||
if (operands & SpvImageOperandsVolatileTexelMask)
|
if (operands & SpvImageOperandsVolatileTexelMask)
|
||||||
access |= ACCESS_VOLATILE;
|
access |= ACCESS_VOLATILE;
|
||||||
if (operands & SpvImageOperandsNontemporalMask)
|
if (operands & SpvImageOperandsNontemporalMask)
|
||||||
access |= ACCESS_STREAM_CACHE_POLICY;
|
access |= ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -2345,7 +2345,7 @@ spv_access_to_gl_access(SpvMemoryAccessMask access)
|
|||||||
if (access & SpvMemoryAccessVolatileMask)
|
if (access & SpvMemoryAccessVolatileMask)
|
||||||
result |= ACCESS_VOLATILE;
|
result |= ACCESS_VOLATILE;
|
||||||
if (access & SpvMemoryAccessNontemporalMask)
|
if (access & SpvMemoryAccessNontemporalMask)
|
||||||
result |= ACCESS_STREAM_CACHE_POLICY;
|
result |= ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@@ -1673,7 +1673,7 @@ get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
|
|||||||
if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
||||||
access |= ACCESS_VOLATILE;
|
access |= ACCESS_VOLATILE;
|
||||||
if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
|
if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
|
||||||
access |= ACCESS_STREAM_CACHE_POLICY;
|
access |= ACCESS_NON_TEMPORAL;
|
||||||
|
|
||||||
return access;
|
return access;
|
||||||
}
|
}
|
||||||
|
@@ -268,7 +268,7 @@ void *si_create_clear_buffer_rmw_cs(struct si_context *sctx)
|
|||||||
data = nir_ior(&b, data, nir_channel(&b, user_sgprs, 0));
|
data = nir_ior(&b, data, nir_channel(&b, user_sgprs, 0));
|
||||||
|
|
||||||
nir_store_ssbo(&b, data, zero, address,
|
nir_store_ssbo(&b, data, zero, address,
|
||||||
.access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_STREAM_CACHE_POLICY : 0,
|
.access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_NON_TEMPORAL : 0,
|
||||||
.align_mul = 4);
|
.align_mul = 4);
|
||||||
|
|
||||||
return create_shader_state(sctx, b.shader);
|
return create_shader_state(sctx, b.shader);
|
||||||
|
@@ -202,7 +202,7 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id
|
|||||||
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform);
|
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform);
|
||||||
break;
|
break;
|
||||||
case ACCESS_CAN_REORDER:
|
case ACCESS_CAN_REORDER:
|
||||||
case ACCESS_STREAM_CACHE_POLICY:
|
case ACCESS_NON_TEMPORAL:
|
||||||
/* no equivalent */
|
/* no equivalent */
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
Reference in New Issue
Block a user