lavapipe: Implement shaderResourceResidency

Adds a bit set to llvmpipe_resource where each bit stores the residency
of a 64KB tile. The sampling code is adjusted to make use of said table
and return a residency code for sparse texture operations.

Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29408>
This commit is contained in:
Konstantin Seurer
2024-05-26 13:51:58 +02:00
committed by Marge Bot
parent d747c4a874
commit 6168317b84
16 changed files with 301 additions and 58 deletions

View File

@@ -154,8 +154,8 @@ lp_bld_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base
if (params->texture_resource) {
LLVMTypeRef out_data_type = lp_build_vec_type(gallivm, params->type);
LLVMValueRef out_data[4];
for (uint32_t i = 0; i < 4; i++) {
LLVMValueRef out_data[5];
for (uint32_t i = 0; i < ARRAY_SIZE(out_data); i++) {
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
LLVMBuildStore(builder, lp_build_const_vec(gallivm, params->type, 0), out_data[i]);
}
@@ -264,7 +264,7 @@ lp_bld_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base
LLVMValueRef result = LLVMBuildCall2(builder, texture_function_type, texture_function, args, num_args, "");
for (unsigned i = 0; i < 4; i++) {
for (unsigned i = 0; i < ARRAY_SIZE(out_data); i++) {
params->texel[i] = LLVMBuildExtractValue(gallivm->builder, result, i, "");
if (params->type.length != lp_native_vector_width / 32)
@@ -275,7 +275,7 @@ lp_bld_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base
lp_build_endif(&if_state);
for (unsigned i = 0; i < 4; i++)
for (unsigned i = 0; i < ARRAY_SIZE(out_data); i++)
params->texel[i] = LLVMBuildLoad2(gallivm->builder, out_data_type, out_data[i], "");
return;
@@ -443,8 +443,8 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
const struct util_format_description *desc = util_format_description(params->format);
LLVMTypeRef out_data_type = lp_build_vec_type(gallivm, lp_build_texel_type(params->type, desc));
LLVMValueRef out_data[4];
for (uint32_t i = 0; i < 4; i++) {
LLVMValueRef out_data[5];
for (uint32_t i = 0; i < ARRAY_SIZE(out_data); i++) {
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
LLVMBuildStore(builder, lp_build_const_vec(gallivm, lp_build_texel_type(params->type, desc), 0), out_data[i]);
}
@@ -500,7 +500,7 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
args[num_args++] = image_descriptor;
if (params->img_op != LP_IMG_LOAD)
if (params->img_op != LP_IMG_LOAD && params->img_op != LP_IMG_LOAD_SPARSE)
args[num_args++] = params->exec_mask;
for (uint32_t i = 0; i < 3; i++)
@@ -509,7 +509,7 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
if (params->ms_index)
args[num_args++] = params->ms_index;
if (params->img_op != LP_IMG_LOAD)
if (params->img_op != LP_IMG_LOAD && params->img_op != LP_IMG_LOAD_SPARSE)
for (uint32_t i = 0; i < 4; i++)
args[num_args++] = params->indata[i];
@@ -532,7 +532,8 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
LLVMValueRef result = LLVMBuildCall2(builder, image_function_type, image_function, args, num_args, "");
if (params->img_op != LP_IMG_STORE) {
for (unsigned i = 0; i < 4; i++) {
uint32_t channel_count = params->img_op == LP_IMG_LOAD_SPARSE ? 5 : 4;
for (unsigned i = 0; i < channel_count; i++) {
LLVMValueRef channel = LLVMBuildExtractValue(gallivm->builder, result, i, "");
if (params->type.length != lp_native_vector_width / 32)
channel = truncate_to_type_width(gallivm, channel, params->type);
@@ -544,7 +545,7 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
lp_build_endif(&if_state);
if (params->img_op != LP_IMG_STORE) {
for (unsigned i = 0; i < 4; i++) {
for (unsigned i = 0; i < ARRAY_SIZE(out_data); i++) {
params->outdata[i] = LLVMBuildLoad2(gallivm->builder, out_data_type, out_data[i], "");
}
}

View File

@@ -239,10 +239,12 @@ lp_build_create_jit_image_type(struct gallivm_state *gallivm)
elem_types[LP_JIT_IMAGE_HEIGHT] =
elem_types[LP_JIT_IMAGE_DEPTH] = LLVMInt16TypeInContext(lc);
elem_types[LP_JIT_IMAGE_NUM_SAMPLES] = LLVMInt8TypeInContext(lc);
elem_types[LP_JIT_IMAGE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_IMAGE_BASE] =
elem_types[LP_JIT_IMAGE_RESIDENCY] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_IMAGE_ROW_STRIDE] =
elem_types[LP_JIT_IMAGE_IMG_STRIDE] =
elem_types[LP_JIT_IMAGE_SAMPLE_STRIDE] = LLVMInt32TypeInContext(lc);
elem_types[LP_JIT_IMAGE_SAMPLE_STRIDE] =
elem_types[LP_JIT_IMAGE_BASE_OFFSET] = LLVMInt32TypeInContext(lc);
image_type = LLVMStructTypeInContext(lc, elem_types,
ARRAY_SIZE(elem_types), 0);
@@ -407,6 +409,38 @@ lp_build_llvm_texture_member(struct gallivm_state *gallivm,
return res;
}
/**
 * Emit IR that loads the residency pointer from the texture descriptor
 * referenced by gallivm->texture_descriptor.
 *
 * The two static asserts pin the layout this relies on: the descriptor
 * begins with its texture member, and `residency` lives at the same offset
 * as `row_stride`.  The row-stride slot is therefore addressed and then
 * reinterpreted as a pointer to an i8 pointer before loading.
 *
 * `resources_ptr`, `texture_unit` and `texture_unit_offset` are not read by
 * this implementation.
 *
 * \return the i8 pointer loaded from the descriptor's residency slot.
 */
static LLVMValueRef
lp_build_llvm_texture_residency(struct gallivm_state *gallivm,
                                LLVMTypeRef resources_type,
                                LLVMValueRef resources_ptr,
                                unsigned texture_unit,
                                LLVMValueRef texture_unit_offset)
{
   static_assert(offsetof(struct lp_descriptor, texture) == 0, "Invalid texture offset");
   static_assert(offsetof(struct lp_jit_texture, row_stride) == offsetof(struct lp_jit_texture, residency),
                 "Invalid texture descriptor layout");

   LLVMBuilderRef builder = gallivm->builder;

   /* Element type of the resources struct's textures member. */
   LLVMTypeRef textures_member_type = LLVMStructGetTypeAtIndex(resources_type, LP_JIT_RES_TEXTURES);
   LLVMTypeRef texture_type = LLVMGetElementType(textures_member_type);

   /* The descriptor is carried as an integer; convert it to a typed pointer. */
   LLVMValueRef descriptor =
      LLVMBuildIntToPtr(builder, gallivm->texture_descriptor, LLVMPointerType(texture_type, 0), "");

   /* Address the slot shared by row_stride and residency. */
   LLVMValueRef gep_indices[2] = {
      lp_build_const_int32(gallivm, 0),
      lp_build_const_int32(gallivm, LP_JIT_TEXTURE_ROW_STRIDE),
   };
   LLVMValueRef slot =
      LLVMBuildGEP2(builder, texture_type, descriptor, gep_indices, ARRAY_SIZE(gep_indices), "");

   /* Reinterpret the slot as holding an i8 pointer and load it. */
   LLVMTypeRef i8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
   slot = LLVMBuildBitCast(builder, slot, LLVMPointerType(i8_ptr_type, 0), "");
   return LLVMBuildLoad2(builder, i8_ptr_type, slot, "");
}
/**
* Helper macro to instantiate the functions that generate the code to
@@ -652,6 +686,8 @@ LP_BUILD_LLVM_IMAGE_MEMBER_OUTTYPE(row_stride, LP_JIT_IMAGE_ROW_STRIDE, true)
LP_BUILD_LLVM_IMAGE_MEMBER_OUTTYPE(img_stride, LP_JIT_IMAGE_IMG_STRIDE, true)
LP_BUILD_LLVM_IMAGE_MEMBER(num_samples, LP_JIT_IMAGE_NUM_SAMPLES, true)
LP_BUILD_LLVM_IMAGE_MEMBER(sample_stride, LP_JIT_IMAGE_SAMPLE_STRIDE, true)
LP_BUILD_LLVM_IMAGE_MEMBER(residency, LP_JIT_IMAGE_RESIDENCY, true)
LP_BUILD_LLVM_IMAGE_MEMBER(base_offset, LP_JIT_IMAGE_BASE_OFFSET, true)
void
lp_build_jit_fill_sampler_dynamic_state(struct lp_sampler_dynamic_state *state)
@@ -665,6 +701,7 @@ lp_build_jit_fill_sampler_dynamic_state(struct lp_sampler_dynamic_state *state)
state->row_stride = lp_build_llvm_texture_row_stride;
state->img_stride = lp_build_llvm_texture_img_stride;
state->mip_offsets = lp_build_llvm_texture_mip_offsets;
state->residency = lp_build_llvm_texture_residency;
state->min_lod = lp_build_llvm_sampler_min_lod;
state->max_lod = lp_build_llvm_sampler_max_lod;
@@ -685,6 +722,8 @@ lp_build_jit_fill_image_dynamic_state(struct lp_sampler_dynamic_state *state)
state->img_stride = lp_build_llvm_image_img_stride;
state->last_level = lp_build_llvm_image_num_samples;
state->sample_stride = lp_build_llvm_image_sample_stride;
state->residency = lp_build_llvm_image_residency;
state->base_offset = lp_build_llvm_image_base_offset;
}
/**
@@ -751,7 +790,7 @@ lp_build_sample_function_type(struct gallivm_state *gallivm, uint32_t sample_key
LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
LLVMTypeRef ret_type;
LLVMTypeRef val_type[4];
LLVMTypeRef val_type[5];
uint32_t num_params = 0;
LLVMTypeRef coord_type;
@@ -782,7 +821,8 @@ lp_build_sample_function_type(struct gallivm_state *gallivm, uint32_t sample_key
arg_types[num_params++] = coord_type;
val_type[0] = val_type[1] = val_type[2] = val_type[3] = lp_build_vec_type(gallivm, type);
ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
val_type[4] = lp_build_int_vec_type(gallivm, type);
ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 5, 0);
return LLVMFunctionType(ret_type, arg_types, num_params, false);
}
@@ -831,7 +871,7 @@ lp_build_image_function_type(struct gallivm_state *gallivm,
arg_types[num_params++] = LLVMInt64TypeInContext(gallivm->context);
if (params->img_op != LP_IMG_LOAD)
if (params->img_op != LP_IMG_LOAD && params->img_op != LP_IMG_LOAD_SPARSE)
arg_types[num_params++] = lp_build_int_vec_type(gallivm, type);
for (uint32_t i = 0; i < 3; i++)
@@ -840,7 +880,7 @@ lp_build_image_function_type(struct gallivm_state *gallivm,
if (ms)
arg_types[num_params++] = lp_build_vec_type(gallivm, lp_uint_type(type));
uint32_t num_inputs = params->img_op != LP_IMG_LOAD ? 4 : 0;
uint32_t num_inputs = params->img_op != LP_IMG_LOAD && params->img_op != LP_IMG_LOAD_SPARSE ? 4 : 0;
if (params->img_op == LP_IMG_ATOMIC_CAS)
num_inputs = 8;
@@ -849,8 +889,13 @@ lp_build_image_function_type(struct gallivm_state *gallivm,
for (uint32_t i = 0; i < num_inputs; i++)
arg_types[num_params++] = component_type;
if (params->img_op != LP_IMG_STORE) {
if (params->img_op == LP_IMG_LOAD_SPARSE) {
LLVMTypeRef val_type[5];
val_type[0] = val_type[1] = val_type[2] = val_type[3] = component_type;
val_type[4] = lp_build_int_vec_type(gallivm, type);
ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 5, 0);
} else if (params->img_op != LP_IMG_STORE) {
LLVMTypeRef val_type[4];
val_type[0] = val_type[1] = val_type[2] = val_type[3] = component_type;
ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);

View File

@@ -68,10 +68,15 @@ struct lp_jit_texture
uint32_t width; /* same as number of elements */
uint16_t height;
uint16_t depth; /* doubles as array size */
union {
struct {
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
};
const void *residency;
};
uint8_t first_level;
uint8_t last_level; /* contains num_samples for multisample */
uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; /* sample stride is in mip_offsets[15] */
uint32_t sampler_index;
};
@@ -81,10 +86,10 @@ enum {
LP_JIT_TEXTURE_WIDTH,
LP_JIT_TEXTURE_HEIGHT,
LP_JIT_TEXTURE_DEPTH,
LP_JIT_TEXTURE_FIRST_LEVEL,
LP_JIT_TEXTURE_LAST_LEVEL,
LP_JIT_TEXTURE_ROW_STRIDE,
LP_JIT_TEXTURE_IMG_STRIDE,
LP_JIT_TEXTURE_FIRST_LEVEL,
LP_JIT_TEXTURE_LAST_LEVEL,
LP_JIT_TEXTURE_MIP_OFFSETS,
LP_JIT_SAMPLER_INDEX_DUMMY,
LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
@@ -118,6 +123,8 @@ struct lp_jit_image
uint32_t sample_stride;
uint32_t row_stride;
uint32_t img_stride;
const void *residency;
uint32_t base_offset;
};
enum {
@@ -129,6 +136,8 @@ enum {
LP_JIT_IMAGE_SAMPLE_STRIDE,
LP_JIT_IMAGE_ROW_STRIDE,
LP_JIT_IMAGE_IMG_STRIDE,
LP_JIT_IMAGE_RESIDENCY,
LP_JIT_IMAGE_BASE_OFFSET,
LP_JIT_IMAGE_NUM_FIELDS /* number of fields above */
};

View File

@@ -1624,7 +1624,7 @@ visit_load_image(struct lp_build_nir_context *bld_base,
params.coords = coords;
params.outdata = result;
params.img_op = LP_IMG_LOAD;
lp_img_op_from_intrinsic(&params, instr);
if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS ||
nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_SUBPASS_MS)
params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]),
@@ -1710,6 +1710,11 @@ lp_img_op_from_intrinsic(struct lp_img_params *params, nir_intrinsic_instr *inst
return;
}
if (instr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
params->img_op = LP_IMG_LOAD_SPARSE;
return;
}
if (instr->intrinsic == nir_intrinsic_image_store ||
instr->intrinsic == nir_intrinsic_bindless_image_store) {
params->img_op = LP_IMG_STORE;
@@ -2191,6 +2196,7 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
break;
case nir_intrinsic_image_load:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
visit_load_image(bld_base, instr, result);
break;
case nir_intrinsic_image_store:
@@ -2450,6 +2456,9 @@ lp_build_nir_sample_key(gl_shader_stage stage, nir_tex_instr *instr)
sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
if (instr->is_sparse)
sample_key |= LP_SAMPLER_RESIDENCY;
return sample_key;
}

View File

@@ -98,6 +98,8 @@ enum lp_sampler_op_type {
#define LP_SAMPLER_GATHER_COMP_SHIFT 8
#define LP_SAMPLER_GATHER_COMP_MASK (3 << 8)
#define LP_SAMPLER_FETCH_MS (1 << 10)
#define LP_SAMPLER_RESIDENCY (1 << 11)
#define LP_SAMPLE_KEY_COUNT (1 << 12)
/* Parameters used to handle TEX instructions */
@@ -147,10 +149,11 @@ struct lp_sampler_size_query_params
};
#define LP_IMG_LOAD 0
#define LP_IMG_STORE 1
#define LP_IMG_ATOMIC 2
#define LP_IMG_ATOMIC_CAS 3
#define LP_IMG_OP_COUNT 4
#define LP_IMG_LOAD_SPARSE 1
#define LP_IMG_STORE 2
#define LP_IMG_ATOMIC 3
#define LP_IMG_ATOMIC_CAS 4
#define LP_IMG_OP_COUNT 5
struct lp_img_params
{
@@ -373,6 +376,20 @@ struct lp_sampler_dynamic_state
LLVMTypeRef thread_data_type,
LLVMValueRef thread_data_ptr,
unsigned unit);
/** Obtain pointer to a bitset of resident tiles. */
LLVMValueRef
(*residency)(struct gallivm_state *gallivm,
LLVMTypeRef resources_type,
LLVMValueRef resources_ptr,
unsigned texture_unit, LLVMValueRef texture_unit_offset);
/** Obtain the offset of base_ptr into the referenced resource. */
LLVMValueRef
(*base_offset)(struct gallivm_state *gallivm,
LLVMTypeRef resources_type,
LLVMValueRef resources_ptr,
unsigned texture_unit, LLVMValueRef texture_unit_offset);
};
@@ -416,6 +433,7 @@ struct lp_build_sample_context
bool no_brilinear;
bool no_rho_approx;
bool fetch_ms;
bool residency;
/** regular scalar float type */
struct lp_type float_type;
@@ -494,6 +512,8 @@ struct lp_build_sample_context
LLVMValueRef resources_ptr;
LLVMValueRef aniso_filter_table;
LLVMValueRef resident;
};
/*
@@ -774,7 +794,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
LLVMValueRef lod, /* optional */
LLVMValueRef ms_index, /* optional */
LLVMValueRef aniso_filter_table,
LLVMValueRef texel_out[4]);
LLVMValueRef *texel_out);
void
@@ -818,7 +838,7 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct gallivm_state *gallivm,
const struct lp_img_params *params,
LLVMValueRef outdata[4]);
LLVMValueRef *outdata);
void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,

View File

@@ -36,6 +36,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/bitset.h"
#include "util/compiler.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
@@ -65,6 +66,52 @@
#include "lp_bld_misc.h"
#include "lp_bld_jit_types.h"
/**
 * Emit IR that checks, per vector lane, whether the 64KB tile containing
 * byte `offset` has its bit set in the residency bitset, and fold the
 * answer into *out_resident.
 *
 * The bitset pointer is obtained through dynamic_state->residency().  It is
 * read as 32-bit words (asserted against BITSET_WORD), one bit per tile.
 *
 * \param bld           build context; its type is converted to the matching
 *                      integer vector type for all the arithmetic here
 * \param offset        per-lane byte offsets
 * \param out_resident  in/out: if it already holds a value, that value is
 *                      ANDed with the new result; otherwise it is set to
 *                      the new result
 */
static void
lp_build_gather_resident(struct lp_build_context *bld,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         LLVMTypeRef resources_type,
                         LLVMValueRef resources_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef *out_resident)
{
   static_assert(sizeof(BITSET_WORD) == 4, "Unexpected BITSET_WORD size");

   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type int_type = lp_int_type(bld->type);

   LLVMValueRef bitset_ptr =
      dynamic_state->residency(gallivm, resources_type, resources_ptr, 0, NULL);

   /* Byte offset -> index of the 64KB tile it falls into. */
   LLVMValueRef tile_shift =
      lp_build_const_int_vec(gallivm, int_type, util_logbase2(64 * 1024));
   LLVMValueRef tile = LLVMBuildLShr(builder, offset, tile_shift, "");

   /* Tile index -> index of the 32-bit word holding its bit. */
   LLVMValueRef word_bits_shift =
      lp_build_const_int_vec(gallivm, int_type, util_logbase2(32));
   LLVMValueRef word_index = LLVMBuildLShr(builder, tile, word_bits_shift, "");

   /* Word index -> byte offset of that word inside the bitset. */
   LLVMValueRef word_bytes_shift =
      lp_build_const_int_vec(gallivm, int_type, util_logbase2(4));
   LLVMValueRef word_byte_offset = LLVMBuildShl(builder, word_index, word_bytes_shift, "");

   LLVMValueRef words =
      lp_build_gather(gallivm, int_type.length, int_type.width, lp_elem_type(int_type),
                      true, bitset_ptr, word_byte_offset, true);

   /* Select the tile's bit within its word (tile index modulo 32). */
   LLVMValueRef low_bits = lp_build_const_int_vec(gallivm, int_type, 31);
   LLVMValueRef bit = LLVMBuildAnd(builder, tile, low_bits, "");
   LLVMValueRef bit_select = LLVMBuildShl(builder, lp_build_one(gallivm, int_type), bit, "");

   LLVMValueRef is_set = LLVMBuildAnd(builder, words, bit_select, "");
   is_set = LLVMBuildICmp(builder, LLVMIntNE, is_set, lp_build_zero(gallivm, int_type), "");

   /* Accumulate: every call contributing to *out_resident must report resident. */
   LLVMValueRef merged = is_set;
   if (*out_resident)
      merged = LLVMBuildAnd(builder, *out_resident, is_set, "");
   *out_resident = merged;
}
/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
@@ -87,6 +134,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef z_stride,
LLVMValueRef data_ptr,
LLVMValueRef mipoffsets,
LLVMValueRef ilevel,
LLVMValueRef texel_out[4])
{
const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
@@ -167,6 +215,22 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
}
if (bld->residency) {
LLVMValueRef real_offset = offset;
if (!mipoffsets) {
mipoffsets = lp_build_get_mip_offsets(bld, ilevel);
real_offset = lp_build_add(&bld->int_coord_bld, real_offset, mipoffsets);
if (use_border)
real_offset = lp_build_andnot(&bld->int_coord_bld, real_offset, use_border);
}
lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
bld->resources_type, bld->resources_ptr,
real_offset, &bld->resident);
}
lp_build_fetch_rgba_soa(bld->gallivm,
bld->format_desc,
bld->texel_type, true,
@@ -916,6 +980,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
LLVMValueRef mipoffsets,
LLVMValueRef ilevel,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef colors_out[4])
@@ -985,7 +1050,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x, y, z,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, colors_out);
data_ptr, mipoffsets, ilevel, colors_out);
if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
LLVMValueRef cmpval;
@@ -1062,6 +1127,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
LLVMValueRef mipoffsets,
LLVMValueRef ilevel,
const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef colors_out[4])
@@ -1395,12 +1461,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x00, y00, z00,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[0][0]);
data_ptr, mipoffsets, ilevel, neighbors[0][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
x01, y01, z01,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[0][1]);
data_ptr, mipoffsets, ilevel, neighbors[0][1]);
if (dims == 1) {
assert(!is_gather);
@@ -1432,12 +1498,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x10, y10, z10,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][0]);
data_ptr, mipoffsets, ilevel, neighbors[1][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
x11, y11, z11,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][1]);
data_ptr, mipoffsets, ilevel, neighbors[1][1]);
/*
* To avoid having to duplicate linear_mask / fetch code use
@@ -1723,22 +1789,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x00, y00, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[0][0]);
data_ptr, mipoffsets, ilevel, neighbors1[0][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
x01, y01, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[0][1]);
data_ptr, mipoffsets, ilevel, neighbors1[0][1]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
x10, y10, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[1][0]);
data_ptr, mipoffsets, ilevel, neighbors1[1][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
x11, y11, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[1][1]);
data_ptr, mipoffsets, ilevel, neighbors1[1][1]);
if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
/* Bilinear interpolate the four samples from the second Z slice */
@@ -1854,13 +1920,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest(bld, size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, mipoff0, coords, offsets,
data_ptr0, mipoff0, ilevel0, coords, offsets,
colors0);
} else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
lp_build_sample_image_linear(bld, is_gather, size0, NULL,
row_stride0_vec, img_stride0_vec,
data_ptr0, mipoff0, coords, offsets,
data_ptr0, mipoff0, ilevel0, coords, offsets,
colors0);
}
@@ -1914,12 +1980,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest(bld, size1,
row_stride1_vec, img_stride1_vec,
data_ptr1, mipoff1, coords, offsets,
data_ptr1, mipoff1, ilevel1, coords, offsets,
colors1);
} else {
lp_build_sample_image_linear(bld, false, size1, NULL,
row_stride1_vec, img_stride1_vec,
data_ptr1, mipoff1, coords, offsets,
data_ptr1, mipoff1, ilevel1, coords, offsets,
colors1);
}
@@ -1991,7 +2057,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
lp_build_sample_image_linear(bld, false, size0, linear_mask,
row_stride0_vec, img_stride0_vec,
data_ptr0, mipoff0, coords, offsets,
data_ptr0, mipoff0, ilevel0, coords, offsets,
colors0);
/* Store the first level's colors in the output variables */
@@ -2035,7 +2101,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
lp_build_sample_image_linear(bld, false, size1, linear_mask,
row_stride1_vec, img_stride1_vec,
data_ptr1, mipoff1, coords, offsets,
data_ptr1, mipoff1, ilevel1, coords, offsets,
colors1);
/* interpolate samples from the two mipmap levels */
@@ -2456,7 +2522,7 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld,
LLVMValueRef temp_colors[4];
lp_build_sample_image_nearest(bld, size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, mipoff0, new_coords, offsets,
data_ptr0, mipoff0, ilevel0, new_coords, offsets,
temp_colors);
for (unsigned chan = 0; chan < 4; chan++) {
@@ -2546,7 +2612,7 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld,
LLVMValueRef colors_den0[4];
lp_build_sample_image_linear(bld, false, size0, NULL,
row_stride0_vec, img_stride0_vec,
data_ptr0, mipoff0, coords, offsets,
data_ptr0, mipoff0, ilevel0, coords, offsets,
colors_den0);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef chan_val =
@@ -3290,6 +3356,12 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
&offset, &out_of_bounds);
}
if (bld->residency) {
lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
bld->resources_type, bld->resources_ptr,
offset, &bld->resident);
}
offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
lp_build_fetch_rgba_soa(bld->gallivm,
@@ -3388,7 +3460,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
LLVMValueRef lod, /* optional */
LLVMValueRef ms_index, /* optional */
LLVMValueRef aniso_filter_table,
LLVMValueRef texel_out[4])
LLVMValueRef *texel_out)
{
assert(static_texture_state);
assert(static_texture_state->format < PIPE_FORMAT_COUNT);
@@ -3596,6 +3668,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
}
bld.fetch_ms = fetch_ms;
bld.residency = !!(sample_key & LP_SAMPLER_RESIDENCY);
if (op_is_gather)
bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
bld.lodf_type = type;
@@ -3770,6 +3843,8 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
} else if (op_type == LP_SAMPLER_OP_FETCH) {
lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
lod, offsets, texel_out);
if (bld.residency)
texel_out[4] = bld.resident;
} else {
LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
@@ -3830,6 +3905,8 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
texel_out[0] = lod_fpart;
texel_out[1] = lod;
texel_out[2] = texel_out[3] = bld.coord_bld.zero;
if (bld.residency)
texel_out[4] = bld.resident;
return;
}
@@ -3863,6 +3940,8 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
lod_positive, lod_fpart,
ilevel0, ilevel1,
texel_out);
if (bld.residency)
texel_out[4] = bld.resident;
}
} else {
struct lp_build_sample_context bld4;
@@ -4895,7 +4974,7 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct gallivm_state *gallivm,
const struct lp_img_params *params,
LLVMValueRef outdata[4])
LLVMValueRef *outdata)
{
const enum pipe_texture_target target = params->target;
const unsigned dims = texture_dims(target);
@@ -5005,9 +5084,22 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
sample_stride, &offset,
&out_of_bounds);
}
if (params->img_op == LP_IMG_LOAD) {
if (params->img_op == LP_IMG_LOAD || params->img_op == LP_IMG_LOAD_SPARSE) {
struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);
if (params->img_op == LP_IMG_LOAD_SPARSE && static_texture_state->tiled) {
LLVMValueRef base_offset =
dynamic_state->base_offset(gallivm, params->resources_type,
params->resources_ptr, params->image_index, NULL);
base_offset = lp_build_broadcast_scalar(&int_coord_bld, base_offset);
LLVMValueRef full_offset = LLVMBuildAdd(gallivm->builder, base_offset, offset, "");
lp_build_gather_resident(&int_coord_bld, dynamic_state,
params->resources_type, params->resources_ptr,
full_offset, &outdata[4]);
}
offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
struct lp_build_context texel_bld;
lp_build_context_init(&texel_bld, gallivm, texel_type);

View File

@@ -445,6 +445,9 @@ lp_jit_texture_from_pipe(struct lp_jit_texture *jit, const struct pipe_sampler_v
else
assert(view->u.tex.last_layer < res->array_size);
}
if (res->flags & PIPE_RESOURCE_FLAG_SPARSE)
jit->residency = lp_tex->residency;
} else {
/*
* For tex2d_from_buf, adjust width and height with application
@@ -610,6 +613,11 @@ lp_jit_image_from_pipe(struct lp_jit_image *jit, const struct pipe_image_view *v
view->u.tex2d_from_buf.offset * image_blocksize;
}
}
if (res->flags & PIPE_RESOURCE_FLAG_SPARSE) {
jit->residency = lp_res->residency;
jit->base_offset = (uint32_t)((uintptr_t)jit->base - (uintptr_t)lp_res->tex_data);
}
}
}

View File

@@ -327,6 +327,8 @@ llvmpipe_resource_create_all(struct pipe_screen *_screen,
-1, 0);
madvise(lpr->tex_data, lpr->size_required, MADV_DONTNEED);
#endif
lpr->residency = calloc(DIV_ROUND_UP(lpr->size_required, 64 * 1024 * sizeof(uint32_t) * 8), sizeof(uint32_t));
}
}
} else {
@@ -578,6 +580,8 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen,
#endif
}
free(lpr->residency);
#if MESA_DEBUG
simple_mtx_lock(&resource_list_mutex);
if (!list_is_empty(&lpr->list))
@@ -1564,6 +1568,7 @@ llvmpipe_resource_bind_backing(struct pipe_screen *pscreen,
if (llvmpipe_resource_is_texture(&lpr->base)) {
mmap((char *)lpr->tex_data + offset, size, PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_FIXED, mem->fd, mem->offset + fd_offset);
BITSET_SET(lpr->residency, offset / (64 * 1024));
} else {
mmap((char *)lpr->data + offset, size, PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_FIXED, mem->fd, mem->offset + fd_offset);
@@ -1572,6 +1577,7 @@ llvmpipe_resource_bind_backing(struct pipe_screen *pscreen,
if (llvmpipe_resource_is_texture(&lpr->base)) {
mmap((char *)lpr->tex_data + offset, size, PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
BITSET_CLEAR(lpr->residency, offset / (64 * 1024));
} else {
mmap((char *)lpr->data + offset, size, PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_FIXED|MAP_ANONYMOUS, -1, 0);

View File

@@ -32,6 +32,7 @@
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "lp_limits.h"
#include "util/bitset.h"
#if MESA_DEBUG
#include "util/list.h"
#endif
@@ -93,6 +94,8 @@ struct llvmpipe_resource
*/
void *tex_data;
BITSET_WORD *residency;
/**
* Data for non-texture resources.
*/

View File

@@ -261,7 +261,7 @@ compile_image_function(struct llvmpipe_context *ctx, struct lp_static_texture_st
if (op >= LP_IMG_OP_COUNT - 1) {
params.img_op = LP_IMG_ATOMIC;
params.op = op - (LP_IMG_OP_COUNT - 1);
} else if (op != LP_IMG_LOAD && op != LP_IMG_STORE) {
} else if (op != LP_IMG_LOAD && op != LP_IMG_LOAD_SPARSE && op != LP_IMG_STORE) {
params.img_op = LP_IMG_ATOMIC_CAS;
}
@@ -319,7 +319,7 @@ compile_image_function(struct llvmpipe_context *ctx, struct lp_static_texture_st
gallivm->texture_descriptor = LLVMGetParam(function, arg_index++);
if (params.img_op != LP_IMG_LOAD)
if (params.img_op != LP_IMG_LOAD && params.img_op != LP_IMG_LOAD_SPARSE)
params.exec_mask = LLVMGetParam(function, arg_index++);
LLVMValueRef coords[3];
@@ -330,7 +330,7 @@ compile_image_function(struct llvmpipe_context *ctx, struct lp_static_texture_st
if (ms)
params.ms_index = LLVMGetParam(function, arg_index++);
if (params.img_op != LP_IMG_LOAD)
if (params.img_op != LP_IMG_LOAD && params.img_op != LP_IMG_LOAD_SPARSE)
for (uint32_t i = 0; i < 4; i++)
params.indata[i] = LLVMGetParam(function, arg_index++);
@@ -343,15 +343,20 @@ compile_image_function(struct llvmpipe_context *ctx, struct lp_static_texture_st
gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
LLVMPositionBuilderAtEnd(gallivm->builder, block);
LLVMValueRef outdata[4] = { 0 };
LLVMValueRef outdata[5] = { 0 };
lp_build_img_op_soa(texture, lp_build_image_soa_dynamic_state(image_soa), gallivm, &params, outdata);
for (uint32_t i = 1; i < 4; i++)
if (!outdata[i])
outdata[i] = outdata[0];
if (outdata[4])
outdata[4] = LLVMBuildZExt(gallivm->builder, outdata[4], lp_build_int_vec_type(gallivm, lp_int_type(type)), "");
else
outdata[4] = lp_build_one(gallivm, lp_int_type(type));
if (params.img_op != LP_IMG_STORE)
LLVMBuildAggregateRet(gallivm->builder, outdata, 4);
LLVMBuildAggregateRet(gallivm->builder, outdata, params.img_op == LP_IMG_LOAD_SPARSE ? 5 : 4);
else
LLVMBuildRetVoid(gallivm->builder);
@@ -492,7 +497,7 @@ compile_sample_function(struct llvmpipe_context *ctx, struct lp_static_texture_s
gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
LLVMPositionBuilderAtEnd(gallivm->builder, block);
LLVMValueRef texel_out[4] = { 0 };
LLVMValueRef texel_out[5] = { 0 };
if (supported) {
lp_build_sample_soa_code(gallivm, texture, sampler, lp_build_sampler_soa_dynamic_state(sampler_soa),
type, sample_key, 0, 0, cs.jit_resources_type, NULL, cs.jit_cs_thread_data_type,
@@ -501,7 +506,12 @@ compile_sample_function(struct llvmpipe_context *ctx, struct lp_static_texture_s
lp_build_sample_nop(gallivm, lp_build_texel_type(type, util_format_description(texture->format)), coords, texel_out);
}
LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
if (texel_out[4])
texel_out[4] = LLVMBuildZExt(gallivm->builder, texel_out[4], lp_build_int_vec_type(gallivm, lp_int_type(type)), "");
else
texel_out[4] = lp_build_one(gallivm, lp_int_type(type));
LLVMBuildAggregateRet(gallivm->builder, texel_out, 5);
LLVMDisposeBuilder(gallivm->builder);
gallivm->builder = old_builder;

View File

@@ -31,8 +31,6 @@
#include "gallivm/lp_bld_sample.h"
#include "gallivm/lp_bld_jit_sample.h"
#define LP_SAMPLE_KEY_COUNT (1 << 11)
struct lp_sampler_matrix {
struct lp_texture_functions **textures;
struct lp_static_sampler_state *samplers;

View File

@@ -349,6 +349,7 @@ lvp_get_features(const struct lvp_physical_device *pdevice,
.sparseResidencyImage2D = DETECT_OS_LINUX,
.sparseResidencyImage3D = DETECT_OS_LINUX,
.sparseResidencyAliased = DETECT_OS_LINUX,
.shaderResourceResidency = DETECT_OS_LINUX,
/* Vulkan 1.1 */
.storageBuffer16BitAccess = true,

View File

@@ -0,0 +1,31 @@
/*
* Copyright © 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "lvp_private.h"
#include "nir.h"
#include "nir_builder.h"
/**
 * Intrinsic callback for nir_shader_intrinsics_pass.
 *
 * Rewrites the uses of two sparse-residency intrinsics with plain ALU
 * instructions built immediately before the intrinsic:
 *  - sparse_residency_code_and  ->  iand(src0, src1)
 *  - is_sparse_texels_resident  ->  ine(src0, 0)
 *
 * The original intrinsic is left in place; only its uses are redirected.
 * `data` is unused.
 *
 * \return true if the instruction was one of the two handled intrinsics.
 */
static bool
pass(nir_builder *b, nir_intrinsic_instr *instr, void *data)
{
   b->cursor = nir_before_instr(&instr->instr);

   switch (instr->intrinsic) {
   case nir_intrinsic_sparse_residency_code_and:
      nir_def_rewrite_uses(&instr->def,
                           nir_iand(b, instr->src[0].ssa, instr->src[1].ssa));
      return true;

   case nir_intrinsic_is_sparse_texels_resident:
      nir_def_rewrite_uses(&instr->def,
                           nir_ine_imm(b, instr->src[0].ssa, 0));
      return true;

   default:
      return false;
   }
}
/**
 * Run the sparse-residency lowering callback over the intrinsics of
 * `shader`, declaring block-index and dominance metadata as preserved.
 *
 * \return whatever nir_shader_intrinsics_pass reports for the run.
 */
bool
lvp_nir_lower_sparse_residency(struct nir_shader *shader)
{
   bool result =
      nir_shader_intrinsics_pass(shader, pass,
                                 nir_metadata_block_index | nir_metadata_dominance,
                                 NULL);
   return result;
}

View File

@@ -443,10 +443,17 @@ lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_
* functions that need to be pre-compiled.
*/
const nir_lower_tex_options tex_options = {
/* lower_tg4_offsets can introduce new sparse residency intrinsics
* which is why we have to lower everything before calling
* lvp_nir_lower_sparse_residency.
*/
.lower_tg4_offsets = true,
.lower_txd = true,
};
NIR_PASS(_, nir, nir_lower_tex, &tex_options);
NIR_PASS(_, nir, lvp_nir_lower_sparse_residency);
lvp_shader_optimize(nir);
if (nir->info.stage != MESA_SHADER_VERTEX)

View File

@@ -771,6 +771,8 @@ void *
lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked);
bool
lvp_nir_lower_ray_queries(struct nir_shader *shader);
bool
lvp_nir_lower_sparse_residency(struct nir_shader *shader);
enum vk_cmd_type
lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token);
#ifdef __cplusplus

View File

@@ -27,6 +27,7 @@ liblvp_files = files(
'lvp_lower_vulkan_resource.h',
'lvp_lower_input_attachments.c',
'lvp_nir_lower_ray_queries.c',
'lvp_nir_lower_sparse_residency.c',
'lvp_nir_ray_tracing.c',
'lvp_nir_ray_tracing.h',
'lvp_pipe_sync.c',