gallivm: Vectorize the rho computation.
This commit is contained in:
@@ -171,9 +171,6 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
|
|||||||
* Generate code to compute coordinate gradient (rho).
|
* Generate code to compute coordinate gradient (rho).
|
||||||
* \param ddx partial derivatives of (s, t, r, q) with respect to X
|
* \param ddx partial derivatives of (s, t, r, q) with respect to X
|
||||||
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
|
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
|
||||||
* \param width scalar int texture width
|
|
||||||
* \param height scalar int texture height
|
|
||||||
* \param depth scalar int texture depth
|
|
||||||
*
|
*
|
||||||
* XXX: The resulting rho is scalar, so we ignore all but the first element of
|
* XXX: The resulting rho is scalar, so we ignore all but the first element of
|
||||||
* derivatives that are passed by the shader.
|
* derivatives that are passed by the shader.
|
||||||
@@ -181,52 +178,75 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
|
|||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
lp_build_rho(struct lp_build_sample_context *bld,
|
lp_build_rho(struct lp_build_sample_context *bld,
|
||||||
const LLVMValueRef ddx[4],
|
const LLVMValueRef ddx[4],
|
||||||
const LLVMValueRef ddy[4],
|
const LLVMValueRef ddy[4])
|
||||||
LLVMValueRef width,
|
|
||||||
LLVMValueRef height,
|
|
||||||
LLVMValueRef depth)
|
|
||||||
{
|
{
|
||||||
|
struct lp_build_context *float_size_bld = &bld->float_size_bld;
|
||||||
struct lp_build_context *float_bld = &bld->float_bld;
|
struct lp_build_context *float_bld = &bld->float_bld;
|
||||||
const int dims = texture_dims(bld->static_state->target);
|
const int dims = texture_dims(bld->static_state->target);
|
||||||
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
|
LLVMTypeRef i32t = LLVMInt32Type();
|
||||||
LLVMValueRef dsdx, dsdy;
|
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
|
||||||
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
|
LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
|
||||||
|
LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
|
||||||
|
LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
|
||||||
|
LLVMValueRef rho_x, rho_y;
|
||||||
|
LLVMValueRef rho_vec;
|
||||||
|
LLVMValueRef float_size;
|
||||||
LLVMValueRef rho;
|
LLVMValueRef rho;
|
||||||
|
|
||||||
dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
|
dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
|
||||||
dsdx = lp_build_abs(float_bld, dsdx);
|
|
||||||
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
|
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
|
||||||
dsdy = lp_build_abs(float_bld, dsdy);
|
|
||||||
if (dims > 1) {
|
if (dims <= 1) {
|
||||||
|
rho_x = dsdx;
|
||||||
|
rho_y = dsdy;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rho_x = float_size_bld->undef;
|
||||||
|
rho_y = float_size_bld->undef;
|
||||||
|
|
||||||
|
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
|
||||||
|
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");
|
||||||
|
|
||||||
dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
|
dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
|
||||||
dtdx = lp_build_abs(float_bld, dtdx);
|
|
||||||
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
|
dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
|
||||||
dtdy = lp_build_abs(float_bld, dtdy);
|
|
||||||
if (dims > 2) {
|
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
|
||||||
|
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");
|
||||||
|
|
||||||
|
if (dims >= 3) {
|
||||||
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
|
drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
|
||||||
drdx = lp_build_abs(float_bld, drdx);
|
|
||||||
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
|
drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
|
||||||
drdy = lp_build_abs(float_bld, drdy);
|
|
||||||
|
rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
|
||||||
|
rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute rho = max of all partial derivatives scaled by texture size.
|
rho_x = lp_build_abs(float_size_bld, rho_x);
|
||||||
* XXX this could be vectorized somewhat
|
rho_y = lp_build_abs(float_size_bld, rho_y);
|
||||||
*/
|
|
||||||
rho = LLVMBuildFMul(bld->builder,
|
rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
|
||||||
lp_build_max(float_bld, dsdx, dsdy),
|
|
||||||
lp_build_int_to_float(float_bld, width), "");
|
float_size = lp_build_int_to_float(float_size_bld, bld->uint_size);
|
||||||
if (dims > 1) {
|
|
||||||
LLVMValueRef max;
|
rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
|
||||||
max = LLVMBuildFMul(bld->builder,
|
|
||||||
lp_build_max(float_bld, dtdx, dtdy),
|
if (dims <= 1) {
|
||||||
lp_build_int_to_float(float_bld, height), "");
|
rho = rho_vec;
|
||||||
rho = lp_build_max(float_bld, rho, max);
|
}
|
||||||
if (dims > 2) {
|
else {
|
||||||
max = LLVMBuildFMul(bld->builder,
|
if (dims >= 2) {
|
||||||
lp_build_max(float_bld, drdx, drdy),
|
LLVMValueRef rho_s, rho_t, rho_r;
|
||||||
lp_build_int_to_float(float_bld, depth), "");
|
|
||||||
rho = lp_build_max(float_bld, rho, max);
|
rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
|
||||||
|
rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");
|
||||||
|
|
||||||
|
rho = lp_build_max(float_bld, rho_s, rho_t);
|
||||||
|
|
||||||
|
if (dims >= 3) {
|
||||||
|
rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index2, "");
|
||||||
|
rho = lp_build_max(float_bld, rho, rho_r);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -289,7 +309,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
|
|||||||
else {
|
else {
|
||||||
LLVMValueRef rho;
|
LLVMValueRef rho;
|
||||||
|
|
||||||
rho = lp_build_rho(bld, ddx, ddy, width, height, depth);
|
rho = lp_build_rho(bld, ddx, ddy);
|
||||||
|
|
||||||
/* compute lod = log2(rho) */
|
/* compute lod = log2(rho) */
|
||||||
if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
|
if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
|
||||||
|
@@ -202,9 +202,20 @@ struct lp_build_sample_context
|
|||||||
struct lp_type int_coord_type;
|
struct lp_type int_coord_type;
|
||||||
struct lp_build_context int_coord_bld;
|
struct lp_build_context int_coord_bld;
|
||||||
|
|
||||||
|
/** Unsigned integer texture size */
|
||||||
|
struct lp_type uint_size_type;
|
||||||
|
struct lp_build_context uint_size_bld;
|
||||||
|
|
||||||
|
/** Floating-point texture size */
|
||||||
|
struct lp_type float_size_type;
|
||||||
|
struct lp_build_context float_size_bld;
|
||||||
|
|
||||||
/** Output texels type and build context */
|
/** Output texels type and build context */
|
||||||
struct lp_type texel_type;
|
struct lp_type texel_type;
|
||||||
struct lp_build_context texel_bld;
|
struct lp_build_context texel_bld;
|
||||||
|
|
||||||
|
/** Unsigned vector with texture width, height, depth */
|
||||||
|
LLVMValueRef uint_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -241,7 +252,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static INLINE int
|
static INLINE unsigned
|
||||||
texture_dims(enum pipe_texture_target tex)
|
texture_dims(enum pipe_texture_target tex)
|
||||||
{
|
{
|
||||||
switch (tex) {
|
switch (tex) {
|
||||||
|
@@ -1141,7 +1141,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
|
|||||||
LLVMValueRef explicit_lod, /* optional */
|
LLVMValueRef explicit_lod, /* optional */
|
||||||
LLVMValueRef texel_out[4])
|
LLVMValueRef texel_out[4])
|
||||||
{
|
{
|
||||||
|
unsigned dims = texture_dims(static_state->target);
|
||||||
struct lp_build_sample_context bld;
|
struct lp_build_sample_context bld;
|
||||||
|
LLVMTypeRef i32t = LLVMInt32Type();
|
||||||
LLVMValueRef width, width_vec;
|
LLVMValueRef width, width_vec;
|
||||||
LLVMValueRef height, height_vec;
|
LLVMValueRef height, height_vec;
|
||||||
LLVMValueRef depth, depth_vec;
|
LLVMValueRef depth, depth_vec;
|
||||||
@@ -1171,6 +1173,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
|
|||||||
bld.coord_type = type;
|
bld.coord_type = type;
|
||||||
bld.uint_coord_type = lp_uint_type(type);
|
bld.uint_coord_type = lp_uint_type(type);
|
||||||
bld.int_coord_type = lp_int_type(type);
|
bld.int_coord_type = lp_int_type(type);
|
||||||
|
bld.float_size_type = lp_type_float(32);
|
||||||
|
bld.float_size_type.length = dims > 1 ? 4 : 1;
|
||||||
|
bld.uint_size_type = lp_uint_type(bld.float_size_type);
|
||||||
bld.texel_type = type;
|
bld.texel_type = type;
|
||||||
|
|
||||||
float_vec_type = lp_type_float_vec(32);
|
float_vec_type = lp_type_float_vec(32);
|
||||||
@@ -1181,6 +1186,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
|
|||||||
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
|
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
|
||||||
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
|
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
|
||||||
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
|
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
|
||||||
|
lp_build_context_init(&bld.uint_size_bld, builder, bld.uint_size_type);
|
||||||
|
lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);
|
||||||
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
|
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
|
||||||
|
|
||||||
/* Get the dynamic state */
|
/* Get the dynamic state */
|
||||||
@@ -1196,6 +1203,23 @@ lp_build_sample_soa(LLVMBuilderRef builder,
|
|||||||
t = coords[1];
|
t = coords[1];
|
||||||
r = coords[2];
|
r = coords[2];
|
||||||
|
|
||||||
|
/* width, height, depth as single uint vector */
|
||||||
|
if (dims <= 1) {
|
||||||
|
bld.uint_size = width;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size_bld.undef,
|
||||||
|
width, LLVMConstInt(i32t, 0, 0), "");
|
||||||
|
if (dims >= 2) {
|
||||||
|
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size,
|
||||||
|
height, LLVMConstInt(i32t, 1, 0), "");
|
||||||
|
if (dims >= 3) {
|
||||||
|
bld.uint_size = LLVMBuildInsertElement(builder, bld.uint_size,
|
||||||
|
depth, LLVMConstInt(i32t, 2, 0), "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* width, height, depth as uint vectors */
|
/* width, height, depth as uint vectors */
|
||||||
width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
|
width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
|
||||||
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
|
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
|
||||||
|
Reference in New Issue
Block a user