llvmpipe: add framebuffer fetching support (v1.1)
v1.1: Merge two if blocks (Roland)
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5914>
This commit is contained in:
@@ -2083,13 +2083,6 @@ spec/ext_shader_framebuffer_fetch/compiler/gles3/negative-gl_lastfragdata.frag:
|
||||
spec/ext_shader_framebuffer_fetch/compiler/gles3/negative-inout-gl_fragdepth.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch/compiler/gles3/negative-inout-vertex-output.vert: skip
|
||||
spec/ext_shader_framebuffer_fetch/compiler/gles3/negative-output-layout.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles2/negative-gl_lastfragdata-write.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles2/negative-inout-fragment-output.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles2/negative-output-layout.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles3/negative-gl_lastfragdata.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles3/negative-inout-gl_fragdepth.frag: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles3/negative-inout-vertex-output.vert: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/compiler/gles3/negative-output-layout.frag: skip
|
||||
spec/ext_shader_io_blocks/preprocessor/disabled-defined-es.comp: skip
|
||||
spec/ext_shader_io_blocks/preprocessor/disabled-defined-es.frag: skip
|
||||
spec/ext_shader_io_blocks/preprocessor/disabled-defined-es.geom: skip
|
||||
@@ -2664,10 +2657,10 @@ spec/oes_texture_storage_multisample_2d_array/preprocessor/disabled-undefined-es
|
||||
summary:
|
||||
name: results
|
||||
---- --------
|
||||
pass: 12172
|
||||
pass: 12179
|
||||
fail: 2
|
||||
crash: 4
|
||||
skip: 2657
|
||||
skip: 2650
|
||||
timeout: 0
|
||||
warn: 0
|
||||
incomplete: 0
|
||||
|
@@ -574,35 +574,12 @@ spec/ext_shader_framebuffer_fetch/execution/gles3/single-slice-2darray-mipmap: s
|
||||
spec/ext_shader_framebuffer_fetch/execution/gles3/single-slice-3d: skip
|
||||
spec/ext_shader_framebuffer_fetch/execution/gles3/single-slice-cubemap: skip
|
||||
spec/ext_shader_framebuffer_fetch/execution/gles3/texture: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gl/1d: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gl/layered-1darray: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gl/layered-2darray: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gl/layered-cubemap: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles2/mrt: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles2/nonuniform-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles2/nonuniform-ss-redecl-highp: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles2/nonuniform-ss-redecl-lowp: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles2/simple-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/discard-ms8: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/discard-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/integer-ms2: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/integer-ms8: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/integer-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/mrt: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/nonuniform-ms16: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/nonuniform-ms2: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/nonuniform-ms8: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/nonuniform-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/overwrite: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ms16: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ms2: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ms8: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ss: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/single-slice-2darray: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/single-slice-2darray-mipmap: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/single-slice-3d: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/single-slice-cubemap: skip
|
||||
spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/texture: skip
|
||||
spec/ext_shader_image_load_formatted/execution/image_checkerboard: skip
|
||||
spec/glsl-1.10/execution/built-in-functions/fs-pow-float-float: fail
|
||||
spec/glsl-1.10/execution/built-in-functions/vs-pow-float-float: fail
|
||||
@@ -2322,10 +2299,10 @@ spec/nv_viewport_swizzle/viewport_swizzle: skip
|
||||
summary:
|
||||
name: results
|
||||
---- --------
|
||||
pass: 14054
|
||||
pass: 14077
|
||||
fail: 102
|
||||
crash: 178
|
||||
skip: 2041
|
||||
skip: 2018
|
||||
timeout: 0
|
||||
warn: 0
|
||||
incomplete: 0
|
||||
|
@@ -425,6 +425,10 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
|
||||
}
|
||||
break;
|
||||
case nir_var_shader_out:
|
||||
if (bld->fs_iface && bld->fs_iface->fb_fetch) {
|
||||
bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
|
||||
return;
|
||||
}
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
int idx = (i * dmul) + var->data.location_frac;
|
||||
if (bld->tcs_iface) {
|
||||
|
@@ -251,6 +251,11 @@ struct lp_build_fs_iface {
|
||||
unsigned attrib, unsigned chan,
|
||||
bool centroid, bool sample,
|
||||
LLVMValueRef indir_index, LLVMValueRef offsets[2]);
|
||||
|
||||
void (*fb_fetch)(const struct lp_build_fs_iface *iface,
|
||||
struct lp_build_context *bld,
|
||||
unsigned cbuf,
|
||||
LLVMValueRef result[4]);
|
||||
};
|
||||
|
||||
void
|
||||
|
@@ -331,6 +331,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
||||
return 1;
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
return 1;
|
||||
case PIPE_CAP_FBFETCH:
|
||||
return 8;
|
||||
case PIPE_CAP_FBFETCH_COHERENT:
|
||||
return 0;
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
return 1;
|
||||
@@ -364,7 +368,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
||||
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
|
||||
case PIPE_CAP_NATIVE_FENCE_FD:
|
||||
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
|
||||
case PIPE_CAP_FBFETCH:
|
||||
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
|
||||
case PIPE_CAP_TGSI_CLOCK:
|
||||
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
|
||||
|
@@ -89,6 +89,7 @@
|
||||
#include "gallivm/lp_bld_pack.h"
|
||||
#include "gallivm/lp_bld_format.h"
|
||||
#include "gallivm/lp_bld_quad.h"
|
||||
#include "gallivm/lp_bld_gather.h"
|
||||
|
||||
#include "lp_bld_alpha.h"
|
||||
#include "lp_bld_blend.h"
|
||||
@@ -111,6 +112,100 @@
|
||||
/** Fragment shader number (for debugging) */
|
||||
static unsigned fs_no = 0;
|
||||
|
||||
static void
|
||||
load_unswizzled_block(struct gallivm_state *gallivm,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef stride,
|
||||
unsigned block_width,
|
||||
unsigned block_height,
|
||||
LLVMValueRef* dst,
|
||||
struct lp_type dst_type,
|
||||
unsigned dst_count,
|
||||
unsigned dst_alignment,
|
||||
LLVMValueRef x_offset,
|
||||
LLVMValueRef y_offset,
|
||||
bool fb_fetch_twiddle);
|
||||
/**
|
||||
* Checks if a format description is an arithmetic format
|
||||
*
|
||||
* A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
|
||||
*/
|
||||
static inline boolean
|
||||
is_arithmetic_format(const struct util_format_description *format_desc)
|
||||
{
|
||||
boolean arith = false;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < format_desc->nr_channels; ++i) {
|
||||
arith |= format_desc->channel[i].size != format_desc->channel[0].size;
|
||||
arith |= (format_desc->channel[i].size % 8) != 0;
|
||||
}
|
||||
|
||||
return arith;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if this format requires special handling due to required expansion
|
||||
* to floats for blending, and furthermore has "natural" packed AoS -> unpacked
|
||||
* SoA conversion.
|
||||
*/
|
||||
static inline boolean
|
||||
format_expands_to_float_soa(const struct util_format_description *format_desc)
|
||||
{
|
||||
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
|
||||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the type representing the memory layout for a format
|
||||
*
|
||||
* e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
|
||||
*/
|
||||
static inline void
|
||||
lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
|
||||
struct lp_type* type)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned chan;
|
||||
|
||||
if (format_expands_to_float_soa(format_desc)) {
|
||||
/* just make this a uint with width of block */
|
||||
type->floating = false;
|
||||
type->fixed = false;
|
||||
type->sign = false;
|
||||
type->norm = false;
|
||||
type->width = format_desc->block.bits;
|
||||
type->length = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
|
||||
break;
|
||||
chan = i;
|
||||
|
||||
memset(type, 0, sizeof(struct lp_type));
|
||||
type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
|
||||
type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
|
||||
type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
|
||||
type->norm = format_desc->channel[chan].normalized;
|
||||
|
||||
if (is_arithmetic_format(format_desc)) {
|
||||
type->width = 0;
|
||||
type->length = 1;
|
||||
|
||||
for (i = 0; i < format_desc->nr_channels; ++i) {
|
||||
type->width += format_desc->channel[i].size;
|
||||
}
|
||||
} else {
|
||||
type->width = format_desc->channel[chan].size;
|
||||
type->length = format_desc->nr_channels;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand the relevant bits of mask_input to a n*4-dword mask for the
|
||||
@@ -328,6 +423,11 @@ struct lp_build_fs_llvm_iface {
|
||||
struct lp_build_interp_soa_context *interp;
|
||||
struct lp_build_for_loop_state *loop_state;
|
||||
LLVMValueRef mask_store;
|
||||
LLVMValueRef sample_id;
|
||||
LLVMValueRef color_ptr_ptr;
|
||||
LLVMValueRef color_stride_ptr;
|
||||
LLVMValueRef color_sample_stride_ptr;
|
||||
const struct lp_fragment_shader_variant_key *key;
|
||||
};
|
||||
|
||||
static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
|
||||
@@ -350,6 +450,105 @@ static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
|
||||
attrib, chan, loc, attrib_indir, offsets);
|
||||
}
|
||||
|
||||
static void fs_fb_fetch(const struct lp_build_fs_iface *iface,
|
||||
struct lp_build_context *bld,
|
||||
unsigned cbuf,
|
||||
LLVMValueRef result[4])
|
||||
{
|
||||
struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
|
||||
struct gallivm_state *gallivm = bld->gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
const struct lp_fragment_shader_variant_key *key = fs_iface->key;
|
||||
LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
|
||||
LLVMValueRef color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_ptr_ptr, &index, 1, ""), "");
|
||||
LLVMValueRef stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_stride_ptr, &index, 1, ""), "");
|
||||
|
||||
LLVMValueRef dst[4 * 4];
|
||||
enum pipe_format cbuf_format = key->cbuf_format[cbuf];
|
||||
const struct util_format_description* out_format_desc = util_format_description(cbuf_format);
|
||||
struct lp_type dst_type;
|
||||
unsigned block_size = bld->type.length;
|
||||
unsigned block_height = key->resource_1d ? 1 : 2;
|
||||
unsigned block_width = block_size / block_height;
|
||||
|
||||
lp_mem_type_from_format_desc(out_format_desc, &dst_type);
|
||||
|
||||
struct lp_type blend_type;
|
||||
memset(&blend_type, 0, sizeof blend_type);
|
||||
blend_type.floating = FALSE; /* values are integers */
|
||||
blend_type.sign = FALSE; /* values are unsigned */
|
||||
blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
|
||||
blend_type.width = 8; /* 8-bit ubyte values */
|
||||
blend_type.length = 16; /* 16 elements per vector */
|
||||
|
||||
uint32_t dst_alignment;
|
||||
/*
|
||||
* Compute the alignment of the destination pointer in bytes
|
||||
* We fetch 1-4 pixels, if the format has pot alignment then those fetches
|
||||
* are always aligned by MIN2(16, fetch_width) except for buffers (not
|
||||
* 1d tex but can't distinguish here) so need to stick with per-pixel
|
||||
* alignment in this case.
|
||||
*/
|
||||
if (key->resource_1d) {
|
||||
dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
|
||||
}
|
||||
else {
|
||||
dst_alignment = dst_type.length * dst_type.width / 8;
|
||||
}
|
||||
/* Force power-of-two alignment by extracting only the least-significant-bit */
|
||||
dst_alignment = 1 << (ffs(dst_alignment) - 1);
|
||||
/*
|
||||
* Resource base and stride pointers are aligned to 16 bytes, so that's
|
||||
* the maximum alignment we can guarantee
|
||||
*/
|
||||
dst_alignment = MIN2(16, dst_alignment);
|
||||
|
||||
LLVMTypeRef blend_vec_type = lp_build_vec_type(gallivm, blend_type);
|
||||
color_ptr = LLVMBuildBitCast(builder, color_ptr, LLVMPointerType(blend_vec_type, 0), "");
|
||||
|
||||
if (key->multisample) {
|
||||
LLVMValueRef sample_stride = LLVMBuildLoad(builder,
|
||||
LLVMBuildGEP(builder, fs_iface->color_sample_stride_ptr,
|
||||
&index, 1, ""), "");
|
||||
LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, fs_iface->sample_id, "");
|
||||
color_ptr = LLVMBuildGEP(builder, color_ptr, &sample_offset, 1, "");
|
||||
}
|
||||
/* fragment shader executes on 4x4 blocks. depending on vector width it can execute 2 or 4 iterations.
|
||||
* only move to the next row once the top row has completed 8 wide 1 iteration, 4 wide 2 iterations */
|
||||
LLVMValueRef x_offset = NULL, y_offset = NULL;
|
||||
if (!key->resource_1d) {
|
||||
LLVMValueRef counter = fs_iface->loop_state->counter;
|
||||
|
||||
if (block_size == 4) {
|
||||
x_offset = LLVMBuildShl(builder,
|
||||
LLVMBuildAnd(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), ""),
|
||||
lp_build_const_int32(gallivm, 1), "");
|
||||
counter = LLVMBuildLShr(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), "");
|
||||
}
|
||||
y_offset = LLVMBuildMul(builder, counter, lp_build_const_int32(gallivm, 2), "");
|
||||
}
|
||||
load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, block_size, dst_alignment, x_offset, y_offset, true);
|
||||
|
||||
for (unsigned i = 0; i < block_size; i++) {
|
||||
dst[i] = LLVMBuildBitCast(builder, dst[i], LLVMInt32TypeInContext(gallivm->context), "");
|
||||
}
|
||||
LLVMValueRef packed = lp_build_gather_values(gallivm, dst, block_size);
|
||||
|
||||
struct lp_type texel_type = bld->type;
|
||||
if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
|
||||
out_format_desc->channel[0].pure_integer) {
|
||||
if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
texel_type = lp_type_int_vec(bld->type.width, bld->type.width * bld->type.length);
|
||||
}
|
||||
else if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
|
||||
texel_type = lp_type_uint_vec(bld->type.width, bld->type.width * bld->type.length);
|
||||
}
|
||||
}
|
||||
lp_build_unpack_rgba_soa(gallivm, out_format_desc,
|
||||
texel_type,
|
||||
packed, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the fragment shader, depth/stencil test, and alpha tests.
|
||||
*/
|
||||
@@ -370,6 +569,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
|
||||
LLVMValueRef depth_base_ptr,
|
||||
LLVMValueRef depth_stride,
|
||||
LLVMValueRef depth_sample_stride,
|
||||
LLVMValueRef color_ptr_ptr,
|
||||
LLVMValueRef color_stride_ptr,
|
||||
LLVMValueRef color_sample_stride_ptr,
|
||||
LLVMValueRef facing,
|
||||
LLVMValueRef thread_data_ptr)
|
||||
{
|
||||
@@ -727,9 +929,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
|
||||
|
||||
struct lp_build_fs_llvm_iface fs_iface = {
|
||||
.base.interp_fn = fs_interp,
|
||||
.base.fb_fetch = fs_fb_fetch,
|
||||
.interp = interp,
|
||||
.loop_state = &loop_state,
|
||||
.sample_id = system_values.sample_id,
|
||||
.mask_store = mask_store,
|
||||
.color_ptr_ptr = color_ptr_ptr,
|
||||
.color_stride_ptr = color_stride_ptr,
|
||||
.color_sample_stride_ptr = color_sample_stride_ptr,
|
||||
.key = key,
|
||||
};
|
||||
|
||||
struct lp_build_tgsi_params params;
|
||||
@@ -1244,7 +1452,10 @@ load_unswizzled_block(struct gallivm_state *gallivm,
|
||||
LLVMValueRef* dst,
|
||||
struct lp_type dst_type,
|
||||
unsigned dst_count,
|
||||
unsigned dst_alignment)
|
||||
unsigned dst_alignment,
|
||||
LLVMValueRef x_offset,
|
||||
LLVMValueRef y_offset,
|
||||
bool fb_fetch_twiddle)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
unsigned row_size = dst_count / block_height;
|
||||
@@ -1257,8 +1468,28 @@ load_unswizzled_block(struct gallivm_state *gallivm,
|
||||
unsigned x = i % row_size;
|
||||
unsigned y = i / row_size;
|
||||
|
||||
LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
|
||||
LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, "");
|
||||
if (block_height == 2 && dst_count == 8 && fb_fetch_twiddle) {
|
||||
/* remap the raw slots into the fragment shader execution mode. */
|
||||
/* this math took me way too long to work out, I'm sure it's overkill. */
|
||||
x = (i & 1) + ((i >> 2) << 1);
|
||||
y = (i & 2) >> 1;
|
||||
}
|
||||
|
||||
LLVMValueRef x_val;
|
||||
if (x_offset) {
|
||||
x_val = lp_build_const_int32(gallivm, x);
|
||||
if (x_offset)
|
||||
x_val = LLVMBuildAdd(builder, x_val, x_offset, "");
|
||||
x_val = LLVMBuildMul(builder, x_val, lp_build_const_int32(gallivm, (dst_type.width / 8) * dst_type.length), "");
|
||||
} else
|
||||
x_val = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
|
||||
|
||||
LLVMValueRef bx = x_val;
|
||||
|
||||
LLVMValueRef y_val = lp_build_const_int32(gallivm, y);
|
||||
if (y_offset)
|
||||
y_val = LLVMBuildAdd(builder, y_val, y_offset, "");
|
||||
LLVMValueRef by = LLVMBuildMul(builder, y_val, stride, "");
|
||||
|
||||
LLVMValueRef gep[2];
|
||||
LLVMValueRef dst_ptr;
|
||||
@@ -1322,89 +1553,6 @@ store_unswizzled_block(struct gallivm_state *gallivm,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if a format description is an arithmetic format
|
||||
*
|
||||
* A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
|
||||
*/
|
||||
static inline boolean
|
||||
is_arithmetic_format(const struct util_format_description *format_desc)
|
||||
{
|
||||
boolean arith = false;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < format_desc->nr_channels; ++i) {
|
||||
arith |= format_desc->channel[i].size != format_desc->channel[0].size;
|
||||
arith |= (format_desc->channel[i].size % 8) != 0;
|
||||
}
|
||||
|
||||
return arith;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this format requires special handling due to required expansion
|
||||
* to floats for blending, and furthermore has "natural" packed AoS -> unpacked
|
||||
* SoA conversion.
|
||||
*/
|
||||
static inline boolean
|
||||
format_expands_to_float_soa(const struct util_format_description *format_desc)
|
||||
{
|
||||
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
|
||||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the type representing the memory layout for a format
|
||||
*
|
||||
* e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
|
||||
*/
|
||||
static inline void
|
||||
lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
|
||||
struct lp_type* type)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned chan;
|
||||
|
||||
if (format_expands_to_float_soa(format_desc)) {
|
||||
/* just make this a uint with width of block */
|
||||
type->floating = false;
|
||||
type->fixed = false;
|
||||
type->sign = false;
|
||||
type->norm = false;
|
||||
type->width = format_desc->block.bits;
|
||||
type->length = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
|
||||
break;
|
||||
chan = i;
|
||||
|
||||
memset(type, 0, sizeof(struct lp_type));
|
||||
type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
|
||||
type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
|
||||
type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
|
||||
type->norm = format_desc->channel[chan].normalized;
|
||||
|
||||
if (is_arithmetic_format(format_desc)) {
|
||||
type->width = 0;
|
||||
type->length = 1;
|
||||
|
||||
for (i = 0; i < format_desc->nr_channels; ++i) {
|
||||
type->width += format_desc->channel[i].size;
|
||||
}
|
||||
} else {
|
||||
type->width = format_desc->channel[chan].size;
|
||||
type->length = format_desc->nr_channels;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the type for a format which is usable in the blending code.
|
||||
@@ -2622,7 +2770,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
|
||||
|
||||
if (is_1d) {
|
||||
load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
|
||||
dst, ls_type, dst_count / 4, dst_alignment);
|
||||
dst, ls_type, dst_count / 4, dst_alignment, NULL, NULL, false);
|
||||
for (i = dst_count / 4; i < dst_count; i++) {
|
||||
dst[i] = lp_build_undef(gallivm, ls_type);
|
||||
}
|
||||
@@ -2630,7 +2778,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
|
||||
}
|
||||
else {
|
||||
load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
|
||||
dst, ls_type, dst_count, dst_alignment);
|
||||
dst, ls_type, dst_count, dst_alignment, NULL, NULL, false);
|
||||
}
|
||||
|
||||
|
||||
@@ -3058,6 +3206,9 @@ generate_fragment(struct llvmpipe_context *lp,
|
||||
depth_ptr,
|
||||
depth_stride,
|
||||
depth_sample_stride,
|
||||
color_ptr_ptr,
|
||||
stride_ptr,
|
||||
color_sample_stride_ptr,
|
||||
facing,
|
||||
thread_data_ptr);
|
||||
|
||||
|
Reference in New Issue
Block a user