Files
third_party_mesa3d/src/mesa/state_tracker/st_pbo_compute.c
Timothy Arceri bfa096a0b9 glsl/st: move st_nir_opts() into gl compiler common code
This will allow us to use this in future NIR linker work. It also makes
more sense to move it here as the classic drivers are gone, tgsi is
going away and we are merging more of the st into the gl common code.

Reviewed-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14785>
2022-01-31 11:49:21 +00:00

1112 lines
43 KiB
C

/*
* Copyright © 2021 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
*/
#include <stdbool.h>
#include "main/image.h"
#include "main/pbo.h"
#include "state_tracker/st_nir.h"
#include "state_tracker/st_format.h"
#include "state_tracker/st_pbo.h"
#include "state_tracker/st_texture.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "util/u_sampler.h"
#define BGR_FORMAT(NAME) \
{{ \
[0] = PIPE_FORMAT_##NAME##_SNORM, \
[1] = PIPE_FORMAT_##NAME##_SINT, \
}, \
{ \
[0] = PIPE_FORMAT_##NAME##_UNORM, \
[1] = PIPE_FORMAT_##NAME##_UINT, \
}}
#define FORMAT(NAME, NAME16, NAME32) \
{{ \
[1] = PIPE_FORMAT_##NAME##_SNORM, \
[2] = PIPE_FORMAT_##NAME16##_SNORM, \
[4] = PIPE_FORMAT_##NAME32##_SNORM, \
}, \
{ \
[1] = PIPE_FORMAT_##NAME##_UNORM, \
[2] = PIPE_FORMAT_##NAME16##_UNORM, \
[4] = PIPE_FORMAT_##NAME32##_UNORM, \
}}
/* don't try these at home */
static enum pipe_format
get_convert_format(struct gl_context *ctx,
enum pipe_format src_format,
GLenum format, GLenum type,
bool *need_bgra_swizzle)
{
struct st_context *st = st_context(ctx);
GLint bpp = _mesa_bytes_per_pixel(format, type);
if (_mesa_is_depth_format(format) ||
format == GL_GREEN_INTEGER ||
format == GL_BLUE_INTEGER) {
switch (bpp) {
case 1:
return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT;
case 2:
return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT;
case 4:
return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT;
}
}
mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type);
enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat);
if (!pformat) {
GLint dst_components = _mesa_components_in_format(format);
bpp /= dst_components;
if (format == GL_BGR || format == GL_BGRA) {
pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0);
if (!pformat)
pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle);
assert(pformat);
*need_bgra_swizzle = true;
} else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) {
pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0);
if (!pformat)
pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle);
assert(pformat);
*need_bgra_swizzle = true;
} else {
/* [signed,unsigned][bpp] */
enum pipe_format rgb[5][2][5] = {
[1] = FORMAT(R8, R16, R32),
[2] = FORMAT(R8G8, R16G16, R32G32),
[3] = FORMAT(R8G8B8, R16G16B16, R32G32B32),
[4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32),
};
pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp];
}
assert(util_format_get_nr_components(pformat) == dst_components);
}
assert(pformat);
return pformat;
}
#undef BGR_FORMAT
#undef FORMAT
struct pbo_shader_data {
nir_ssa_def *offset;
nir_ssa_def *range;
nir_ssa_def *invert;
nir_ssa_def *blocksize;
nir_ssa_def *alignment;
nir_ssa_def *dst_bit_size;
nir_ssa_def *channels;
nir_ssa_def *normalized;
nir_ssa_def *integer;
nir_ssa_def *clamp_uint;
nir_ssa_def *r11g11b10_or_sint;
nir_ssa_def *r9g9b9e5;
nir_ssa_def *bits1;
nir_ssa_def *bits2;
nir_ssa_def *bits3;
nir_ssa_def *bits4;
nir_ssa_def *swap;
nir_ssa_def *bits; //vec4
};
/* must be under 16bytes / sizeof(vec4) / 128 bits) */
struct pbo_data {
union {
struct {
struct {
uint16_t x, y;
};
struct {
uint16_t width, height;
};
struct {
uint16_t depth;
uint8_t invert : 1;
uint8_t blocksize : 7;
uint8_t clamp_uint : 1;
uint8_t r11g11b10_or_sint : 1;
uint8_t r9g9b9e5 : 1;
uint8_t swap : 1;
uint16_t alignment : 2;
uint8_t dst_bit_size : 2; //8, 16, 32, 64
};
struct {
uint8_t channels : 2;
uint8_t bits1 : 6;
uint8_t normalized : 1;
uint8_t integer : 1;
uint8_t bits2 : 6;
uint8_t bits3 : 6;
uint8_t pad1 : 2;
uint8_t bits4 : 6;
uint8_t pad2 : 2;
};
};
float vec[4];
};
};
#define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8)
#define STRUCT_BLOCK(offset, ...) \
do { \
assert(offset % 8 == 0); \
nir_ssa_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \
__VA_ARGS__ \
} while (0)
#define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \
do { \
assert(offset + size <= 8); \
nir_ssa_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \
if (offset) \
val = nir_ushr_imm(b, val, offset); \
sd->name = op; \
if (clamp) \
sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \
} while (0)
#define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \
STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp)
#define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \
STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0)
/* this function extracts the conversion data from pbo_data using the
* size annotations for each grouping. data is compacted into bitfields,
* so bitwise operations must be used to "unpact" everything
*/
static void
init_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd)
{
nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset");
nir_ssa_def *ubo_load = nir_load_var(b, ubo);
sd->offset = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16)), nir_imm_int(b, 65535));
sd->range = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16)), nir_imm_int(b, 65535));
STRUCT_BLOCK(80,
STRUCT_MEMBER_BOOL(80, invert, 0);
STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128);
);
STRUCT_BLOCK(88,
STRUCT_MEMBER_BOOL(88, clamp_uint, 0);
STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1);
STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2);
STRUCT_MEMBER_BOOL(88, swap, 3);
STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8);
STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64);
);
STRUCT_BLOCK(96,
STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4);
STRUCT_MEMBER(96, bits1, 2, 6, val, 32);
);
STRUCT_BLOCK(104,
STRUCT_MEMBER_BOOL(104, normalized, 0);
STRUCT_MEMBER_BOOL(104, integer, 1);
STRUCT_MEMBER(104, bits2, 2, 6, val, 32);
);
STRUCT_BLOCK(112,
STRUCT_MEMBER(112, bits3, 0, 6, val, 32);
);
STRUCT_BLOCK(120,
STRUCT_MEMBER(120, bits4, 0, 6, val, 32);
);
sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4);
/* clamp swap in the shader to enable better optimizing */
/* TODO?
sd->swap = nir_bcsel(b, nir_ior(b,
nir_ieq_imm(b, sd->blocksize, 8),
nir_bcsel(b,
nir_ieq_imm(b, sd->bits1, 8),
nir_bcsel(b,
nir_uge(b, sd->channels, nir_imm_int(b, 2)),
nir_bcsel(b,
nir_uge(b, sd->channels, nir_imm_int(b, 3)),
nir_bcsel(b,
nir_ieq(b, sd->channels, nir_imm_int(b, 4)),
nir_ball(b, nir_ieq(b, sd->bits, nir_imm_ivec4(b, 8, 8, 8, 8))),
nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 7), nir_imm_ivec3(b, 8, 8, 8)))),
nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 3), nir_imm_ivec2(b, 8, 8)))),
nir_imm_bool(b, 0)),
nir_imm_bool(b, 0))),
nir_imm_bool(b, 0),
sd->swap);
*/
}
static unsigned
fill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap)
{
unsigned bits[4] = {0};
bool weird_packed = false;
const struct util_format_description *dst_desc = util_format_description(dst_format);
bool is_8bit = true;
for (unsigned c = 0; c < 4; c++) {
bits[c] = dst_desc->channel[c].size;
if (c < dst_desc->nr_channels) {
weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0;
if (bits[c] != 8)
is_8bit = false;
}
}
if (is_8bit || dst_desc->block.bits == 8)
swap = false;
unsigned dst_bit_size = 0;
if (weird_packed) {
dst_bit_size = dst_desc->block.bits;
} else {
dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels;
}
assert(dst_bit_size);
assert(dst_bit_size <= 64);
pd->dst_bit_size = dst_bit_size >> 4;
pd->channels = dst_desc->nr_channels - 1;
pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm;
pd->clamp_uint = dst_desc->is_unorm ||
(util_format_is_pure_sint(dst_format) &&
!util_format_is_pure_sint(src_format) &&
!util_format_is_snorm(src_format)) ||
util_format_is_pure_uint(dst_format);
pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format);
pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format);
pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT;
pd->bits1 = bits[0];
pd->bits2 = bits[1];
pd->bits3 = bits[2];
pd->bits4 = bits[3];
pd->swap = swap;
return weird_packed ? 1 : dst_desc->nr_channels;
}
static nir_ssa_def *
get_buffer_offset(nir_builder *b, nir_ssa_def *coord, struct pbo_shader_data *sd)
{
/* from _mesa_image_offset():
offset = topOfImage
+ (skippixels + column) * bytes_per_pixel
+ (skiprows + row) * bytes_per_row
+ (skipimages + img) * bytes_per_image;
*/
nir_ssa_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize);
bytes_per_row = nir_bcsel(b, nir_ult(b, sd->alignment, nir_imm_int(b, 2)),
bytes_per_row,
nir_iand(b,
nir_isub(b, nir_iadd(b, bytes_per_row, sd->alignment), nir_imm_int(b, 1)),
nir_inot(b, nir_isub(b, sd->alignment, nir_imm_int(b, 1)))));
nir_ssa_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1));
bytes_per_row = nir_bcsel(b, sd->invert,
nir_isub(b, nir_imm_int(b, 0), bytes_per_row),
bytes_per_row);
return nir_iadd(b,
nir_imul(b, nir_channel(b, coord, 0), sd->blocksize),
nir_iadd(b,
nir_imul(b, nir_channel(b, coord, 1), bytes_per_row),
nir_imul(b, nir_channel(b, coord, 2), bytes_per_image)));
}
static inline void
write_ssbo(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset)
{
nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset,
.align_mul = pixel->bit_size / 8,
.write_mask = (1 << pixel->num_components) - 1);
}
static void
write_conversion(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd)
{
nir_push_if(b, nir_ilt(b, sd->dst_bit_size, nir_imm_int(b, 32)));
nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16));
write_ssbo(b, nir_u2u16(b, pixel), buffer_offset);
nir_push_else(b, NULL);
write_ssbo(b, nir_u2u8(b, pixel), buffer_offset);
nir_pop_if(b, NULL);
nir_push_else(b, NULL);
write_ssbo(b, pixel, buffer_offset);
nir_pop_if(b, NULL);
}
static nir_ssa_def *
swap2(nir_builder *b, nir_ssa_def *src)
{
/* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */
return nir_ior(b,
nir_ushr_imm(b, src, 8),
nir_iand_imm(b, nir_ishl(b, src, nir_imm_int(b, 8)), 0xff00));
}
static nir_ssa_def *
swap4(nir_builder *b, nir_ssa_def *src)
{
/* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */
return nir_ior(b,
/* (b >> 24) */
nir_ushr_imm(b, src, 24),
nir_ior(b,
/* ((b >> 8) & 0xff00) */
nir_iand(b, nir_ushr_imm(b, src, 8), nir_imm_int(b, 0xff00)),
nir_ior(b,
/* ((b << 8) & 0xff0000) */
nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 8)), nir_imm_int(b, 0xff0000)),
/* ((b << 24) & 0xff000000) */
nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 24)), nir_imm_int(b, 0xff000000)))));
}
/* explode the cf to handle channel counts in the shader */
static void
grab_components(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed)
{
if (weird_packed) {
nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32));
write_conversion(b, nir_channels(b, pixel, 3), buffer_offset, sd);
nir_push_else(b, NULL);
write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
nir_pop_if(b, NULL);
} else {
nir_push_if(b, nir_ieq_imm(b, sd->channels, 1));
write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
nir_push_else(b, NULL);
nir_push_if(b, nir_ieq_imm(b, sd->channels, 2));
write_conversion(b, nir_channels(b, pixel, (1 << 2) - 1), buffer_offset, sd);
nir_push_else(b, NULL);
nir_push_if(b, nir_ieq_imm(b, sd->channels, 3));
write_conversion(b, nir_channels(b, pixel, (1 << 3) - 1), buffer_offset, sd);
nir_push_else(b, NULL);
write_conversion(b, nir_channels(b, pixel, (1 << 4) - 1), buffer_offset, sd);
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
}
}
/* if byteswap is enabled, handle that and then write the components */
static void
handle_swap(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
struct pbo_shader_data *sd, unsigned num_components, bool weird_packed)
{
nir_push_if(b, sd->swap); {
nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); {
/* this is a single high/low swap per component */
nir_ssa_def *components[4];
for (unsigned i = 0; i < 4; i++)
components[i] = swap2(b, nir_channel(b, pixel, i));
nir_ssa_def *v = nir_vec(b, components, 4);
grab_components(b, v, buffer_offset, sd, weird_packed);
} nir_push_else(b, NULL); {
/* this is a pair of high/low swaps for each half of the component */
nir_ssa_def *components[4];
for (unsigned i = 0; i < 4; i++)
components[i] = swap4(b, nir_channel(b, pixel, i));
nir_ssa_def *v = nir_vec(b, components, 4);
grab_components(b, v, buffer_offset, sd, weird_packed);
} nir_pop_if(b, NULL);
} nir_push_else(b, NULL); {
/* swap disabled */
grab_components(b, pixel, buffer_offset, sd, weird_packed);
} nir_pop_if(b, NULL);
}
static nir_ssa_def *
check_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component)
{
nir_ssa_def *c = nir_channel(b, sd->bits, component - 1);
return nir_bcsel(b,
nir_ige(b, sd->channels, nir_imm_int(b, component)),
nir_ior(b,
nir_ine(b, c, sd->bits1),
nir_ine(b, nir_imod(b, c, nir_imm_int(b, 8)), nir_imm_int(b, 0))),
nir_imm_bool(b, 0));
}
/* convenience function for clamping signed integers */
static inline nir_ssa_def *
nir_imin_imax(nir_builder *build, nir_ssa_def *src, nir_ssa_def *clamp_to_min, nir_ssa_def *clamp_to_max)
{
return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max);
}
static inline nir_ssa_def *
nir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
{
/* Clamp to the range [0, 1] */
f = nir_fsat(b, f);
return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}
static inline nir_ssa_def *
nir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor)
{
/* Clamp to the range [-1, 1] */
f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));
return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}
static nir_ssa_def *
clamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels)
{
nir_ssa_def *one = nir_imm_ivec4(b, 1, 0, 0, 0);
nir_ssa_def *two = nir_imm_ivec4(b, 1, 1, 0, 0);
nir_ssa_def *three = nir_imm_ivec4(b, 1, 1, 1, 0);
nir_ssa_def *four = nir_imm_ivec4(b, 1, 1, 1, 1);
/* avoid underflow by clamping to channel count */
src = nir_bcsel(b,
nir_ieq(b, channels, one),
nir_isub(b, src, one),
nir_bcsel(b,
nir_ieq_imm(b, channels, 2),
nir_isub(b, src, two),
nir_bcsel(b,
nir_ieq_imm(b, channels, 3),
nir_isub(b, src, three),
nir_isub(b, src, four))));
return nir_mask(b, src, 32);
}
static void
convert_swap_write(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
unsigned num_components,
struct pbo_shader_data *sd)
{
nir_ssa_def *weird_packed = nir_ior(b,
nir_ior(b,
check_for_weird_packing(b, sd, 4),
check_for_weird_packing(b, sd, 3)),
check_for_weird_packing(b, sd, 2));
if (num_components == 1) {
nir_push_if(b, weird_packed);
nir_push_if(b, sd->r11g11b10_or_sint);
handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), buffer_offset, sd, 1, true);
nir_push_else(b, NULL);
nir_push_if(b, sd->r9g9b9e5);
handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true);
nir_push_else(b, NULL);
nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
nir_ssa_def *pack[2];
pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0));
pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6));
handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true);
} nir_push_else(b, NULL);
handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true);
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
nir_push_else(b, NULL);
handle_swap(b, pixel, buffer_offset, sd, num_components, false);
nir_pop_if(b, NULL);
} else {
nir_push_if(b, weird_packed);
handle_swap(b, pixel, buffer_offset, sd, num_components, true);
nir_push_else(b, NULL);
handle_swap(b, pixel, buffer_offset, sd, num_components, false);
nir_pop_if(b, NULL);
}
}
static void
do_shader_conversion(nir_builder *b, nir_ssa_def *pixel,
unsigned num_components,
nir_ssa_def *coord, struct pbo_shader_data *sd)
{
nir_ssa_def *buffer_offset = get_buffer_offset(b, coord, sd);
nir_ssa_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels);
#define CONVERT_SWAP_WRITE(PIXEL) \
convert_swap_write(b, PIXEL, buffer_offset, num_components, sd);
nir_push_if(b, sd->normalized);
nir_push_if(b, sd->clamp_uint); //unorm
CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32))));
nir_push_else(b, NULL);
CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask)));
nir_pop_if(b, NULL);
nir_push_else(b, NULL);
nir_push_if(b, sd->integer);
nir_push_if(b, sd->r11g11b10_or_sint); //sint
nir_push_if(b, sd->clamp_uint); //uint -> sint
CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask));
nir_push_else(b, NULL);
CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_isub(b, nir_ineg(b, signed_bit_mask), nir_imm_int(b, 1))));
nir_pop_if(b, NULL);
nir_push_else(b, NULL);
nir_push_if(b, sd->clamp_uint); //uint
/* nir_format_clamp_uint */
CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32)));
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
nir_push_else(b, NULL);
nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half
CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel));
nir_push_else(b, NULL);
CONVERT_SWAP_WRITE(pixel);
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
}
static void *
create_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components)
{
const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert");
b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64;
b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 8 : 1;
b.shader->info.workgroup_size[2] = 1;
b.shader->info.textures_used[0] = 1;
b.shader->info.num_ssbos = 1;
b.shader->num_uniforms = 2;
nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo");
nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler");
unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type);
sampler->data.explicit_binding = 1;
struct pbo_shader_data sd;
init_pbo_shader_data(&b, &sd);
nir_ssa_def *bsize = nir_imm_ivec4(&b,
b.shader->info.workgroup_size[0],
b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2],
0);
nir_ssa_def *wid = nir_load_workgroup_id(&b, 32);
nir_ssa_def *iid = nir_load_local_invocation_id(&b);
nir_ssa_def *tile = nir_imul(&b, wid, bsize);
nir_ssa_def *global_id = nir_iadd(&b, tile, iid);
nir_ssa_def *start = nir_iadd(&b, global_id, sd.offset);
nir_ssa_def *coord = nir_channels(&b, start, (1<<coord_components)-1);
nir_ssa_def *max = nir_iadd(&b, sd.offset, sd.range);
nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, nir_channels(&b, max, (1<<coord_components)-1))));
nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3);
txf->is_array = glsl_sampler_type_is_array(sampler->type);
txf->op = nir_texop_txf;
txf->sampler_dim = glsl_get_sampler_dim(sampler->type);
txf->dest_type = nir_type_float32;
txf->coord_components = coord_components;
txf->texture_index = 0;
txf->sampler_index = 0;
txf->src[0].src_type = nir_tex_src_coord;
txf->src[0].src = nir_src_for_ssa(coord);
txf->src[1].src_type = nir_tex_src_lod;
txf->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
txf->src[2].src_type = nir_tex_src_texture_deref;
nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler);
txf->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
nir_builder_instr_insert(&b, &txf->instr);
/* pass the grid offset as the coord to get the zero-indexed buffer offset */
do_shader_conversion(&b, &txf->dest.ssa, num_components, global_id, &sd);
nir_pop_if(&b, NULL);
nir_validate_shader(b.shader, NULL);
gl_nir_opts(b.shader);
return st_nir_finish_builtin_shader(st, b.shader);
}
static void
invert_swizzle(uint8_t *out, const uint8_t *in)
{
/* First, default to all zeroes to prevent uninitialized junk */
for (unsigned c = 0; c < 4; ++c)
out[c] = PIPE_SWIZZLE_0;
/* Now "do" what the swizzle says */
for (unsigned c = 0; c < 4; ++c) {
unsigned char i = in[c];
/* Who cares? */
assert(PIPE_SWIZZLE_X == 0);
if (i > PIPE_SWIZZLE_W)
continue;
/* Invert */
unsigned idx = i - PIPE_SWIZZLE_X;
out[idx] = PIPE_SWIZZLE_X + c;
}
}
static uint32_t
compute_shader_key(enum pipe_texture_target target, unsigned num_components)
{
uint8_t key_target[] = {
[PIPE_BUFFER] = UINT8_MAX,
[PIPE_TEXTURE_1D] = 1,
[PIPE_TEXTURE_2D] = 2,
[PIPE_TEXTURE_3D] = 3,
[PIPE_TEXTURE_CUBE] = 4,
[PIPE_TEXTURE_RECT] = UINT8_MAX,
[PIPE_TEXTURE_1D_ARRAY] = 5,
[PIPE_TEXTURE_2D_ARRAY] = 6,
[PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX,
};
assert(target < ARRAY_SIZE(key_target));
assert(key_target[target] != UINT8_MAX);
return key_target[target] | (num_components << 3);
}
static unsigned
get_dim_from_target(enum pipe_texture_target target)
{
switch (target) {
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_3D:
return 3;
default:
return 2;
}
}
static enum pipe_texture_target
get_target_from_texture(struct pipe_resource *src)
{
enum pipe_texture_target view_target;
switch (src->target) {
case PIPE_TEXTURE_RECT:
view_target = PIPE_TEXTURE_2D;
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
view_target = PIPE_TEXTURE_2D_ARRAY;
break;
default:
view_target = src->target;
break;
}
return view_target;
}
/* force swizzling behavior for sampling */
enum swizzle_clamp {
/* force component selection for named format */
SWIZZLE_CLAMP_LUMINANCE = 1,
SWIZZLE_CLAMP_ALPHA = 2,
SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3,
SWIZZLE_CLAMP_INTENSITY = 4,
SWIZZLE_CLAMP_RGBX = 5,
/* select only 1 component */
SWIZZLE_CLAMP_GREEN = 8,
SWIZZLE_CLAMP_BLUE = 16,
/* reverse ordering for format emulation */
SWIZZLE_CLAMP_BGRA = 32,
};
static struct pipe_resource *
download_texture_compute(struct st_context *st,
const struct gl_pixelstore_attrib *pack,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLint depth,
unsigned level, unsigned layer,
GLenum format, GLenum type,
enum pipe_format src_format,
enum pipe_texture_target view_target,
struct pipe_resource *src,
enum pipe_format dst_format,
enum swizzle_clamp swizzle_clamp)
{
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = st->screen;
struct pipe_resource *dst = NULL;
unsigned dim = get_dim_from_target(view_target);
/* clamp 3d offsets based on slice */
if (view_target == PIPE_TEXTURE_3D)
zoffset += layer;
unsigned num_components = 0;
/* Upload constants */
{
struct pipe_constant_buffer cb;
assert(view_target != PIPE_TEXTURE_1D_ARRAY || !yoffset);
struct pbo_data pd = {
.x = xoffset,
.y = yoffset,
.width = width, .height = height, .depth = depth,
.invert = pack->Invert,
.blocksize = util_format_get_blocksize(dst_format) - 1,
.alignment = ffs(MAX2(pack->Alignment, 1)) - 1,
};
num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1);
cb.buffer = NULL;
cb.user_buffer = &pd;
cb.buffer_offset = 0;
cb.buffer_size = sizeof(pd);
pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb);
}
uint32_t hash_key = compute_shader_key(view_target, num_components);
assert(hash_key != 0);
struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key);
void *cs;
if (!he) {
cs = create_conversion_shader(st, view_target, num_components);
he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs);
}
cs = he->data;
assert(cs);
struct cso_context *cso = st->cso_context;
cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS);
cso_set_compute_shader_handle(cso, cs);
/* Set up the sampler_view */
{
struct pipe_sampler_view templ;
struct pipe_sampler_view *sampler_view;
struct pipe_sampler_state sampler = {0};
const struct pipe_sampler_state *samplers[1] = {&sampler};
const struct util_format_description *desc = util_format_description(dst_format);
u_sampler_view_default_template(&templ, src, src_format);
if (util_format_is_depth_or_stencil(dst_format)) {
templ.swizzle_r = PIPE_SWIZZLE_X;
templ.swizzle_g = PIPE_SWIZZLE_X;
templ.swizzle_b = PIPE_SWIZZLE_X;
templ.swizzle_a = PIPE_SWIZZLE_X;
} else {
uint8_t invswizzle[4];
const uint8_t *swizzle;
/* these swizzle output bits require explicit component selection/ordering */
if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) {
for (unsigned i = 0; i < 4; i++)
invswizzle[i] = PIPE_SWIZZLE_Y;
} else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) {
for (unsigned i = 0; i < 4; i++)
invswizzle[i] = PIPE_SWIZZLE_Z;
} else {
if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) {
if (util_format_get_nr_components(dst_format) == 3)
swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle;
else
swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle;
} else {
swizzle = desc->swizzle;
}
invert_swizzle(invswizzle, swizzle);
}
swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE);
/* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */
switch (swizzle_clamp) {
case SWIZZLE_CLAMP_LUMINANCE:
if (util_format_is_luminance(dst_format))
break;
for (unsigned i = 0; i < 4; i++) {
if (invswizzle[i] != PIPE_SWIZZLE_X)
invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0;
}
break;
case SWIZZLE_CLAMP_ALPHA:
for (unsigned i = 0; i < 4; i++) {
if (invswizzle[i] != PIPE_SWIZZLE_W)
invswizzle[i] = PIPE_SWIZZLE_0;
}
break;
case SWIZZLE_CLAMP_LUMINANCE_ALPHA:
if (util_format_is_luminance_alpha(dst_format))
break;
for (unsigned i = 0; i < 4; i++) {
if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W)
invswizzle[i] = PIPE_SWIZZLE_0;
}
break;
case SWIZZLE_CLAMP_INTENSITY:
for (unsigned i = 0; i < 4; i++) {
if (invswizzle[i] == PIPE_SWIZZLE_W)
invswizzle[i] = PIPE_SWIZZLE_1;
else if (invswizzle[i] != PIPE_SWIZZLE_X)
invswizzle[i] = PIPE_SWIZZLE_0;
}
break;
case SWIZZLE_CLAMP_RGBX:
for (unsigned i = 0; i < 4; i++) {
if (invswizzle[i] == PIPE_SWIZZLE_W)
invswizzle[i] = PIPE_SWIZZLE_1;
}
break;
default: break;
}
templ.swizzle_r = invswizzle[0];
templ.swizzle_g = invswizzle[1];
templ.swizzle_b = invswizzle[2];
templ.swizzle_a = invswizzle[3];
}
templ.target = view_target;
templ.u.tex.first_level = level;
templ.u.tex.last_level = level;
/* array textures expect to have array index provided */
if (view_target != PIPE_TEXTURE_3D && src->array_size) {
templ.u.tex.first_layer = layer;
if (view_target == PIPE_TEXTURE_1D_ARRAY) {
templ.u.tex.first_layer += yoffset;
templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1;
} else {
templ.u.tex.first_layer += zoffset;
templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1;
}
}
sampler_view = pipe->create_sampler_view(pipe, src, &templ);
if (sampler_view == NULL)
goto fail;
pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false,
&sampler_view);
st->state.num_sampler_views[PIPE_SHADER_COMPUTE] =
MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1);
pipe_sampler_view_reference(&sampler_view, NULL);
cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers);
}
/* Set up destination buffer */
unsigned img_stride = _mesa_image_image_stride(pack, width, height, format, type);
unsigned buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride;
{
dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size);
if (!dst)
goto fail;
struct pipe_shader_buffer buffer;
memset(&buffer, 0, sizeof(buffer));
buffer.buffer = dst;
buffer.buffer_size = buffer_size;
pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1);
}
struct pipe_grid_info info = { 0 };
info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64;
info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1;
info.last_block[0] = width % info.block[0];
info.last_block[1] = height % info.block[1];
info.block[2] = 1;
info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
info.grid[2] = depth;
pipe->launch_grid(pipe, &info);
fail:
cso_restore_compute_state(cso);
/* Unbind all because st/mesa won't do it if the current shader doesn't
* use them.
*/
pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, false,
st->state.num_sampler_views[PIPE_SHADER_COMPUTE],
NULL);
st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0;
pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0);
st->dirty |= ST_NEW_CS_CONSTANTS |
ST_NEW_CS_SSBOS |
ST_NEW_CS_SAMPLER_VIEWS;
return dst;
}
static void
copy_converted_buffer(struct gl_context * ctx,
struct gl_pixelstore_attrib *pack,
enum pipe_texture_target view_target,
struct pipe_resource *dst, enum pipe_format dst_format,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, void *pixels)
{
struct pipe_transfer *xfer;
struct st_context *st = st_context(ctx);
unsigned dim = get_dim_from_target(view_target);
uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer);
if (!map)
return;
pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
/* compute shader doesn't handle these to cut down on uniform size */
if (pack->RowLength ||
pack->SkipPixels ||
pack->SkipRows ||
pack->ImageHeight ||
pack->SkipImages) {
if (view_target == PIPE_TEXTURE_1D_ARRAY) {
depth = height;
height = 1;
zoffset = yoffset;
yoffset = 0;
}
struct gl_pixelstore_attrib packing = *pack;
memset(&packing.RowLength, 0, offsetof(struct gl_pixelstore_attrib, SwapBytes) - offsetof(struct gl_pixelstore_attrib, RowLength));
for (unsigned z = 0; z < depth; z++) {
for (unsigned y = 0; y < height; y++) {
GLubyte *dst = _mesa_image_address(dim, pack, pixels,
width, height, format, type,
z, y, 0);
GLubyte *srcpx = _mesa_image_address(dim, &packing, map,
width, height, format, type,
z, y, 0);
memcpy(dst, srcpx, util_format_get_stride(dst_format, width));
}
}
} else {
/* direct copy for all other cases */
memcpy(pixels, map, dst->width0);
}
_mesa_unmap_pbo_dest(ctx, pack);
pipe_buffer_unmap(st->pipe, xfer);
}
bool
st_GetTexSubImage_shader(struct gl_context * ctx,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, void * pixels,
struct gl_texture_image *texImage)
{
struct st_context *st = st_context(ctx);
struct pipe_screen *screen = st->screen;
struct gl_texture_object *stObj = texImage->TexObject;
struct pipe_resource *src = stObj->pt;
struct pipe_resource *dst = NULL;
enum pipe_format dst_format, src_format;
unsigned level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer;
enum pipe_texture_target view_target;
assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
!_mesa_is_format_astc_2d(texImage->TexFormat) &&
texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
/* See if the texture format already matches the format and type,
* in which case the memcpy-based fast path will be used. */
if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
type, ctx->Pack.SwapBytes, NULL)) {
return false;
}
enum swizzle_clamp swizzle_clamp = 0;
src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src);
if (src_format == PIPE_FORMAT_NONE)
return false;
if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) {
/* special handling for drivers that don't support these formats natively */
if (texImage->_BaseFormat == GL_LUMINANCE)
swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE;
else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA)
swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA;
else if (texImage->_BaseFormat == GL_ALPHA)
swizzle_clamp = SWIZZLE_CLAMP_ALPHA;
else if (texImage->_BaseFormat == GL_INTENSITY)
swizzle_clamp = SWIZZLE_CLAMP_INTENSITY;
else if (texImage->_BaseFormat == GL_RGB)
swizzle_clamp = SWIZZLE_CLAMP_RGBX;
}
dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0);
if (dst_format == PIPE_FORMAT_NONE) {
bool need_bgra_swizzle = false;
dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle);
if (dst_format == PIPE_FORMAT_NONE)
return false;
/* special swizzling for component selection */
if (need_bgra_swizzle)
swizzle_clamp |= SWIZZLE_CLAMP_BGRA;
else if (format == GL_GREEN_INTEGER)
swizzle_clamp |= SWIZZLE_CLAMP_GREEN;
else if (format == GL_BLUE_INTEGER)
swizzle_clamp |= SWIZZLE_CLAMP_BLUE;
}
/* check with the driver to see if memcpy is likely to be faster */
if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true))
return false;
view_target = get_target_from_texture(src);
/* I don't know why this works
* only for the texture rects
* but that's how it is
*/
if ((src->target != PIPE_TEXTURE_RECT &&
/* this would need multiple samplerviews */
((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) ||
/* these format just doesn't work and science can't explain why */
dst_format == PIPE_FORMAT_Z32_FLOAT)) ||
/* L8 -> L32_FLOAT is another thinker */
(!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT))
return false;
dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth,
level, layer, format, type, src_format, view_target, src, dst_format,
swizzle_clamp);
copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset,
width, height, depth, format, type, pixels);
pipe_resource_reference(&dst, NULL);
return true;
}