nir/i965/freedreno/vc4: add a bindless bool to type size functions

This is required to calculate sizes correctly when we have bindless
samplers/images.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Timothy Arceri
2019-03-29 12:39:48 +11:00
committed by Karol Herbst
parent 3b2a9ffd60
commit 035759b61b
16 changed files with 76 additions and 59 deletions

View File

@@ -525,7 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
}
static int
type_size_vec4(const struct glsl_type *type)
type_size_vec4(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}

View File

@@ -3015,7 +3015,7 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *));
int (*type_size)(const struct glsl_type *, bool));
/* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
@@ -3036,7 +3036,7 @@ typedef enum {
} nir_lower_io_options;
bool nir_lower_io(nir_shader *shader,
nir_variable_mode modes,
int (*type_size)(const struct glsl_type *),
int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options);
typedef enum {

View File

@@ -38,7 +38,7 @@
struct lower_io_state {
void *dead_ctx;
nir_builder builder;
int (*type_size)(const struct glsl_type *type);
int (*type_size)(const struct glsl_type *type, bool);
nir_variable_mode modes;
nir_lower_io_options options;
};
@@ -95,7 +95,7 @@ global_atomic_for_deref(nir_intrinsic_op deref_op)
void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *))
int (*type_size)(const struct glsl_type *, bool))
{
unsigned location = 0;
@@ -108,7 +108,10 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
continue;
var->data.driver_location = location;
location += type_size(var->type);
bool bindless_type_size = var->data.mode == nir_var_shader_in ||
var->data.mode == nir_var_shader_out ||
var->data.bindless;
location += type_size(var->type, bindless_type_size);
}
*size = location;
@@ -138,8 +141,8 @@ nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
nir_ssa_def **vertex_index,
int (*type_size)(const struct glsl_type *),
unsigned *component)
int (*type_size)(const struct glsl_type *, bool),
unsigned *component, bool bts)
{
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
@@ -165,7 +168,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
const unsigned total_offset = *component + index;
const unsigned slot_offset = total_offset / 4;
*component = total_offset % 4;
return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
}
/* Just emit code and let constant-folding go to town */
@@ -173,7 +176,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
for (; *p; p++) {
if ((*p)->deref_type == nir_deref_type_array) {
unsigned size = type_size((*p)->type);
unsigned size = type_size((*p)->type, bts);
nir_ssa_def *mul =
nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
@@ -185,7 +188,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
unsigned field_offset = 0;
for (unsigned i = 0; i < (*p)->strct.index; i++) {
field_offset += type_size(glsl_get_struct_field(parent->type, i));
field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
}
offset = nir_iadd_imm(b, offset, field_offset);
} else {
@@ -255,7 +258,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
nir_intrinsic_set_component(load, component);
if (load->intrinsic == nir_intrinsic_load_uniform)
nir_intrinsic_set_range(load, state->type_size(var->type));
nir_intrinsic_set_range(load,
state->type_size(var->type, var->data.bindless));
if (vertex_index) {
load->src[0] = nir_src_for_ssa(vertex_index);
@@ -468,9 +472,13 @@ nir_lower_io_block(nir_block *block,
nir_ssa_def *offset;
nir_ssa_def *vertex_index = NULL;
unsigned component_offset = var->data.location_frac;
bool bindless_type_size = mode == nir_var_shader_in ||
mode == nir_var_shader_out ||
var->data.bindless;
offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
state->type_size, &component_offset);
state->type_size, &component_offset,
bindless_type_size);
nir_intrinsic_instr *replacement;
@@ -538,7 +546,7 @@ nir_lower_io_block(nir_block *block,
static bool
nir_lower_io_impl(nir_function_impl *impl,
nir_variable_mode modes,
int (*type_size)(const struct glsl_type *),
int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options options)
{
struct lower_io_state state;
@@ -563,7 +571,7 @@ nir_lower_io_impl(nir_function_impl *impl,
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *),
int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options options)
{
bool progress = false;

View File

@@ -35,7 +35,7 @@
#include "ir3_nir.h"
int
ir3_glsl_type_size(const struct glsl_type *type)
ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}

View File

@@ -518,7 +518,7 @@ void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out);
uint64_t ir3_shader_outputs(const struct ir3_shader *so);
int
ir3_glsl_type_size(const struct glsl_type *type);
ir3_glsl_type_size(const struct glsl_type *type, bool bindless);
static inline const char *
ir3_shader_stage(struct ir3_shader *shader)

View File

@@ -83,7 +83,7 @@ emit(struct fd_ringbuffer *ring, gl_shader_stage type,
}
static int
ir2_glsl_type_size(const struct glsl_type *type)
ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}

View File

@@ -53,7 +53,7 @@ using std::tr1::unordered_map;
using namespace nv50_ir;
int
type_size(const struct glsl_type *type)
type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
@@ -3028,7 +3028,7 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari
switch (deref->deref_type) {
case nir_deref_type_array: {
Value *indirect;
uint8_t size = type_size(deref->type);
uint8_t size = type_size(deref->type, true);
result += size * getIndirect(&deref->arr.index, 0, indirect);
if (indirect) {

View File

@@ -640,15 +640,15 @@ attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constant
}
static int
glsl_type_size(const struct glsl_type *type)
glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static int
uniform_type_size(const struct glsl_type *type)
uniform_type_size(const struct glsl_type *type, bool bindless)
{
return st_glsl_storage_type_size(type, false);
return st_glsl_storage_type_size(type, bindless);
}
/* Lower fdot2 to a vector multiplication followed by channel addition */

View File

@@ -169,7 +169,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
}
static int
type_size(const struct glsl_type *type)
type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}

View File

@@ -45,7 +45,7 @@ static void
ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
static int
type_size(const struct glsl_type *type)
type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}

View File

@@ -518,7 +518,7 @@ fs_reg::component_size(unsigned width) const
}
extern "C" int
type_size_scalar(const struct glsl_type *type)
type_size_scalar(const struct glsl_type *type, bool bindless)
{
unsigned int size, i;
@@ -540,17 +540,19 @@ type_size_scalar(const struct glsl_type *type)
case GLSL_TYPE_INT64:
return type->components() * 2;
case GLSL_TYPE_ARRAY:
return type_size_scalar(type->fields.array) * type->length;
return type_size_scalar(type->fields.array, bindless) * type->length;
case GLSL_TYPE_STRUCT:
case GLSL_TYPE_INTERFACE:
size = 0;
for (i = 0; i < type->length; i++) {
size += type_size_scalar(type->fields.structure[i].type);
size += type_size_scalar(type->fields.structure[i].type, bindless);
}
return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_IMAGE:
if (bindless)
return type->components() * 2;
case GLSL_TYPE_ATOMIC_UINT:
/* Samplers, atomics, and images take up no register space, since
* they're baked in at link time.
*/
@@ -1135,7 +1137,8 @@ fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width),
return fs_reg(VGRF,
alloc.allocate(type_size_scalar(type, false) * reg_width),
brw_type_for_base_type(type));
}

View File

@@ -58,7 +58,7 @@ fs_visitor::nir_setup_outputs()
const int loc = var->data.driver_location;
const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
: type_size_vec4(var->type);
: type_size_vec4(var->type, true);
vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
}

View File

@@ -32,20 +32,20 @@
extern "C" {
#endif
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
int type_size_dvec4(const struct glsl_type *type);
int type_size_scalar(const struct glsl_type *type, bool bindless);
int type_size_vec4(const struct glsl_type *type, bool bindless);
int type_size_dvec4(const struct glsl_type *type, bool bindless);
static inline int
type_size_scalar_bytes(const struct glsl_type *type)
type_size_scalar_bytes(const struct glsl_type *type, bool bindless)
{
return type_size_scalar(type) * 4;
return type_size_scalar(type, bindless) * 4;
}
static inline int
type_size_vec4_bytes(const struct glsl_type *type)
type_size_vec4_bytes(const struct glsl_type *type, bool bindless)
{
return type_size_vec4(type) * 16;
return type_size_vec4(type, bindless) * 16;
}
/* Flags set in the instr->pass_flags field by i965 analysis passes */

View File

@@ -576,7 +576,7 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
* false) elements needed to pack a type.
*/
static int
type_size_xvec4(const struct glsl_type *type, bool as_vec4)
type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless)
{
unsigned int i;
int size;
@@ -609,12 +609,14 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
}
case GLSL_TYPE_ARRAY:
assert(type->length > 0);
return type_size_xvec4(type->fields.array, as_vec4) * type->length;
return type_size_xvec4(type->fields.array, as_vec4, bindless) *
type->length;
case GLSL_TYPE_STRUCT:
case GLSL_TYPE_INTERFACE:
size = 0;
for (i = 0; i < type->length; i++) {
size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
size += type_size_xvec4(type->fields.structure[i].type, as_vec4,
bindless);
}
return size;
case GLSL_TYPE_SUBROUTINE:
@@ -624,11 +626,11 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
/* Samplers take up no register space, since they're baked in at
* link time.
*/
return 0;
return bindless ? 1 : 0;
case GLSL_TYPE_ATOMIC_UINT:
return 0;
case GLSL_TYPE_IMAGE:
return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_FUNCTION:
@@ -649,9 +651,9 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
* store a particular type.
*/
extern "C" int
type_size_vec4(const struct glsl_type *type)
type_size_vec4(const struct glsl_type *type, bool bindless)
{
return type_size_xvec4(type, true);
return type_size_xvec4(type, true, bindless);
}
/**
@@ -674,9 +676,9 @@ type_size_vec4(const struct glsl_type *type)
* type fits in one or two vec4 slots.
*/
extern "C" int
type_size_dvec4(const struct glsl_type *type)
type_size_dvec4(const struct glsl_type *type, bool bindless)
{
return type_size_xvec4(type, false);
return type_size_xvec4(type, false, bindless);
}
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
@@ -684,7 +686,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type));
this->nr = v->alloc.allocate(type_size_vec4(type, false));
if (type->is_array() || type->is_struct()) {
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -702,7 +704,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init();
this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type) * size);
this->nr = v->alloc.allocate(type_size_vec4(type, false) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -714,7 +716,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type));
this->nr = v->alloc.allocate(type_size_vec4(type, false));
if (type->is_array() || type->is_struct()) {
this->writemask = WRITEMASK_XYZW;

View File

@@ -110,7 +110,7 @@ st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless)
}
int
st_glsl_type_dword_size(const struct glsl_type *type)
st_glsl_type_dword_size(const struct glsl_type *type, bool bindless)
{
unsigned int size, i;
@@ -127,20 +127,24 @@ st_glsl_type_dword_size(const struct glsl_type *type)
case GLSL_TYPE_UINT8:
case GLSL_TYPE_INT8:
return DIV_ROUND_UP(type->components(), 4);
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
if (!bindless)
return 0;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
return type->components() * 2;
case GLSL_TYPE_ARRAY:
return st_glsl_type_dword_size(type->fields.array) * type->length;
return st_glsl_type_dword_size(type->fields.array, bindless) *
type->length;
case GLSL_TYPE_STRUCT:
size = 0;
for (i = 0; i < type->length; i++) {
size += st_glsl_type_dword_size(type->fields.structure[i].type);
size += st_glsl_type_dword_size(type->fields.structure[i].type,
bindless);
}
return size;
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
return 0;
case GLSL_TYPE_SUBROUTINE:
@@ -162,7 +166,7 @@ st_glsl_type_dword_size(const struct glsl_type *type)
* vec4.
*/
int
st_glsl_uniforms_type_size(const struct glsl_type *type)
st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless)
{
return st_glsl_storage_type_size(type, false);
return st_glsl_storage_type_size(type, bindless);
}

View File

@@ -36,9 +36,9 @@ extern "C" {
int st_glsl_storage_type_size(const struct glsl_type *type,
bool is_bindless);
int st_glsl_uniforms_type_size(const struct glsl_type *type);
int st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless);
int st_glsl_type_dword_size(const struct glsl_type *type);
int st_glsl_type_dword_size(const struct glsl_type *type, bool bindless);
#ifdef __cplusplus
}