nir/i965/freedreno/vc4: add a bindless bool to type size functions

This is required to calculate sizes correctly when we have bindless
samplers/images.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Timothy Arceri
2019-03-29 12:39:48 +11:00
committed by Karol Herbst
parent 3b2a9ffd60
commit 035759b61b
16 changed files with 76 additions and 59 deletions

View File

@@ -525,7 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
} }
static int static int
type_size_vec4(const struct glsl_type *type) type_size_vec4(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }

View File

@@ -3015,7 +3015,7 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *)); int (*type_size)(const struct glsl_type *, bool));
/* Some helpers to do very simple linking */ /* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
@@ -3036,7 +3036,7 @@ typedef enum {
} nir_lower_io_options; } nir_lower_io_options;
bool nir_lower_io(nir_shader *shader, bool nir_lower_io(nir_shader *shader,
nir_variable_mode modes, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *), int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options); nir_lower_io_options);
typedef enum { typedef enum {

View File

@@ -38,7 +38,7 @@
struct lower_io_state { struct lower_io_state {
void *dead_ctx; void *dead_ctx;
nir_builder builder; nir_builder builder;
int (*type_size)(const struct glsl_type *type); int (*type_size)(const struct glsl_type *type, bool);
nir_variable_mode modes; nir_variable_mode modes;
nir_lower_io_options options; nir_lower_io_options options;
}; };
@@ -95,7 +95,7 @@ global_atomic_for_deref(nir_intrinsic_op deref_op)
void void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size, nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *)) int (*type_size)(const struct glsl_type *, bool))
{ {
unsigned location = 0; unsigned location = 0;
@@ -108,7 +108,10 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
continue; continue;
var->data.driver_location = location; var->data.driver_location = location;
location += type_size(var->type); bool bindless_type_size = var->data.mode == nir_var_shader_in ||
var->data.mode == nir_var_shader_out ||
var->data.bindless;
location += type_size(var->type, bindless_type_size);
} }
*size = location; *size = location;
@@ -138,8 +141,8 @@ nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
static nir_ssa_def * static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref, get_io_offset(nir_builder *b, nir_deref_instr *deref,
nir_ssa_def **vertex_index, nir_ssa_def **vertex_index,
int (*type_size)(const struct glsl_type *), int (*type_size)(const struct glsl_type *, bool),
unsigned *component) unsigned *component, bool bts)
{ {
nir_deref_path path; nir_deref_path path;
nir_deref_path_init(&path, deref, NULL); nir_deref_path_init(&path, deref, NULL);
@@ -165,7 +168,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
const unsigned total_offset = *component + index; const unsigned total_offset = *component + index;
const unsigned slot_offset = total_offset / 4; const unsigned slot_offset = total_offset / 4;
*component = total_offset % 4; *component = total_offset % 4;
return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset); return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
} }
/* Just emit code and let constant-folding go to town */ /* Just emit code and let constant-folding go to town */
@@ -173,7 +176,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
for (; *p; p++) { for (; *p; p++) {
if ((*p)->deref_type == nir_deref_type_array) { if ((*p)->deref_type == nir_deref_type_array) {
unsigned size = type_size((*p)->type); unsigned size = type_size((*p)->type, bts);
nir_ssa_def *mul = nir_ssa_def *mul =
nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size); nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
@@ -185,7 +188,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
unsigned field_offset = 0; unsigned field_offset = 0;
for (unsigned i = 0; i < (*p)->strct.index; i++) { for (unsigned i = 0; i < (*p)->strct.index; i++) {
field_offset += type_size(glsl_get_struct_field(parent->type, i)); field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
} }
offset = nir_iadd_imm(b, offset, field_offset); offset = nir_iadd_imm(b, offset, field_offset);
} else { } else {
@@ -255,7 +258,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
nir_intrinsic_set_component(load, component); nir_intrinsic_set_component(load, component);
if (load->intrinsic == nir_intrinsic_load_uniform) if (load->intrinsic == nir_intrinsic_load_uniform)
nir_intrinsic_set_range(load, state->type_size(var->type)); nir_intrinsic_set_range(load,
state->type_size(var->type, var->data.bindless));
if (vertex_index) { if (vertex_index) {
load->src[0] = nir_src_for_ssa(vertex_index); load->src[0] = nir_src_for_ssa(vertex_index);
@@ -468,9 +472,13 @@ nir_lower_io_block(nir_block *block,
nir_ssa_def *offset; nir_ssa_def *offset;
nir_ssa_def *vertex_index = NULL; nir_ssa_def *vertex_index = NULL;
unsigned component_offset = var->data.location_frac; unsigned component_offset = var->data.location_frac;
bool bindless_type_size = mode == nir_var_shader_in ||
mode == nir_var_shader_out ||
var->data.bindless;
offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL, offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
state->type_size, &component_offset); state->type_size, &component_offset,
bindless_type_size);
nir_intrinsic_instr *replacement; nir_intrinsic_instr *replacement;
@@ -538,7 +546,7 @@ nir_lower_io_block(nir_block *block,
static bool static bool
nir_lower_io_impl(nir_function_impl *impl, nir_lower_io_impl(nir_function_impl *impl,
nir_variable_mode modes, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *), int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options options) nir_lower_io_options options)
{ {
struct lower_io_state state; struct lower_io_state state;
@@ -563,7 +571,7 @@ nir_lower_io_impl(nir_function_impl *impl,
bool bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes, nir_lower_io(nir_shader *shader, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *), int (*type_size)(const struct glsl_type *, bool),
nir_lower_io_options options) nir_lower_io_options options)
{ {
bool progress = false; bool progress = false;

View File

@@ -35,7 +35,7 @@
#include "ir3_nir.h" #include "ir3_nir.h"
int int
ir3_glsl_type_size(const struct glsl_type *type) ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }

View File

@@ -518,7 +518,7 @@ void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out);
uint64_t ir3_shader_outputs(const struct ir3_shader *so); uint64_t ir3_shader_outputs(const struct ir3_shader *so);
int int
ir3_glsl_type_size(const struct glsl_type *type); ir3_glsl_type_size(const struct glsl_type *type, bool bindless);
static inline const char * static inline const char *
ir3_shader_stage(struct ir3_shader *shader) ir3_shader_stage(struct ir3_shader *shader)

View File

@@ -83,7 +83,7 @@ emit(struct fd_ringbuffer *ring, gl_shader_stage type,
} }
static int static int
ir2_glsl_type_size(const struct glsl_type *type) ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }

View File

@@ -53,7 +53,7 @@ using std::tr1::unordered_map;
using namespace nv50_ir; using namespace nv50_ir;
int int
type_size(const struct glsl_type *type) type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }
@@ -3028,7 +3028,7 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari
switch (deref->deref_type) { switch (deref->deref_type) {
case nir_deref_type_array: { case nir_deref_type_array: {
Value *indirect; Value *indirect;
uint8_t size = type_size(deref->type); uint8_t size = type_size(deref->type, true);
result += size * getIndirect(&deref->arr.index, 0, indirect); result += size * getIndirect(&deref->arr.index, 0, indirect);
if (indirect) { if (indirect) {

View File

@@ -640,15 +640,15 @@ attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constant
} }
static int static int
glsl_type_size(const struct glsl_type *type) glsl_type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }
static int static int
uniform_type_size(const struct glsl_type *type) uniform_type_size(const struct glsl_type *type, bool bindless)
{ {
return st_glsl_storage_type_size(type, false); return st_glsl_storage_type_size(type, bindless);
} }
/* Lower fdot2 to a vector multiplication followed by channel addition */ /* Lower fdot2 to a vector multiplication followed by channel addition */

View File

@@ -169,7 +169,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
} }
static int static int
type_size(const struct glsl_type *type) type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }

View File

@@ -45,7 +45,7 @@ static void
ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list); ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
static int static int
type_size(const struct glsl_type *type) type_size(const struct glsl_type *type, bool bindless)
{ {
return glsl_count_attribute_slots(type, false); return glsl_count_attribute_slots(type, false);
} }

View File

@@ -518,7 +518,7 @@ fs_reg::component_size(unsigned width) const
} }
extern "C" int extern "C" int
type_size_scalar(const struct glsl_type *type) type_size_scalar(const struct glsl_type *type, bool bindless)
{ {
unsigned int size, i; unsigned int size, i;
@@ -540,17 +540,19 @@ type_size_scalar(const struct glsl_type *type)
case GLSL_TYPE_INT64: case GLSL_TYPE_INT64:
return type->components() * 2; return type->components() * 2;
case GLSL_TYPE_ARRAY: case GLSL_TYPE_ARRAY:
return type_size_scalar(type->fields.array) * type->length; return type_size_scalar(type->fields.array, bindless) * type->length;
case GLSL_TYPE_STRUCT: case GLSL_TYPE_STRUCT:
case GLSL_TYPE_INTERFACE: case GLSL_TYPE_INTERFACE:
size = 0; size = 0;
for (i = 0; i < type->length; i++) { for (i = 0; i < type->length; i++) {
size += type_size_scalar(type->fields.structure[i].type); size += type_size_scalar(type->fields.structure[i].type, bindless);
} }
return size; return size;
case GLSL_TYPE_SAMPLER: case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_IMAGE: case GLSL_TYPE_IMAGE:
if (bindless)
return type->components() * 2;
case GLSL_TYPE_ATOMIC_UINT:
/* Samplers, atomics, and images take up no register space, since /* Samplers, atomics, and images take up no register space, since
* they're baked in at link time. * they're baked in at link time.
*/ */
@@ -1135,7 +1137,8 @@ fs_reg
fs_visitor::vgrf(const glsl_type *const type) fs_visitor::vgrf(const glsl_type *const type)
{ {
int reg_width = dispatch_width / 8; int reg_width = dispatch_width / 8;
return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width), return fs_reg(VGRF,
alloc.allocate(type_size_scalar(type, false) * reg_width),
brw_type_for_base_type(type)); brw_type_for_base_type(type));
} }

View File

@@ -58,7 +58,7 @@ fs_visitor::nir_setup_outputs()
const int loc = var->data.driver_location; const int loc = var->data.driver_location;
const unsigned var_vec4s = const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4) var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
: type_size_vec4(var->type); : type_size_vec4(var->type, true);
vec4s[loc] = MAX2(vec4s[loc], var_vec4s); vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
} }

View File

@@ -32,20 +32,20 @@
extern "C" { extern "C" {
#endif #endif
int type_size_scalar(const struct glsl_type *type); int type_size_scalar(const struct glsl_type *type, bool bindless);
int type_size_vec4(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type, bool bindless);
int type_size_dvec4(const struct glsl_type *type); int type_size_dvec4(const struct glsl_type *type, bool bindless);
static inline int static inline int
type_size_scalar_bytes(const struct glsl_type *type) type_size_scalar_bytes(const struct glsl_type *type, bool bindless)
{ {
return type_size_scalar(type) * 4; return type_size_scalar(type, bindless) * 4;
} }
static inline int static inline int
type_size_vec4_bytes(const struct glsl_type *type) type_size_vec4_bytes(const struct glsl_type *type, bool bindless)
{ {
return type_size_vec4(type) * 16; return type_size_vec4(type, bindless) * 16;
} }
/* Flags set in the instr->pass_flags field by i965 analysis passes */ /* Flags set in the instr->pass_flags field by i965 analysis passes */

View File

@@ -576,7 +576,7 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
* false) elements needed to pack a type. * false) elements needed to pack a type.
*/ */
static int static int
type_size_xvec4(const struct glsl_type *type, bool as_vec4) type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless)
{ {
unsigned int i; unsigned int i;
int size; int size;
@@ -609,12 +609,14 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
} }
case GLSL_TYPE_ARRAY: case GLSL_TYPE_ARRAY:
assert(type->length > 0); assert(type->length > 0);
return type_size_xvec4(type->fields.array, as_vec4) * type->length; return type_size_xvec4(type->fields.array, as_vec4, bindless) *
type->length;
case GLSL_TYPE_STRUCT: case GLSL_TYPE_STRUCT:
case GLSL_TYPE_INTERFACE: case GLSL_TYPE_INTERFACE:
size = 0; size = 0;
for (i = 0; i < type->length; i++) { for (i = 0; i < type->length; i++) {
size += type_size_xvec4(type->fields.structure[i].type, as_vec4); size += type_size_xvec4(type->fields.structure[i].type, as_vec4,
bindless);
} }
return size; return size;
case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_SUBROUTINE:
@@ -624,11 +626,11 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
/* Samplers take up no register space, since they're baked in at /* Samplers take up no register space, since they're baked in at
* link time. * link time.
*/ */
return 0; return bindless ? 1 : 0;
case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_ATOMIC_UINT:
return 0; return 0;
case GLSL_TYPE_IMAGE: case GLSL_TYPE_IMAGE:
return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
case GLSL_TYPE_VOID: case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR: case GLSL_TYPE_ERROR:
case GLSL_TYPE_FUNCTION: case GLSL_TYPE_FUNCTION:
@@ -649,9 +651,9 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
* store a particular type. * store a particular type.
*/ */
extern "C" int extern "C" int
type_size_vec4(const struct glsl_type *type) type_size_vec4(const struct glsl_type *type, bool bindless)
{ {
return type_size_xvec4(type, true); return type_size_xvec4(type, true, bindless);
} }
/** /**
@@ -674,9 +676,9 @@ type_size_vec4(const struct glsl_type *type)
* type fits in one or two vec4 slots. * type fits in one or two vec4 slots.
*/ */
extern "C" int extern "C" int
type_size_dvec4(const struct glsl_type *type) type_size_dvec4(const struct glsl_type *type, bool bindless)
{ {
return type_size_xvec4(type, false); return type_size_xvec4(type, false, bindless);
} }
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
@@ -684,7 +686,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
init(); init();
this->file = VGRF; this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type)); this->nr = v->alloc.allocate(type_size_vec4(type, false));
if (type->is_array() || type->is_struct()) { if (type->is_array() || type->is_struct()) {
this->swizzle = BRW_SWIZZLE_NOOP; this->swizzle = BRW_SWIZZLE_NOOP;
@@ -702,7 +704,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init(); init();
this->file = VGRF; this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type) * size); this->nr = v->alloc.allocate(type_size_vec4(type, false) * size);
this->swizzle = BRW_SWIZZLE_NOOP; this->swizzle = BRW_SWIZZLE_NOOP;
@@ -714,7 +716,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
init(); init();
this->file = VGRF; this->file = VGRF;
this->nr = v->alloc.allocate(type_size_vec4(type)); this->nr = v->alloc.allocate(type_size_vec4(type, false));
if (type->is_array() || type->is_struct()) { if (type->is_array() || type->is_struct()) {
this->writemask = WRITEMASK_XYZW; this->writemask = WRITEMASK_XYZW;

View File

@@ -110,7 +110,7 @@ st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless)
} }
int int
st_glsl_type_dword_size(const struct glsl_type *type) st_glsl_type_dword_size(const struct glsl_type *type, bool bindless)
{ {
unsigned int size, i; unsigned int size, i;
@@ -127,20 +127,24 @@ st_glsl_type_dword_size(const struct glsl_type *type)
case GLSL_TYPE_UINT8: case GLSL_TYPE_UINT8:
case GLSL_TYPE_INT8: case GLSL_TYPE_INT8:
return DIV_ROUND_UP(type->components(), 4); return DIV_ROUND_UP(type->components(), 4);
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
if (!bindless)
return 0;
case GLSL_TYPE_DOUBLE: case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64: case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64: case GLSL_TYPE_INT64:
return type->components() * 2; return type->components() * 2;
case GLSL_TYPE_ARRAY: case GLSL_TYPE_ARRAY:
return st_glsl_type_dword_size(type->fields.array) * type->length; return st_glsl_type_dword_size(type->fields.array, bindless) *
type->length;
case GLSL_TYPE_STRUCT: case GLSL_TYPE_STRUCT:
size = 0; size = 0;
for (i = 0; i < type->length; i++) { for (i = 0; i < type->length; i++) {
size += st_glsl_type_dword_size(type->fields.structure[i].type); size += st_glsl_type_dword_size(type->fields.structure[i].type,
bindless);
} }
return size; return size;
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_ATOMIC_UINT:
return 0; return 0;
case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_SUBROUTINE:
@@ -162,7 +166,7 @@ st_glsl_type_dword_size(const struct glsl_type *type)
* vec4. * vec4.
*/ */
int int
st_glsl_uniforms_type_size(const struct glsl_type *type) st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless)
{ {
return st_glsl_storage_type_size(type, false); return st_glsl_storage_type_size(type, bindless);
} }

View File

@@ -36,9 +36,9 @@ extern "C" {
int st_glsl_storage_type_size(const struct glsl_type *type, int st_glsl_storage_type_size(const struct glsl_type *type,
bool is_bindless); bool is_bindless);
int st_glsl_uniforms_type_size(const struct glsl_type *type); int st_glsl_uniforms_type_size(const struct glsl_type *type, bool bindless);
int st_glsl_type_dword_size(const struct glsl_type *type); int st_glsl_type_dword_size(const struct glsl_type *type, bool bindless);
#ifdef __cplusplus #ifdef __cplusplus
} }