freedreno: Stop scattered remapping of SSBOs/images to IBOs.

Just make it be all SSBOs then all storage images.  The remapping table
was there to make it so that the big gap present from gallium's atomic
lowering would get cleaned up, but that's no longer the case.  The table has
made it very hard to support Vulkan storage images, so it's time for it to
go.

This does mean that an SSBO/IBO that is only loaded (or size-queried) will
now occupy a slot in the table where it wouldn't before.  This seems like
a minor cost compared to being able to drop this much logic.

With the remapping table gone, SSBO array handling for turnip just falls
out.

Fixes many array cases of
dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_buffer.*

Reviewed-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Jonathan Marek <jonathan@marek.ca> (turnip)
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3240>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3240>
This commit is contained in:
Eric Anholt
2019-12-20 14:02:55 -08:00
parent 7558b5da13
commit fb6fca0037
14 changed files with 100 additions and 83 deletions

View File

@@ -43,7 +43,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction *ldgb, *src0, *src1, *byte_offset, *offset; struct ir3_instruction *ldgb, *src0, *src1, *byte_offset, *offset;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0]));
byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; byte_offset = ir3_get_src(ctx, &intr->src[1])[0];
offset = ir3_get_src(ctx, &intr->src[2])[0]; offset = ir3_get_src(ctx, &intr->src[2])[0];
@@ -81,7 +81,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
unsigned ncomp = ffs(~wrmask) - 1; unsigned ncomp = ffs(~wrmask) - 1;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1]));
byte_offset = ir3_get_src(ctx, &intr->src[2])[0]; byte_offset = ir3_get_src(ctx, &intr->src[2])[0];
offset = ir3_get_src(ctx, &intr->src[3])[0]; offset = ir3_get_src(ctx, &intr->src[3])[0];
@@ -132,7 +132,7 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
type_t type = TYPE_U32; type_t type = TYPE_U32;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0]));
ssbo = create_immed(b, ibo_idx); ssbo = create_immed(b, ibo_idx);
byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; byte_offset = ir3_get_src(ctx, &intr->src[1])[0];
@@ -262,7 +262,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned ncoords = ir3_get_image_coords(var, NULL);
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
/* src0 is value /* src0 is value
@@ -301,7 +301,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned ncoords = ir3_get_image_coords(var, NULL);
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
image = create_immed(b, ibo_idx); image = create_immed(b, ibo_idx);

View File

@@ -48,7 +48,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction *ldib; struct ir3_instruction *ldib;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0]));
offset = ir3_get_src(ctx, &intr->src[2])[0]; offset = ir3_get_src(ctx, &intr->src[2])[0];
@@ -77,7 +77,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
unsigned ncomp = ffs(~wrmask) - 1; unsigned ncomp = ffs(~wrmask) - 1;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1]));
/* src0 is offset, src1 is value: /* src0 is offset, src1 is value:
*/ */
@@ -119,7 +119,8 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
type_t type = TYPE_U32; type_t type = TYPE_U32;
/* can this be non-const buffer_index? how do we handle that? */ /* can this be non-const buffer_index? how do we handle that? */
int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader,
nir_src_as_uint(intr->src[0]));
ibo = create_immed(b, ibo_idx); ibo = create_immed(b, ibo_idx);
data = ir3_get_src(ctx, &intr->src[2])[0]; data = ir3_get_src(ctx, &intr->src[2])[0];
@@ -213,7 +214,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned ncoords = ir3_get_image_coords(var, NULL);
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
/* src0 is offset, src1 is value: /* src0 is offset, src1 is value:
@@ -242,7 +243,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0];
unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned ncoords = ir3_get_image_coords(var, NULL);
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
ibo = create_immed(b, ibo_idx); ibo = create_immed(b, ibo_idx);
@@ -383,7 +384,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic)
void void
ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so)
{ {
if (so->image_mapping.num_ibo == 0) if (ir3_shader_nibo(so) == 0)
return; return;
foreach_block (block, &ir->block_list) { foreach_block (block, &ir->block_list) {

View File

@@ -35,20 +35,14 @@ void
ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures) ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
{ {
memset(mapping, IBO_INVALID, sizeof(*mapping)); memset(mapping, IBO_INVALID, sizeof(*mapping));
mapping->num_ibo = 0;
mapping->num_tex = 0; mapping->num_tex = 0;
mapping->tex_base = num_textures; mapping->tex_base = num_textures;
} }
unsigned unsigned
ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo) ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo)
{ {
if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) { return ssbo;
unsigned ibo = mapping->num_ibo++;
mapping->ssbo_to_ibo[ssbo] = ibo;
mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo;
}
return mapping->ssbo_to_ibo[ssbo];
} }
unsigned unsigned
@@ -63,14 +57,9 @@ ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
} }
unsigned unsigned
ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image) ir3_image_to_ibo(struct ir3_shader *shader, unsigned image)
{ {
if (mapping->image_to_ibo[image] == IBO_INVALID) { return shader->nir->info.num_ssbos + image;
unsigned ibo = mapping->num_ibo++;
mapping->image_to_ibo[image] = ibo;
mapping->ibo_to_image[ibo] = image;
}
return mapping->image_to_ibo[image];
} }
unsigned unsigned

View File

@@ -31,9 +31,9 @@
void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures); void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo); unsigned ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo);
unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo); unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image); unsigned ir3_image_to_ibo(struct ir3_shader *shader, unsigned image);
unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image); unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
unsigned ir3_get_image_slot(nir_deref_instr *deref); unsigned ir3_get_image_slot(nir_deref_instr *deref);

View File

@@ -427,12 +427,10 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
*/ */
struct ir3_ibo_mapping { struct ir3_ibo_mapping {
#define IBO_INVALID 0xff #define IBO_INVALID 0xff
/* Maps logical SSBO state to hw state: */ /* Maps logical SSBO state to hw tex state: */
uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS];
uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS]; uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS];
/* Maps logical Image state to hw state: */ /* Maps logical Image state to hw tex state: */
uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES];
uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES]; uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES];
/* Maps hw state back to logical SSBO or Image state: /* Maps hw state back to logical SSBO or Image state:
@@ -441,10 +439,8 @@ struct ir3_ibo_mapping {
* hw slot is used for SSBO state vs Image state. * hw slot is used for SSBO state vs Image state.
*/ */
#define IBO_SSBO 0x80 #define IBO_SSBO 0x80
uint8_t ibo_to_image[32];
uint8_t tex_to_image[32]; uint8_t tex_to_image[32];
uint8_t num_ibo;
uint8_t num_tex; /* including real textures */ uint8_t num_tex; /* including real textures */
uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
}; };
@@ -795,4 +791,14 @@ ir3_shader_halfregs(const struct ir3_shader_variant *v)
return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1);
} }
/**
 * Number of IBO slots used by a shader variant.
 *
 * The count is the sum of the SSBO count and the image count recorded in the
 * NIR shader info of the variant's shader.
 *
 * @param v  variant to query
 * @return   num_ssbos + num_images, or 0 when the variant has no shader
 *           attached (the dummy variant used in binning mode won't have an
 *           actual shader).
 */
static inline uint32_t
ir3_shader_nibo(const struct ir3_shader_variant *v)
{
	return v->shader
		? v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images
		: 0;
}
#endif /* IR3_SHADER_H_ */ #endif /* IR3_SHADER_H_ */

View File

@@ -2956,26 +2956,25 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
&pipeline->program.link[type]; &pipeline->program.link[type];
VkResult result; VkResult result;
if (link->image_mapping.num_ibo == 0) { unsigned num_desc = link->ssbo_map.num_desc;
if (num_desc == 0) {
*entry = (struct tu_cs_entry) {}; *entry = (struct tu_cs_entry) {};
return VK_SUCCESS; return VK_SUCCESS;
} }
struct ts_cs_memory ibo_const; struct ts_cs_memory ibo_const;
result = tu_cs_alloc(device, draw_state, link->image_mapping.num_ibo, result = tu_cs_alloc(device, draw_state, num_desc,
A6XX_TEX_CONST_DWORDS, &ibo_const); A6XX_TEX_CONST_DWORDS, &ibo_const);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) { int ssbo_index = 0;
unsigned idx = link->image_mapping.ibo_to_image[i]; for (unsigned i = 0; i < link->ssbo_map.num; i++) {
uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * i]; for (int j = 0; j < link->ssbo_map.array_size[i]; j++) {
uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * ssbo_index];
if (idx & IBO_SSBO) { uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, i, j);
idx &= ~IBO_SSBO;
uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx,
0 /* XXX */);
/* We don't expose robustBufferAccess, so leave the size unlimited. */ /* We don't expose robustBufferAccess, so leave the size unlimited. */
uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4; uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
@@ -2990,10 +2989,11 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
dst[5] = va >> 32; dst[5] = va >> 32;
for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++) for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
dst[i] = 0; dst[i] = 0;
} else {
tu_finishme("Emit images"); ssbo_index++;
} }
} }
assert(ssbo_index == num_desc);
struct tu_cs cs; struct tu_cs cs;
result = tu_cs_begin_sub_stream(device, draw_state, 7, &cs); result = tu_cs_begin_sub_stream(device, draw_state, 7, &cs);
@@ -3027,7 +3027,7 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
CP_LOAD_STATE6_0_STATE_TYPE(st) | CP_LOAD_STATE6_0_STATE_TYPE(st) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(sb) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo)); CP_LOAD_STATE6_0_NUM_UNIT(num_desc));
tu_cs_emit_qw(&cs, ibo_const.iova); /* SRC_ADDR_LO/HI */ tu_cs_emit_qw(&cs, ibo_const.iova); /* SRC_ADDR_LO/HI */
tu_cs_emit_pkt4(&cs, ibo_addr_reg, 2); tu_cs_emit_pkt4(&cs, ibo_addr_reg, 2);

View File

@@ -358,6 +358,15 @@ tu6_blend_op(VkBlendOp op)
} }
} }
/**
 * Number of IBO descriptors to program for a turnip shader.
 *
 * Only SSBOs are counted (the descriptor count of the shader's SSBO map):
 * tu_cmd_buffer.c emits the IBO descriptors for SSBOs, but does not yet emit
 * them for storage images.
 *
 * @param shader  shader whose SSBO map is consulted
 * @return        shader->ssbo_map.num_desc
 */
static unsigned
tu_shader_nibo(const struct tu_shader *shader)
{
   const unsigned ssbo_desc_count = shader->ssbo_map.num_desc;

   return ssbo_desc_count;
}
static void static void
tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader, tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
const struct ir3_shader_variant *vs) const struct ir3_shader_variant *vs)
@@ -457,7 +466,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) | uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) | A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo); A6XX_SP_FS_CONFIG_NIBO(tu_shader_nibo(shader));
if (fs->instrlen) if (fs->instrlen)
sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
@@ -479,7 +488,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
A6XX_HLSQ_FS_CNTL_ENABLED); A6XX_HLSQ_FS_CNTL_ENABLED);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1); tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1);
tu_cs_emit(cs, fs->image_mapping.num_ibo); tu_cs_emit(cs, tu_shader_nibo(shader));
} }
static void static void
@@ -496,7 +505,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2); tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2);
tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED | tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED |
A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | A6XX_SP_CS_CONFIG_NIBO(tu_shader_nibo(shader)) |
A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) |
A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc)); A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc));
tu_cs_emit(cs, v->instrlen); tu_cs_emit(cs, v->instrlen);
@@ -525,7 +534,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
tu_cs_emit(cs, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ tu_cs_emit(cs, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_IBO_COUNT, 1); tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_IBO_COUNT, 1);
tu_cs_emit(cs, v->image_mapping.num_ibo); tu_cs_emit(cs, tu_shader_nibo(shader));
} }
static void static void

View File

@@ -294,6 +294,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
tu_finishme("non-constant vulkan_resource_index array index"); tu_finishme("non-constant vulkan_resource_index array index");
index = map_add(&shader->ssbo_map, set, binding, 0, index = map_add(&shader->ssbo_map, set, binding, 0,
binding_layout->array_size); binding_layout->array_size);
index += const_val->u32;
break; break;
default: default:
tu_finishme("unsupported desc_type for vulkan_resource_index"); tu_finishme("unsupported desc_type for vulkan_resource_index");
@@ -345,6 +346,12 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
progress |= lower_impl(function->impl, tu_shader, layout); progress |= lower_impl(function->impl, tu_shader, layout);
} }
/* spirv_to_nir produces num_ssbos equal to the number of SSBO-containing
* variables, while ir3 wants the number of descriptors (like the gallium
* path).
*/
shader->info.num_ssbos = tu_shader->ssbo_map.num_desc;
return progress; return progress;
} }

View File

@@ -400,13 +400,10 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct ir3_shader_variant *v) const struct ir3_shader_variant *v)
{ {
unsigned count = util_last_bit(so->enabled_mask); unsigned count = util_last_bit(so->enabled_mask);
const struct ir3_ibo_mapping *m = &v->image_mapping;
for (unsigned i = 0; i < count; i++) { for (unsigned i = 0; i < count; i++) {
unsigned slot = m->ssbo_to_ibo[i];
OUT_PKT7(ring, CP_LOAD_STATE4, 5); OUT_PKT7(ring, CP_LOAD_STATE4, 5);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
CP_LOAD_STATE4_0_NUM_UNIT(1)); CP_LOAD_STATE4_0_NUM_UNIT(1));
@@ -424,7 +421,7 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
OUT_PKT7(ring, CP_LOAD_STATE4, 5); OUT_PKT7(ring, CP_LOAD_STATE4, 5);
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) |
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
CP_LOAD_STATE4_0_NUM_UNIT(1)); CP_LOAD_STATE4_0_NUM_UNIT(1));

View File

@@ -210,6 +210,6 @@ fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
translate_image(&img, &so->si[index]); translate_image(&img, &so->si[index]);
emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader); emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
emit_image_ssbo(ring, m->image_to_ibo[index], &img, shader); emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader);
} }
} }

View File

@@ -86,7 +86,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos +
v->shader->nir->info.num_images) |
A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */
OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */

View File

@@ -1142,11 +1142,11 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
emit_border_color(ctx, ring); emit_border_color(ctx, ring);
if (hs) { if (hs) {
debug_assert(hs->image_mapping.num_ibo == 0); debug_assert(ir3_shader_nibo(hs) == 0);
debug_assert(ds->image_mapping.num_ibo == 0); debug_assert(ir3_shader_nibo(ds) == 0);
} }
if (gs) { if (gs) {
debug_assert(gs->image_mapping.num_ibo == 0); debug_assert(ir3_shader_nibo(gs) == 0);
} }
#define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \ #define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \
@@ -1156,14 +1156,13 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT); fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT);
struct fd_ringbuffer *obj = fd_submit_new_ringbuffer( struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING); ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
const struct ir3_ibo_mapping *mapping = &fs->image_mapping;
OUT_PKT7(obj, CP_LOAD_STATE6, 3); OUT_PKT7(obj, CP_LOAD_STATE6, 3);
OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) | OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(fs)));
OUT_RB(obj, state); OUT_RB(obj, state);
OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2);
@@ -1173,7 +1172,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
* de-duplicate this from program->config_stateobj * de-duplicate this from program->config_stateobj
*/ */
OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
OUT_RING(obj, mapping->num_ibo); OUT_RING(obj, ir3_shader_nibo(fs));
ir3_emit_ssbo_sizes(ctx->screen, fs, obj, ir3_emit_ssbo_sizes(ctx->screen, fs, obj,
&ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
@@ -1250,21 +1249,20 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) { if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
struct fd_ringbuffer *state = struct fd_ringbuffer *state =
fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE); fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);
const struct ir3_ibo_mapping *mapping = &cp->image_mapping;
OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp)));
OUT_RB(ring, state); OUT_RB(ring, state);
OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2);
OUT_RB(ring, state); OUT_RB(ring, state);
OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
OUT_RING(ring, mapping->num_ibo); OUT_RING(ring, ir3_shader_nibo(cp));
fd_ringbuffer_del(state); fd_ringbuffer_del(state);
} }

View File

@@ -232,6 +232,15 @@ fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *p
static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
{ {
/* If the SSBO isn't present (because gallium doesn't pack atomic
* counters), zero-fill the slot.
*/
if (!img->prsc) {
for (int i = 0; i < 16; i++)
OUT_RING(ring, 0);
return;
}
struct fd_resource *rsc = fd_resource(img->prsc); struct fd_resource *rsc = fd_resource(img->prsc);
enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level); enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level);
bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level); bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level);
@@ -280,24 +289,24 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
{ {
struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
const struct ir3_ibo_mapping *mapping = &v->image_mapping;
struct fd_ringbuffer *state = struct fd_ringbuffer *state =
fd_submit_new_ringbuffer(ctx->batch->submit, fd_submit_new_ringbuffer(ctx->batch->submit,
mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING); (v->shader->nir->info.num_ssbos +
v->shader->nir->info.num_images) * 16 * 4,
FD_RINGBUFFER_STREAMING);
assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
for (unsigned i = 0; i < mapping->num_ibo; i++) { for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) {
struct fd6_image img; struct fd6_image img;
unsigned idx = mapping->ibo_to_image[i]; translate_buf(&img, &bufso->sb[i]);
emit_image_ssbo(state, &img);
if (idx & IBO_SSBO) { }
translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]);
} else {
translate_image(&img, &imgso->si[idx]);
}
for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) {
struct fd6_image img;
translate_image(&img, &imgso->si[i]);
emit_image_ssbo(state, &img); emit_image_ssbo(state, &img);
} }

View File

@@ -221,39 +221,39 @@ setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *stat
OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);
OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) | OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) |
A6XX_SP_VS_CONFIG_NIBO(state->vs->image_mapping.num_ibo) | A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) |
A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) | A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) |
A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp)); A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp));
OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1); OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1);
OUT_RING(ring, COND(state->hs, OUT_RING(ring, COND(state->hs,
A6XX_SP_HS_CONFIG_ENABLED | A6XX_SP_HS_CONFIG_ENABLED |
A6XX_SP_HS_CONFIG_NIBO(state->hs->image_mapping.num_ibo) | A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) |
A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) | A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) |
A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp))); A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp)));
OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1); OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1);
OUT_RING(ring, COND(state->ds, OUT_RING(ring, COND(state->ds,
A6XX_SP_DS_CONFIG_ENABLED | A6XX_SP_DS_CONFIG_ENABLED |
A6XX_SP_DS_CONFIG_NIBO(state->ds->image_mapping.num_ibo) | A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) |
A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) | A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) |
A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp))); A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp)));
OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1); OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1);
OUT_RING(ring, COND(state->gs, OUT_RING(ring, COND(state->gs,
A6XX_SP_GS_CONFIG_ENABLED | A6XX_SP_GS_CONFIG_ENABLED |
A6XX_SP_GS_CONFIG_NIBO(state->gs->image_mapping.num_ibo) | A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) |
A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) | A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) |
A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp))); A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp)));
OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1); OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1);
OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) | OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) |
A6XX_SP_FS_CONFIG_NIBO(state->fs->image_mapping.num_ibo) | A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) |
A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) | A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) |
A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp)); A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp));
OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1); OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
OUT_RING(ring, state->fs->image_mapping.num_ibo); OUT_RING(ring, ir3_shader_nibo(state->fs));
} }
static inline uint32_t static inline uint32_t