diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index dd654bacc7c..264b22fd430 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -43,7 +43,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *ldgb, *src0, *src1, *byte_offset, *offset; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; offset = ir3_get_src(ctx, &intr->src[2])[0]; @@ -81,7 +81,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) unsigned ncomp = ffs(~wrmask) - 1; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1])); byte_offset = ir3_get_src(ctx, &intr->src[2])[0]; offset = ir3_get_src(ctx, &intr->src[3])[0]; @@ -132,7 +132,7 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) type_t type = TYPE_U32; /* can this be non-const buffer_index? how do we handle that? 
*/ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); ssbo = create_immed(b, ibo_idx); byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; @@ -262,7 +262,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); /* src0 is value @@ -301,7 +301,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); image = create_immed(b, ibo_idx); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index fe3355bf2eb..b75489b6b6a 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -48,7 +48,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *ldib; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); offset = ir3_get_src(ctx, &intr->src[2])[0]; @@ -77,7 +77,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) unsigned ncomp = ffs(~wrmask) - 1; /* can this be non-const buffer_index? 
how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1])); /* src0 is offset, src1 is value: */ @@ -119,7 +119,8 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) type_t type = TYPE_U32; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, + nir_src_as_uint(intr->src[0])); ibo = create_immed(b, ibo_idx); data = ir3_get_src(ctx, &intr->src[2])[0]; @@ -213,7 +214,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); /* src0 is offset, src1 is value: @@ -242,7 +243,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); ibo = create_immed(b, ibo_idx); @@ -383,7 +384,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) { - if (so->image_mapping.num_ibo == 0) + if (ir3_shader_nibo(so) == 0) return; foreach_block (block, &ir->block_list) { diff --git a/src/freedreno/ir3/ir3_image.c 
b/src/freedreno/ir3/ir3_image.c index 60c71901c9c..6dabf6c0376 100644 --- a/src/freedreno/ir3/ir3_image.c +++ b/src/freedreno/ir3/ir3_image.c @@ -35,20 +35,14 @@ void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures) { memset(mapping, IBO_INVALID, sizeof(*mapping)); - mapping->num_ibo = 0; mapping->num_tex = 0; mapping->tex_base = num_textures; } unsigned -ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo) +ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo) { - if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) { - unsigned ibo = mapping->num_ibo++; - mapping->ssbo_to_ibo[ssbo] = ibo; - mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo; - } - return mapping->ssbo_to_ibo[ssbo]; + return ssbo; } unsigned @@ -63,14 +57,9 @@ ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo) } unsigned -ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image) +ir3_image_to_ibo(struct ir3_shader *shader, unsigned image) { - if (mapping->image_to_ibo[image] == IBO_INVALID) { - unsigned ibo = mapping->num_ibo++; - mapping->image_to_ibo[image] = ibo; - mapping->ibo_to_image[ibo] = image; - } - return mapping->image_to_ibo[image]; + return shader->nir->info.num_ssbos + image; } unsigned diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h index c89e581eef8..b0e0959b157 100644 --- a/src/freedreno/ir3/ir3_image.h +++ b/src/freedreno/ir3/ir3_image.h @@ -31,9 +31,9 @@ void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures); -unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo); +unsigned ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo); unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo); -unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image); +unsigned ir3_image_to_ibo(struct ir3_shader *shader, unsigned image); unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image); unsigned 
ir3_get_image_slot(nir_deref_instr *deref); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 528764b0e27..f056a3e5cd6 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -427,12 +427,10 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) */ struct ir3_ibo_mapping { #define IBO_INVALID 0xff - /* Maps logical SSBO state to hw state: */ - uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS]; + /* Maps logical SSBO state to hw tex state: */ uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS]; - /* Maps logical Image state to hw state: */ - uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES]; + /* Maps logical Image state to hw tex state: */ uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES]; /* Maps hw state back to logical SSBO or Image state: @@ -441,10 +439,8 @@ struct ir3_ibo_mapping { * hw slot is used for SSBO state vs Image state. */ #define IBO_SSBO 0x80 - uint8_t ibo_to_image[32]; uint8_t tex_to_image[32]; - uint8_t num_ibo; uint8_t num_tex; /* including real textures */ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */ }; @@ -795,4 +791,14 @@ ir3_shader_halfregs(const struct ir3_shader_variant *v) return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); } +static inline uint32_t +ir3_shader_nibo(const struct ir3_shader_variant *v) +{ + /* The dummy variant used in binning mode won't have an actual shader. 
*/ + if (!v->shader) + return 0; + + return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images; +} + #endif /* IR3_SHADER_H_ */ diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 516133def56..c922c34a275 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2956,26 +2956,25 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, &pipeline->program.link[type]; VkResult result; - if (link->image_mapping.num_ibo == 0) { + unsigned num_desc = link->ssbo_map.num_desc; + + if (num_desc == 0) { *entry = (struct tu_cs_entry) {}; return VK_SUCCESS; } struct ts_cs_memory ibo_const; - result = tu_cs_alloc(device, draw_state, link->image_mapping.num_ibo, + result = tu_cs_alloc(device, draw_state, num_desc, A6XX_TEX_CONST_DWORDS, &ibo_const); if (result != VK_SUCCESS) return result; - for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) { - unsigned idx = link->image_mapping.ibo_to_image[i]; - uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * i]; + int ssbo_index = 0; + for (unsigned i = 0; i < link->ssbo_map.num; i++) { + for (int j = 0; j < link->ssbo_map.array_size[i]; j++) { + uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * ssbo_index]; - if (idx & IBO_SSBO) { - idx &= ~IBO_SSBO; - - uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx, - 0 /* XXX */); + uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, i, j); /* We don't expose robustBufferAccess, so leave the size unlimited. 
*/ uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4; @@ -2990,10 +2989,11 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, dst[5] = va >> 32; for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++) dst[i] = 0; - } else { - tu_finishme("Emit images"); + + ssbo_index++; } } + assert(ssbo_index == num_desc); struct tu_cs cs; result = tu_cs_begin_sub_stream(device, draw_state, 7, &cs); @@ -3027,7 +3027,7 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, CP_LOAD_STATE6_0_STATE_TYPE(st) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(num_desc)); tu_cs_emit_qw(&cs, ibo_const.iova); /* SRC_ADDR_LO/HI */ tu_cs_emit_pkt4(&cs, ibo_addr_reg, 2); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index f170fa1958b..1b43264b5bc 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -358,6 +358,15 @@ tu6_blend_op(VkBlendOp op) } } +static unsigned +tu_shader_nibo(const struct tu_shader *shader) +{ + /* In tu_cmd_buffer.c we emit the SSBO's IBOS, but not yet storage image + * IBOs. 
+ */ + return shader->ssbo_map.num_desc; +} + static void tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader, const struct ir3_shader_variant *vs) @@ -457,7 +466,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) | - A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo); + A6XX_SP_FS_CONFIG_NIBO(tu_shader_nibo(shader)); if (fs->instrlen) sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; @@ -479,7 +488,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, A6XX_HLSQ_FS_CNTL_ENABLED); tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1); - tu_cs_emit(cs, fs->image_mapping.num_ibo); + tu_cs_emit(cs, tu_shader_nibo(shader)); } static void @@ -496,7 +505,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2); tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | + A6XX_SP_CS_CONFIG_NIBO(tu_shader_nibo(shader)) | A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc)); tu_cs_emit(cs, v->instrlen); @@ -525,7 +534,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, tu_cs_emit(cs, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_IBO_COUNT, 1); - tu_cs_emit(cs, v->image_mapping.num_ibo); + tu_cs_emit(cs, tu_shader_nibo(shader)); } static void diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 1fc9da9a779..0b755a99f8e 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -294,6 +294,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, tu_finishme("non-constant vulkan_resource_index array index"); index = map_add(&shader->ssbo_map, set, binding, 0, binding_layout->array_size); + index += const_val->u32; break; default: tu_finishme("unsupported 
desc_type for vulkan_resource_index"); @@ -345,6 +346,12 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, progress |= lower_impl(function->impl, tu_shader, layout); } + /* spirv_to_nir produces num_ssbos equal to the number of SSBO-containing + * variables, while ir3 wants the number of descriptors (like the gallium + * path). + */ + shader->info.num_ssbos = tu_shader->ssbo_map.num_desc; + return progress; } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 486657d92f4..9b337d33de2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -400,13 +400,10 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *v) { unsigned count = util_last_bit(so->enabled_mask); - const struct ir3_ibo_mapping *m = &v->image_mapping; for (unsigned i = 0; i < count; i++) { - unsigned slot = m->ssbo_to_ibo[i]; - OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); @@ -424,7 +421,7 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c index e46a21c4523..4da1f16385a 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c @@ -210,6 +210,6 @@ fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, translate_image(&img, &so->si[index]); emit_image_tex(ring, 
m->image_to_tex[index] + m->tex_base, &img, shader); - emit_image_ssbo(ring, m->image_to_ibo[index], &img, shader); + emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader); } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 2e6a7fd21b0..36ae9f5b86d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -86,7 +86,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | + A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos + + v->shader->nir->info.num_images) | A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 7b1b88cee7e..9fb0125409e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1142,11 +1142,11 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) emit_border_color(ctx, ring); if (hs) { - debug_assert(hs->image_mapping.num_ibo == 0); - debug_assert(ds->image_mapping.num_ibo == 0); + debug_assert(ir3_shader_nibo(hs) == 0); + debug_assert(ir3_shader_nibo(ds) == 0); } if (gs) { - debug_assert(gs->image_mapping.num_ibo == 0); + debug_assert(ir3_shader_nibo(gs) == 0); } #define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \ @@ -1156,14 +1156,13 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT); struct fd_ringbuffer *obj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING); - const struct ir3_ibo_mapping *mapping = &fs->image_mapping; OUT_PKT7(obj, 
CP_LOAD_STATE6, 3); OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) | - CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(fs))); OUT_RB(obj, state); OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); @@ -1173,7 +1172,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) * de-duplicate this from program->config_stateobj */ OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); - OUT_RING(obj, mapping->num_ibo); + OUT_RING(obj, ir3_shader_nibo(fs)); ir3_emit_ssbo_sizes(ctx->screen, fs, obj, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); @@ -1250,21 +1249,20 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) { struct fd_ringbuffer *state = fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE); - const struct ir3_ibo_mapping *mapping = &cp->image_mapping; OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp))); OUT_RB(ring, state); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2); OUT_RB(ring, state); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); - OUT_RING(ring, mapping->num_ibo); + OUT_RING(ring, ir3_shader_nibo(cp)); fd_ringbuffer_del(state); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index 8cc95b7cb5b..7a126ddf7b9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -232,6 +232,15 @@ fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *p static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) { + /* If the SSBO isn't present 
(because gallium doesn't pack atomic + * counters), zero-fill the slot. + */ + if (!img->prsc) { + for (int i = 0; i < 16; i++) + OUT_RING(ring, 0); + return; + } + struct fd_resource *rsc = fd_resource(img->prsc); enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level); bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level); @@ -280,24 +289,24 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, { struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; - const struct ir3_ibo_mapping *mapping = &v->image_mapping; struct fd_ringbuffer *state = fd_submit_new_ringbuffer(ctx->batch->submit, - mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING); + (v->shader->nir->info.num_ssbos + + v->shader->nir->info.num_images) * 16 * 4, + FD_RINGBUFFER_STREAMING); assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); - for (unsigned i = 0; i < mapping->num_ibo; i++) { + for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) { struct fd6_image img; - unsigned idx = mapping->ibo_to_image[i]; - - if (idx & IBO_SSBO) { - translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]); - } else { - translate_image(&img, &imgso->si[idx]); - } + translate_buf(&img, &bufso->sb[i]); + emit_image_ssbo(state, &img); + } + for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) { + struct fd6_image img; + translate_image(&img, &imgso->si[i]); emit_image_ssbo(state, &img); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 14b57bfb238..ffd633aa6c0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -221,39 +221,39 @@ setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *stat OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) | - 
A6XX_SP_VS_CONFIG_NIBO(state->vs->image_mapping.num_ibo) | + A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) | A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) | A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp)); OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1); OUT_RING(ring, COND(state->hs, A6XX_SP_HS_CONFIG_ENABLED | - A6XX_SP_HS_CONFIG_NIBO(state->hs->image_mapping.num_ibo) | + A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) | A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) | A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1); OUT_RING(ring, COND(state->ds, A6XX_SP_DS_CONFIG_ENABLED | - A6XX_SP_DS_CONFIG_NIBO(state->ds->image_mapping.num_ibo) | + A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) | A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) | A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1); OUT_RING(ring, COND(state->gs, A6XX_SP_GS_CONFIG_ENABLED | - A6XX_SP_GS_CONFIG_NIBO(state->gs->image_mapping.num_ibo) | + A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) | A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) | A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1); OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) | - A6XX_SP_FS_CONFIG_NIBO(state->fs->image_mapping.num_ibo) | + A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) | A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) | A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp)); OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1); - OUT_RING(ring, state->fs->image_mapping.num_ibo); + OUT_RING(ring, ir3_shader_nibo(state->fs)); } static inline uint32_t