diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 8d3ba1381d5..738bc53ba97 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -281,7 +281,7 @@ static void r300_clear(struct pipe_context* pipe, if (!r300->hyperz_enabled && (r300->screen->caps.is_r500 || debug_get_option_hyperz())) { r300->hyperz_enabled = - r300->rws->cs_request_feature(r300->cs, + r300->rws->cs_request_feature(&r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, TRUE); if (r300->hyperz_enabled) { @@ -319,7 +319,7 @@ static void r300_clear(struct pipe_context* pipe, /* Try to obtain the access to the CMASK if we don't have one. */ if (!r300->cmask_access) { r300->cmask_access = - r300->rws->cs_request_feature(r300->cs, + r300->rws->cs_request_feature(&r300->cs, RADEON_FID_R300_CMASK_ACCESS, TRUE); } @@ -384,7 +384,7 @@ static void r300_clear(struct pipe_context* pipe, r300_get_num_cs_end_dwords(r300); /* Reserve CS space. */ - if (!r300->rws->cs_check_space(r300->cs, dwords, false)) { + if (!r300->rws->cs_check_space(&r300->cs, dwords, false)) { r300_flush(&r300->context, PIPE_FLUSH_ASYNC, NULL); } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 114f74ead24..5a9bae09210 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -74,11 +74,11 @@ static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); - if (r300->cs && r300->hyperz_enabled) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); + if (r300->cs.priv && r300->hyperz_enabled) { + r300->rws->cs_request_feature(&r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } - if (r300->cs && r300->cmask_access) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE); + if (r300->cs.priv && r300->cmask_access) { + r300->rws->cs_request_feature(&r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE); } if (r300->blitter) @@ -94,8 +94,7 @@ static void r300_destroy_context(struct pipe_context* context) /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->cs) - r300->rws->cs_destroy(r300->cs); + r300->rws->cs_destroy(&r300->cs); if (r300->ctx) r300->rws->ctx_destroy(r300->ctx); @@ -393,8 +392,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300->ctx) goto fail; - r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, false); - if (r300->cs == NULL) + + if (!rws->cs_create(&r300->cs, r300->ctx, RING_GFX, r300_flush_callback, r300, false)) goto fail; if (!r300screen->caps.has_tcl) { diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e63beee26b0..e7e6f331c0e 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -449,7 +449,7 @@ struct r300_context { /* The submission context. */ struct radeon_winsys_ctx *ctx; /* The command stream. */ - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; /* Screen. */ struct r300_screen *screen; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 560b77533cf..dc17d0cb1d8 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -39,7 +39,7 @@ */ #define CS_LOCALS(context) \ - struct radeon_cmdbuf *cs_copy = (context)->cs; \ + struct radeon_cmdbuf *cs_copy = &(context)->cs; \ struct radeon_winsys *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index fed3f71e5da..b1c1d36d8a7 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1051,7 +1051,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) assert(r300->vbo); OUT_CS(0xc0001000); /* PKT3_NOP */ - OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo) * 4); + OUT_CS(r300->rws->cs_lookup_buffer(&r300->cs, r300->vbo) * 4); END_CS; } @@ -1322,7 +1322,7 @@ validate: continue; tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->buf, + r300->rws->cs_add_buffer(&r300->cs, tex->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->cbufs[i])->domain, tex->b.b.nr_samples > 1 ? @@ -1333,7 +1333,7 @@ validate: if (fb->zsbuf) { tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->buf, + r300->rws->cs_add_buffer(&r300->cs, tex->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, r300_surface(fb->zsbuf)->domain, tex->b.b.nr_samples > 1 ? @@ -1344,7 +1344,7 @@ validate: /* The AA resolve buffer. */ if (r300->aa_state.dirty) { if (aa->dest) { - r300->rws->cs_add_buffer(r300->cs, aa->dest->buf, + r300->rws->cs_add_buffer(&r300->cs, aa->dest->buf, RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, aa->dest->domain, RADEON_PRIO_COLOR_BUFFER); @@ -1358,20 +1358,20 @@ validate: } tex = r300_resource(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->buf, + r300->rws->cs_add_buffer(&r300->cs, tex->buf, RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf, + r300->rws->cs_add_buffer(&r300->cs, r300->query_current->buf, RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_GTT, RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300->vbo, + r300->rws->cs_add_buffer(&r300->cs, r300->vbo, RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_GTT, RADEON_PRIO_VERTEX_BUFFER); @@ -1387,7 +1387,7 @@ validate: if (!buf) continue; - r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf, + r300->rws->cs_add_buffer(&r300->cs, r300_resource(buf)->buf, RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(buf)->domain, RADEON_PRIO_SAMPLER_BUFFER); @@ -1395,13 +1395,13 @@ validate: } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf, + r300->rws->cs_add_buffer(&r300->cs, r300_resource(index_buffer)->buf, RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, r300_resource(index_buffer)->domain, RADEON_PRIO_INDEX_BUFFER); /* Now do the validation (flush is called inside cs_validate on failure). */ - if (!r300->rws->cs_validate(r300->cs)) { + if (!r300->rws->cs_validate(&r300->cs)) { /* Ooops, an infinite loop, give up. */ if (flushed) return FALSE; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index f6c1bf32ca2..5a172baf4bc 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -53,7 +53,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags, } r300->flush_counter++; - r300->rws->cs_flush(r300->cs, flags, fence); + r300->rws->cs_flush(&r300->cs, flags, fence); r300->dirty_hw = 0; /* New kitchen sink, baby. */ @@ -86,11 +86,11 @@ void r300_flush(struct pipe_context *pipe, * and we cannot emit an empty CS. Let's write to some reg. */ CS_LOCALS(r300); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); - r300->rws->cs_flush(r300->cs, flags, fence); + r300->rws->cs_flush(&r300->cs, flags, fence); } else { /* Even if hw is not dirty, we should at least reset the CS in case * the space checking failed for the first draw operation. */ - r300->rws->cs_flush(r300->cs, flags, NULL); + r300->rws->cs_flush(&r300->cs, flags, NULL); } } @@ -118,7 +118,7 @@ void r300_flush(struct pipe_context *pipe, } /* Revoke Hyper-Z access, so that some other process can take it. */ - r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, + r300->rws->cs_request_feature(&r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); r300->hyperz_enabled = FALSE; } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 842b9219a66..d0987ad0ca4 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -159,7 +159,7 @@ static bool r300_get_query_result(struct pipe_context* pipe, return vresult->b; } - map = r300->rws->buffer_map(q->buf, r300->cs, + map = r300->rws->buffer_map(q->buf, &r300->cs, PIPE_MAP_READ | (!wait ? PIPE_MAP_DONTBLOCK : 0)); if (!map) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c38a78e04e5..567f468d1f3 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -215,7 +215,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ - if (!r300->rws->cs_check_space(r300->cs, cs_dwords, false)) { + if (!r300->rws->cs_check_space(&r300->cs, cs_dwords, false)) { r300_flush(&r300->context, PIPE_FLUSH_ASYNC, NULL); flushed = TRUE; } @@ -375,7 +375,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, if (!map[vbi]) { map[vbi] = (uint32_t*)r300->rws->buffer_map( r300_resource(vbuf->buffer.resource)->buf, - r300->cs, PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED); + &r300->cs, PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * draw->start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); @@ -611,7 +611,7 @@ static void r300_draw_elements(struct r300_context *r300, if (indexSize == 2 && (start & 1) && indexBuffer) { /* If we got here, then orgIndexBuffer == indexBuffer. */ uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf, - r300->cs, + &r300->cs, PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED); @@ -935,7 +935,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, return FALSE; } r300->draw_vbo_offset = 0; - r300render->vbo_ptr = rws->buffer_map(r300->vbo, r300->cs, + r300render->vbo_ptr = rws->buffer_map(r300->vbo, &r300->cs, PIPE_MAP_WRITE); } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 721d8c82b80..461a017a049 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -95,7 +95,7 @@ r300_buffer_transfer_map( struct pipe_context *context, assert(usage & PIPE_MAP_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, RADEON_USAGE_READWRITE) || + if (r300->rws->cs_is_buffer_referenced(&r300->cs, rbuf->buf, RADEON_USAGE_READWRITE) || !r300->rws->buffer_wait(rbuf->buf, 0, RADEON_USAGE_READWRITE)) { unsigned i; struct pb_buffer *new_buf; @@ -127,7 +127,7 @@ r300_buffer_transfer_map( struct pipe_context *context, usage |= PIPE_MAP_UNSYNCHRONIZED; } - map = rws->buffer_map(rbuf->buf, r300->cs, usage); + map = rws->buffer_map(rbuf->buf, &r300->cs, usage); if (!map) { slab_free(&r300->pool_transfers, transfer); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index e438923e89f..292de41dd82 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -115,7 +115,7 @@ r300_texture_transfer_map(struct pipe_context *ctx, char *map; referenced_cs = - r300->rws->cs_is_buffer_referenced(r300->cs, tex->buf, RADEON_USAGE_READWRITE); + r300->rws->cs_is_buffer_referenced(&r300->cs, tex->buf, RADEON_USAGE_READWRITE); if (referenced_cs) { referenced_hw = TRUE; } else { @@ -219,7 +219,7 @@ r300_texture_transfer_map(struct pipe_context *ctx, /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ map = r300->rws->buffer_map(trans->linear_texture->buf, - r300->cs, usage); + &r300->cs, usage); if (!map) { pipe_resource_reference( (struct pipe_resource**)&trans->linear_texture, NULL); @@ -230,7 +230,7 @@ r300_texture_transfer_map(struct pipe_context *ctx, return map; } else { /* Tiling is disabled. */ - map = r300->rws->buffer_map(tex->buf, r300->cs, usage); + map = r300->rws->buffer_map(tex->buf, &r300->cs, usage); if (!map) { FREE(trans); return NULL; diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 0397c44326f..e5c30e095a5 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -598,7 +598,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx, uint32_t indirect_grid[3]) { int i; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; unsigned num_waves; @@ -678,7 +678,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx, static void compute_setup_cbs(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned i; /* Emit colorbuffers. */ @@ -720,7 +720,7 @@ static void compute_setup_cbs(struct r600_context *rctx) static void compute_emit_cs(struct r600_context *rctx, const struct pipe_grid_info *info) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; bool compute_dirty = false; struct r600_pipe_shader *current; struct r600_shader_atomic combined_atomics[8]; @@ -728,7 +728,7 @@ static void compute_emit_cs(struct r600_context *rctx, uint32_t indirect_grid[3] = { 0, 0, 0 }; /* make sure that the gfx ring is only one active */ - if (radeon_emitted(rctx->b.dma.cs, 0)) { + if (radeon_emitted(&rctx->b.dma.cs, 0)) { rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL); } @@ -890,7 +890,7 @@ void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint64_t va; struct r600_resource *code_bo; unsigned ngpr, nstack; diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index da8553886ce..54bd19fbc3c 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, uint64_t src_offset, uint64_t size) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; unsigned i, ncopy, csize, sub_cmd, shift; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; @@ -85,7 +85,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, unsigned size, uint32_t clear_value, enum r600_coherency coher) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f9c71668fbf..3d8088ac60b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -975,7 +975,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx, static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3); @@ -1002,7 +1002,7 @@ static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_a static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4); @@ -1658,7 +1658,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx, static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned max_dist = 0; switch (nr_samples) { @@ -1707,7 +1707,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at { struct r600_image_state *state = (struct r600_image_state *)atom; struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_texture *rtex; struct r600_resource *resource; int i; @@ -1834,7 +1834,7 @@ static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struc static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; unsigned i, tl, br; @@ -1973,7 +1973,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -2031,7 +2031,7 @@ uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; unsigned fb_colormask = a->bound_cbufs_target_mask; unsigned ps_colormask = a->ps_color_export_mask; @@ -2046,7 +2046,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -2069,7 +2069,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_count_control = 0; @@ -2124,7 +2124,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, unsigned resource_offset, unsigned pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2183,7 +2183,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_const_cache, unsigned pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2335,7 +2335,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base, unsigned pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2444,7 +2444,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, unsigned border_index_reg, unsigned pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; union pipe_color_union border_color = {{0,0,0,1}}; union pipe_color_union *border_color_ptr = &border_color; @@ -2528,14 +2528,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK, + radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) { struct r600_sample_mask *s = (struct r600_sample_mask*)a; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint16_t mask = s->sample_mask; radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); @@ -2545,7 +2545,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; @@ -2562,7 +2562,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0; @@ -2666,7 +2666,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -3776,7 +3776,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -3898,7 +3898,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.dma.cs == NULL) { + if (rctx->b.dma.cs.priv == NULL) { goto fallback; } @@ -4752,7 +4752,7 @@ bool evergreen_adjust_gprs(struct r600_context *rctx) void eg_trace_emit(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned reloc; if (rctx->b.chip_class < EVERGREEN) @@ -4782,7 +4782,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx, struct r600_resource *resource, uint32_t pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, resource, RADEON_USAGE_READ, @@ -4805,7 +4805,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, struct r600_resource *resource, uint32_t pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t event = EVENT_TYPE_PS_DONE; uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, @@ -4832,7 +4832,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, struct r600_resource *resource, uint32_t pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t event = EVENT_TYPE_PS_DONE; uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, resource, @@ -4858,7 +4858,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, struct r600_resource *resource, uint32_t pkt_flags) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, resource, RADEON_USAGE_READ, @@ -4953,7 +4953,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, struct r600_shader_atomic *combined_atomics, uint8_t *atomic_used_mask_p) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; uint32_t pkt_flags = 0; uint32_t event = EVENT_TYPE_PS_DONE; diff --git a/src/gallium/drivers/r600/r600_buffer_common.c b/src/gallium/drivers/r600/r600_buffer_common.c index dbf7757296f..070a4163ce7 100644 --- a/src/gallium/drivers/r600/r600_buffer_common.c +++ b/src/gallium/drivers/r600/r600_buffer_common.c @@ -34,11 +34,11 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, struct pb_buffer *buf, enum radeon_bo_usage usage) { - if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) { + if (ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, buf, usage)) { return true; } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) { + if (radeon_emitted(&ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, buf, usage)) { return true; } return false; @@ -62,8 +62,8 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, rusage = RADEON_USAGE_WRITE; } - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, + if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) && + ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, resource->buf, rusage)) { if (usage & PIPE_MAP_DONTBLOCK) { ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); @@ -73,8 +73,8 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, busy = true; } } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, + if (radeon_emitted(&ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, resource->buf, rusage)) { if (usage & PIPE_MAP_DONTBLOCK) { ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); @@ -91,9 +91,9 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, } else { /* We will be wait for the GPU. Wait for any offloaded * CS flush to complete to avoid busy-waiting in the winsys. */ - ctx->ws->cs_sync_flush(ctx->gfx.cs); - if (ctx->dma.cs) - ctx->ws->cs_sync_flush(ctx->dma.cs); + ctx->ws->cs_sync_flush(&ctx->gfx.cs); + if (ctx->dma.cs.priv) + ctx->ws->cs_sync_flush(&ctx->dma.cs); } } @@ -334,7 +334,7 @@ static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); return rctx->screen->has_cp_dma || - (dword_aligned && (rctx->dma.cs || + (dword_aligned && (rctx->dma.cs.priv || rctx->screen->has_streamout)); } diff --git a/src/gallium/drivers/r600/r600_cs.h b/src/gallium/drivers/r600/r600_cs.h index 424adba2782..80f7162049c 100644 --- a/src/gallium/drivers/r600/r600_cs.h +++ b/src/gallium/drivers/r600/r600_cs.h @@ -74,7 +74,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct { assert(usage); return rctx->ws->cs_add_buffer( - ring->cs, rbo->buf, + &ring->cs, rbo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), rbo->domains, priority) * 4; } @@ -105,7 +105,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx, bool check_mem) { if (check_mem && - !radeon_cs_memory_below_limit(rctx->screen, ring->cs, + !radeon_cs_memory_below_limit(rctx->screen, &ring->cs, rctx->vram + rbo->vram_usage, rctx->gtt + rbo->gart_usage)) ring->flush(rctx, PIPE_FLUSH_ASYNC, NULL); @@ -118,7 +118,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { - struct radeon_cmdbuf *cs = ring->cs; + struct radeon_cmdbuf *cs = &ring->cs; bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_has_virtual_memory; unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index c7a53e5094c..fec8ed6d611 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -34,10 +34,10 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics) { /* Flush the DMA IB if it's not empty. */ - if (radeon_emitted(ctx->b.dma.cs, 0)) + if (radeon_emitted(&ctx->b.dma.cs, 0)) ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); - if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs, + if (!radeon_cs_memory_below_limit(ctx->b.screen, &ctx->b.gfx.cs, ctx->b.vram, ctx->b.gtt)) { ctx->b.gtt = 0; ctx->b.vram = 0; @@ -84,14 +84,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += 10; /* Flush if there's not enough space. */ - if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw, false)) { + if (!ctx->b.ws->cs_check_space(&ctx->b.gfx.cs, num_dw, false)) { ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); } } void r600_flush_emit(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned cp_coher_cntl = 0; unsigned wait_until = 0; @@ -260,7 +260,7 @@ void r600_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence) { struct r600_context *ctx = context; - struct radeon_cmdbuf *cs = ctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &ctx->b.gfx.cs; struct radeon_winsys *ws = ctx->b.ws; if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size)) @@ -345,7 +345,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->b.vram = 0; /* Begin a new CS. */ - r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd); + r600_emit_command_buffer(&ctx->b.gfx.cs, &ctx->start_cs_cmd); /* Re-emit states. */ r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom); @@ -430,13 +430,13 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->last_rast_prim = -1; ctx->current_rast_prim = -1; - assert(!ctx->b.gfx.cs->prev_dw); - ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw; + assert(!ctx->b.gfx.cs.prev_dw); + ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs.current.cdw; } void r600_emit_pfp_sync_me(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; if (rctx->b.chip_class >= EVERGREEN && rctx->b.screen->info.drm_minor >= 46) { @@ -502,7 +502,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *src, uint64_t src_offset, unsigned size) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); @@ -584,7 +584,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx, uint64_t src_offset, uint64_t size) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; unsigned i, ncopy, csize; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 3068833dcdd..297f80c4e51 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -209,8 +209,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, goto fail; } - rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, - r600_context_gfx_flush, rctx, false); + ws->cs_create(&rctx->b.gfx.cs, rctx->b.ctx, RING_GFX, + r600_context_gfx_flush, rctx, false); rctx->b.gfx.flush = r600_context_gfx_flush; u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024, diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 3f5cbf03d9b..101f98a722a 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -78,7 +78,7 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, struct r600_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type) { - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; unsigned op = EVENT_TYPE(event) | EVENT_INDEX(5) | event_flags; @@ -110,7 +110,7 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx, struct r600_resource *buf, uint64_t va, uint32_t ref, uint32_t mask) { - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); @@ -215,7 +215,7 @@ void r600_draw_rectangle(struct blitter_context *blitter, static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) { - struct radeon_cmdbuf *cs = rctx->dma.cs; + struct radeon_cmdbuf *cs = &rctx->dma.cs; if (rctx->chip_class >= EVERGREEN) radeon_emit(cs, 0xf0000000); /* NOP */ @@ -228,8 +228,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src) { - uint64_t vram = ctx->dma.cs->used_vram; - uint64_t gtt = ctx->dma.cs->used_gart; + uint64_t vram = ctx->dma.cs.used_vram; + uint64_t gtt = ctx->dma.cs.used_gart; if (dst) { vram += dst->vram_usage; @@ -241,12 +241,12 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, } /* Flush the GFX IB if DMA depends on it. */ - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && + if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) && ((dst && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf, + ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, dst->buf, RADEON_USAGE_READWRITE)) || (src && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, + ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, src->buf, RADEON_USAGE_WRITE)))) ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); @@ -263,21 +263,21 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, * engine busy while uploads are being submitted. */ num_dw++; /* for emit_wait_idle below */ - if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw, false) || - ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || - !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { + if (!ctx->ws->cs_check_space(&ctx->dma.cs, num_dw, false) || + ctx->dma.cs.used_vram + ctx->dma.cs.used_gart > 64 * 1024 * 1024 || + !radeon_cs_memory_below_limit(ctx->screen, &ctx->dma.cs, vram, gtt)) { ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); - assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); + assert((num_dw + ctx->dma.cs.current.cdw) <= ctx->dma.cs.current.max_dw); } /* Wait for idle if either buffer has been used in the IB before to * prevent read-after-write hazards. */ if ((dst && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf, + ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, dst->buf, RADEON_USAGE_READWRITE)) || (src && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf, + ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, src->buf, RADEON_USAGE_WRITE))) r600_dma_emit_wait_idle(ctx); @@ -346,14 +346,14 @@ static void r600_flush_from_st(struct pipe_context *ctx, rflags |= PIPE_FLUSH_END_OF_FRAME; /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ - if (rctx->dma.cs) + if (rctx->dma.cs.priv) rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); - if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { + if (!radeon_emitted(&rctx->gfx.cs, rctx->initial_gfx_cs_size)) { if (fence) ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); if (!(flags & PIPE_FLUSH_DEFERRED)) - ws->cs_sync_flush(rctx->gfx.cs); + ws->cs_sync_flush(&rctx->gfx.cs); } else { /* Instead of flushing, create a deferred fence. Constraints: * - the gallium frontend must allow a deferred flush. @@ -361,7 +361,7 @@ static void r600_flush_from_st(struct pipe_context *ctx, * Thread safety in fence_finish must be ensured by the gallium frontend. */ if (flags & PIPE_FLUSH_DEFERRED && fence) { - gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); + gfx_fence = rctx->ws->cs_get_next_fence(&rctx->gfx.cs); deferred_fence = true; } else { rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); @@ -393,9 +393,9 @@ static void r600_flush_from_st(struct pipe_context *ctx, } finish: if (!(flags & PIPE_FLUSH_DEFERRED)) { - if (rctx->dma.cs) - ws->cs_sync_flush(rctx->dma.cs); - ws->cs_sync_flush(rctx->gfx.cs); + if (rctx->dma.cs.priv) + ws->cs_sync_flush(&rctx->dma.cs); + ws->cs_sync_flush(&rctx->gfx.cs); } } @@ -403,7 +403,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct radeon_cmdbuf *cs = rctx->dma.cs; + struct radeon_cmdbuf *cs = &rctx->dma.cs; struct radeon_saved_cs saved; bool check_vm = (rctx->screen->debug_flags & DBG_CHECK_VM) && @@ -557,19 +557,19 @@ static bool r600_resource_commit(struct pipe_context *pctx, * (b) wait for threaded submit to finish, including those that were * triggered by some other, earlier operation. */ - if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && - ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, + if (radeon_emitted(&ctx->gfx.cs, ctx->initial_gfx_cs_size) && + ctx->ws->cs_is_buffer_referenced(&ctx->gfx.cs, res->buf, RADEON_USAGE_READWRITE)) { ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); } - if (radeon_emitted(ctx->dma.cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, + if (radeon_emitted(&ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(&ctx->dma.cs, res->buf, RADEON_USAGE_READWRITE)) { ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); } - ctx->ws->cs_sync_flush(ctx->dma.cs); - ctx->ws->cs_sync_flush(ctx->gfx.cs); + ctx->ws->cs_sync_flush(&ctx->dma.cs); + ctx->ws->cs_sync_flush(&ctx->gfx.cs); assert(resource->target == PIPE_BUFFER); @@ -635,9 +635,8 @@ bool r600_common_context_init(struct r600_common_context *rctx, return false; if (rscreen->info.num_rings[RING_DMA] && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { - rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, - r600_flush_dma_ring, - rctx, false); + rctx->ws->cs_create(&rctx->dma.cs, rctx->ctx, RING_DMA, + r600_flush_dma_ring, rctx, false); rctx->dma.flush = r600_flush_dma_ring; } @@ -649,10 +648,8 @@ void r600_common_context_cleanup(struct r600_common_context *rctx) if (rctx->query_result_shader) rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); - if (rctx->gfx.cs) - rctx->ws->cs_destroy(rctx->gfx.cs); - if (rctx->dma.cs) - rctx->ws->cs_destroy(rctx->dma.cs); + rctx->ws->cs_destroy(&rctx->gfx.cs); + rctx->ws->cs_destroy(&rctx->dma.cs); if (rctx->ctx) rctx->ws->ctx_destroy(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_pipe_common.h b/src/gallium/drivers/r600/r600_pipe_common.h index 49c7daf12a0..d8f1f9625af 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.h +++ b/src/gallium/drivers/r600/r600_pipe_common.h @@ -474,7 +474,7 @@ struct r600_viewports { }; struct r600_ring { - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); }; diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 2ac012f1b93..6993f1665a5 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -728,7 +728,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, struct r600_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: @@ -808,7 +808,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, struct r600_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; uint64_t fence_va = 0; switch (query->b.type) { @@ -900,7 +900,7 @@ static void emit_set_predicate(struct r600_common_context *ctx, struct r600_resource *buf, uint64_t va, uint32_t op) { - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); radeon_emit(cs, va); @@ -1834,7 +1834,7 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) { struct r600_common_context *ctx = (struct r600_common_context*)rscreen->aux_context; - struct radeon_cmdbuf *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = &ctx->gfx.cs; struct r600_resource *buffer; uint32_t *results; unsigned i, mask = 0; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 28e253069f6..fa0d0874402 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -246,7 +246,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -790,7 +790,7 @@ r600_create_sampler_view(struct pipe_context *ctx, static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); @@ -1282,7 +1282,7 @@ static void r600_get_sample_position(struct pipe_context *ctx, static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned max_dist = 0; if (rctx->b.family == CHIP_R600) { @@ -1349,7 +1349,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; @@ -1515,7 +1515,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { @@ -1545,7 +1545,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -1566,7 +1566,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_render_override = @@ -1651,7 +1651,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); @@ -1660,7 +1660,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; while (dirty_mask) { @@ -1700,7 +1700,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1774,7 +1774,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1821,7 +1821,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_color_reg) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -1881,7 +1881,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned tmp; tmp = S_009508_DISABLE_CUBE_ANISO(1) | @@ -1899,13 +1899,13 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, + radeon_set_context_reg(&rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; @@ -1921,7 +1921,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v2 = 0, primid = 0; @@ -1956,7 +1956,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -2854,7 +2854,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -2958,7 +2958,7 @@ static void r600_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.dma.cs == NULL) { + if (rctx->b.dma.cs.priv == NULL) { goto fallback; } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 7b0696eaefb..fc002fdd92c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -76,12 +76,12 @@ void r600_init_atom(struct r600_context *rctx, void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) { - r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); + r600_emit_command_buffer(&rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); } void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; unsigned alpha_ref = a->sx_alpha_ref; @@ -249,7 +249,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_blend_color *state = &rctx->blend_color.state; radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); @@ -261,7 +261,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_vgt_state *a = (struct r600_vgt_state *)atom; radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); @@ -295,7 +295,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); @@ -1665,7 +1665,7 @@ void r600_setup_scratch_area_for_shader(struct r600_context *rctx, if (scratch->dirty || unlikely(shader->scratch_space_needed != scratch->item_size || size > scratch->size)) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; scratch->dirty = false; @@ -2009,7 +2009,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_clip_misc_state *state = &rctx->clip_misc_state; radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -2029,7 +2029,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom /* rast_prim is the primitive type after GS. */ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; enum pipe_prim_type rast_prim = rctx->current_rast_prim; /* Skip this if not rendering lines. */ @@ -2059,7 +2059,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info { struct r600_context *rctx = (struct r600_context *)ctx; struct pipe_resource *indexbuf = info->has_user_indices ? NULL : info->index.resource; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; bool has_user_indices = info->has_user_indices; uint64_t mask; @@ -2090,7 +2090,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* make sure that the gfx ring is only one active */ - if (radeon_emitted(rctx->b.dma.cs, 0)) { + if (radeon_emitted(&rctx->b.dma.cs, 0)) { rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL); } @@ -2585,7 +2585,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; if (!shader) diff --git a/src/gallium/drivers/r600/r600_streamout.c b/src/gallium/drivers/r600/r600_streamout.c index e79beb74b6f..f45561d2921 100644 --- a/src/gallium/drivers/r600/r600_streamout.c +++ b/src/gallium/drivers/r600/r600_streamout.c @@ -154,7 +154,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx, static void r600_flush_vgt_streamout(struct r600_common_context *rctx) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ @@ -180,7 +180,7 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx) static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct r600_so_target **t = rctx->streamout.targets; uint16_t *stride_in_dw = rctx->streamout.stride_in_dw; unsigned i, update_flags = 0; @@ -253,7 +253,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r void r600_emit_streamout_end(struct r600_common_context *rctx) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct r600_so_target **t = rctx->streamout.targets; unsigned i; uint64_t va; @@ -315,8 +315,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx, S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) | S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx)); } - radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val); - radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val); + radeon_set_context_reg(&rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val); + radeon_set_context_reg(&rctx->gfx.cs, strmout_config_reg, strmout_config_val); } static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 5cc35334feb..75cbbf0cf06 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -52,7 +52,7 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, unsigned src_level, const struct pipe_box *src_box) { - if (!rctx->dma.cs) + if (!rctx->dma.cs.priv) return false; if (rdst->surface.bpe != rsrc->surface.bpe) diff --git a/src/gallium/drivers/r600/r600_viewport.c b/src/gallium/drivers/r600/r600_viewport.c index 7a5bf8f39aa..a8ed01a0c8e 100644 --- a/src/gallium/drivers/r600/r600_viewport.c +++ b/src/gallium/drivers/r600/r600_viewport.c @@ -185,7 +185,7 @@ static void r600_emit_one_scissor(struct r600_common_context *rctx, static void r600_emit_guardband(struct r600_common_context *rctx, struct r600_signed_scissor *vp_as_scissor) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; @@ -235,7 +235,7 @@ static void r600_emit_guardband(struct r600_common_context *rctx, static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct pipe_scissor_state *states = rctx->scissors.states; unsigned mask = rctx->scissors.dirty_mask; bool scissor_enabled = rctx->scissor_enabled; @@ -306,7 +306,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx, static void r600_emit_one_viewport(struct r600_common_context *rctx, struct pipe_viewport_state *state) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; radeon_emit(cs, fui(state->scale[0])); radeon_emit(cs, fui(state->translate[0])); @@ -318,7 +318,7 @@ static void r600_emit_one_viewport(struct r600_common_context *rctx, static void r600_emit_viewports(struct r600_common_context *rctx) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct pipe_viewport_state *states = rctx->viewports.states; unsigned mask = rctx->viewports.dirty_mask; @@ -348,7 +348,7 @@ static void r600_emit_viewports(struct r600_common_context *rctx) static void r600_emit_depth_ranges(struct r600_common_context *rctx) { - struct radeon_cmdbuf *cs = rctx->gfx.cs; + struct radeon_cmdbuf *cs = &rctx->gfx.cs; struct pipe_viewport_state *states = rctx->viewports.states; unsigned mask = rctx->viewports.depth_range_dirty_mask; float zmin, zmax; diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c index f0fb8ee4bad..959f2ebbb06 100644 --- a/src/gallium/drivers/r600/radeon_uvd.c +++ b/src/gallium/drivers/r600/radeon_uvd.c @@ -73,7 +73,7 @@ struct ruvd_decoder { struct pipe_screen *screen; struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; + struct radeon_cmdbuf cs; unsigned cur_buffer; @@ -102,14 +102,14 @@ struct ruvd_decoder { /* flush IB to the hardware */ static int flush(struct ruvd_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(&dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(&dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ @@ -119,7 +119,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, { int reloc_idx; - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); if (!dec->use_legacy) { uint64_t addr; @@ -152,7 +152,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + ptr = dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); /* calc buffer offsets */ @@ -809,7 +809,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder) flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); @@ -842,7 +842,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder, dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); + &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); } /** @@ -890,12 +890,12 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, if (new_size > buf->res->buf->size) { dec->ws->buffer_unmap(buf->res->buf); dec->bs_ptr = NULL; - if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + if (!rvid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) { RVID_ERR("Can't resize bitstream buffer!"); return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); if (!dec->bs_ptr) @@ -1090,8 +1090,8 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, dec->stream_handle = rvid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, false); - if (!dec->cs) { + + if (!ws->cs_create(&dec->cs, rctx->ctx, RING_UVD, NULL, NULL, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -1151,7 +1151,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, return &dec->base; error: - if (dec->cs) dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); diff --git a/src/gallium/drivers/r600/radeon_vce.c b/src/gallium/drivers/r600/radeon_vce.c index 8ff8fa5a144..866dcfed466 100644 --- a/src/gallium/drivers/r600/radeon_vce.c +++ b/src/gallium/drivers/r600/radeon_vce.c @@ -63,7 +63,7 @@ static void (*get_pic_param)(struct rvce_encoder *enc, */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL); enc->task_info_idx = 0; enc->bs_idx = 0; } @@ -71,7 +71,7 @@ static void flush(struct rvce_encoder *enc) #if 0 static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_MAP_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE); unsigned i = 0; fprintf(stderr, "\n"); fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]); @@ -256,7 +256,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder) rvid_destroy_buffer(&fb); } rvid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); FREE(enc->cpb_array); FREE(enc); } @@ -323,7 +323,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder, RVID_ERR("Can't create feedback buffer.\n"); return; } - if (!radeon_emitted(enc->cs, 0)) + if (!radeon_emitted(&enc->cs, 0)) enc->session(enc); enc->encode(enc); enc->feedback(enc); @@ -358,7 +358,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, if (size) { uint32_t *ptr = enc->ws->buffer_map( - fb->res->buf, enc->cs, + fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); if (ptr[1]) { @@ -431,8 +431,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, false); - if (!enc->cs) { + + if (!ws->cs_create(&enc->cs, rctx->ctx, RING_VCE, rvce_cs_flush, enc, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -477,8 +477,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); rvid_destroy_buffer(&enc->cpb); @@ -519,7 +518,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, { int reloc_idx; - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); if (enc->use_vm) { uint64_t addr; diff --git a/src/gallium/drivers/r600/radeon_vce.h b/src/gallium/drivers/r600/radeon_vce.h index c5e05477763..3faea7ad2f5 100644 --- a/src/gallium/drivers/r600/radeon_vce.h +++ b/src/gallium/drivers/r600/radeon_vce.h @@ -36,14 +36,14 @@ #include "util/list.h" -#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) +#define RVCE_CS(value) (enc->cs.current.buf[enc->cs.current.cdw++] = (value)) #define RVCE_BEGIN(cmd) { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + uint32_t *begin = &enc->cs.current.buf[enc->cs.current.cdw++]; \ RVCE_CS(cmd) #define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) #define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) #define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) -#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } +#define RVCE_END() *begin = (&enc->cs.current.buf[enc->cs.current.cdw] - begin) * 4; } #define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) #define RVCE_MAX_AUX_BUFFER_NUM 4 @@ -387,7 +387,7 @@ struct rvce_encoder { struct pipe_screen *screen; struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; + struct radeon_cmdbuf cs; rvce_get_buffer get_buffer; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 667d0b2157f..f8aec3bed68 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -65,7 +65,7 @@ struct ruvd_decoder { struct pipe_screen *screen; struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; unsigned cur_buffer; @@ -96,14 +96,14 @@ struct ruvd_decoder { /* flush IB to the hardware */ static int flush(struct ruvd_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(&dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(&dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ @@ -112,7 +112,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer *b { int reloc_idx; - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); if (!dec->use_legacy) { uint64_t addr; addr = dec->ws->buffer_get_virtual_address(buf); @@ -144,7 +144,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) /* and map it for CPU access */ ptr = - dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); + dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -983,7 +983,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder) flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); @@ -1013,7 +1013,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video &ruvd_destroy_associated_data); dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); } @@ -1052,12 +1052,12 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, if (new_size > buf->res->buf->size) { dec->ws->buffer_unmap(buf->res->buf); - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) { RVID_ERR("Can't resize bitstream buffer!"); return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); if (!dec->bs_ptr) return; @@ -1268,8 +1268,8 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); - if (!dec->cs) { + + if (!ws->cs_create(&dec->cs, sctx->ctx, RING_UVD, NULL, NULL, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -1356,8 +1356,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte return &dec->base; error: - if (dec->cs) - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c index c76f28ea973..8298298879f 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -103,7 +103,7 @@ static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, static void flush(struct radeon_uvd_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL); } static void radeon_uvd_enc_flush(struct pipe_video_codec *encoder) @@ -235,7 +235,7 @@ static void radeon_uvd_enc_destroy(struct pipe_video_codec *encoder) } si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); FREE(enc); } @@ -247,7 +247,7 @@ static void radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, void * if (NULL != size) { radeon_uvd_enc_feedback_t *fb_data = (radeon_uvd_enc_feedback_t *)enc->ws->buffer_map( - fb->res->buf, enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); + fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); if (!fb_data->status) *size = fb_data->bitstream_size; @@ -294,9 +294,8 @@ struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context *context, enc->bits_in_shifter = 0; enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false); - if (!enc->cs) { + if (!ws->cs_create(&enc->cs, sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -342,8 +341,7 @@ struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context *context, return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); si_vid_destroy_buffer(&enc->cpb); diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h index c121dc60dc7..89dc60a50b7 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.h +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h @@ -395,7 +395,7 @@ struct radeon_uvd_encoder { struct pipe_screen *screen; struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; radeon_uvd_enc_get_buffer get_buffer; diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c index b6e02e56c8e..9b0f9872f61 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c @@ -35,10 +35,10 @@ #include -#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) +#define RADEON_ENC_CS(value) (enc->cs.current.buf[enc->cs.current.cdw++] = (value)) #define RADEON_ENC_BEGIN(cmd) \ { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + uint32_t *begin = &enc->cs.current.buf[enc->cs.current.cdw++]; \ RADEON_ENC_CS(cmd) #define RADEON_ENC_READ(buf, domain, off) \ radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) @@ -47,7 +47,7 @@ #define RADEON_ENC_READWRITE(buf, domain, off) \ radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) #define RADEON_ENC_END() \ - *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + *begin = (&enc->cs.current.buf[enc->cs.current.cdw] - begin) * 4; \ enc->total_task_size += *begin; \ } @@ -57,7 +57,7 @@ static void radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc, struct pb_ enum radeon_bo_usage usage, enum radeon_bo_domain domain, signed offset) { - enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); addr = addr + offset; @@ -76,14 +76,14 @@ static void radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *e static void radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc, unsigned char byte) { if (enc->byte_index == 0) - enc->cs->current.buf[enc->cs->current.cdw] = 0; - enc->cs->current.buf[enc->cs->current.cdw] |= + enc->cs.current.buf[enc->cs.current.cdw] = 0; + enc->cs.current.buf[enc->cs.current.cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); enc->byte_index++; if (enc->byte_index >= 4) { enc->byte_index = 0; - enc->cs->current.cdw++; + enc->cs.current.cdw++; } } @@ -159,7 +159,7 @@ static void radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc) } if (enc->byte_index > 0) { - enc->cs->current.cdw++; + enc->cs.current.cdw++; enc->byte_index = 0; } } @@ -211,7 +211,7 @@ static void radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_f enc->enc_pic.task_info.allowed_max_num_feedbacks = 0; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO); - enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++]; + enc->p_task_size = &enc->cs.current.buf[enc->cs.current.cdw++]; RADEON_ENC_CS(enc->enc_pic.task_info.task_id); RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks); RADEON_ENC_END(); @@ -391,7 +391,7 @@ static void radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; int i; radeon_uvd_enc_reset(enc); @@ -486,7 +486,7 @@ static void radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_uvd_enc_reset(enc); radeon_uvd_enc_set_emulation_prevention(enc, false); radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); @@ -546,7 +546,7 @@ static void radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; int i; radeon_uvd_enc_reset(enc); @@ -600,7 +600,7 @@ static void radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_uvd_enc_reset(enc); radeon_uvd_enc_set_emulation_prevention(enc, false); radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 6743f483639..1f23ebf064d 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -51,7 +51,7 @@ */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL); enc->task_info_idx = 0; enc->bs_idx = 0; } @@ -59,7 +59,7 @@ static void flush(struct rvce_encoder *enc) #if 0 static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_MAP_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE); unsigned i = 0; fprintf(stderr, "\n"); fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]); @@ -248,7 +248,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder) si_vid_destroy_buffer(&fb); } si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); FREE(enc->cpb_array); FREE(enc); } @@ -316,7 +316,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder, RVID_ERR("Can't create feedback buffer.\n"); return; } - if (!radeon_emitted(enc->cs, 0)) + if (!radeon_emitted(&enc->cs, 0)) enc->session(enc); enc->encode(enc); enc->feedback(enc); @@ -347,7 +347,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); if (ptr[1]) { @@ -428,8 +428,8 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); - if (!enc->cs) { + + if (!ws->cs_create(&enc->cs, sctx->ctx, RING_VCE, rvce_cs_flush, enc, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -501,8 +501,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); si_vid_destroy_buffer(&enc->cpb); @@ -542,7 +541,7 @@ void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum rad { int reloc_idx; - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 7ee0500e130..007d6029a31 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -31,10 +31,10 @@ #include "radeon_video.h" #include "util/list.h" -#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) +#define RVCE_CS(value) (enc->cs.current.buf[enc->cs.current.cdw++] = (value)) #define RVCE_BEGIN(cmd) \ { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + uint32_t *begin = &enc->cs.current.buf[enc->cs.current.cdw++]; \ RVCE_CS(cmd) #define RVCE_READ(buf, domain, off) \ si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) @@ -43,7 +43,7 @@ #define RVCE_READWRITE(buf, domain, off) \ si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) #define RVCE_END() \ - *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + *begin = (&enc->cs.current.buf[enc->cs.current.cdw] - begin) * 4; \ } #define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) @@ -389,7 +389,7 @@ struct rvce_encoder { struct pipe_screen *screen; struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; rvce_get_buffer get_buffer; diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index 3653540eaba..bcd96093af9 100644 --- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -48,11 +48,11 @@ static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint3 RVCE_BEGIN(0x00000002); // task info if (op == 0x3) { if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + uint32_t offs = enc->cs.current.cdw - enc->task_info_idx + 3; // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; + enc->cs.current.buf[enc->task_info_idx] = offs; } - enc->task_info_idx = enc->cs->current.cdw; + enc->task_info_idx = enc->cs.current.cdw; } RVCE_CS(0xffffffff); // offsetOfNextTaskInfo RVCE_CS(op); // taskOperation diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c index dd2ffbb10d3..e4a50053f06 100644 --- a/src/gallium/drivers/radeon/radeon_vce_52.c +++ b/src/gallium/drivers/radeon/radeon_vce_52.c @@ -589,11 +589,11 @@ static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint3 RVCE_BEGIN(0x00000002); // task info if (op == 0x3) { if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + uint32_t offs = enc->cs.current.cdw - enc->task_info_idx + 3; // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; + enc->cs.current.buf[enc->task_info_idx] = offs; } - enc->task_info_idx = enc->cs->current.cdw; + enc->task_info_idx = enc->cs.current.cdw; } enc->enc_pic.ti.task_operation = op; enc->enc_pic.ti.reference_picture_dependency = dep; diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index bfef85e84ee..8a16ea7a092 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -1454,7 +1454,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, si_vid_clear_buffer(dec->base.context, &dec->ctx); /* ctx needs probs table */ - ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, + ptr = dec->ws->buffer_map(dec->ctx.res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); fill_probs_table(ptr); dec->ws->buffer_unmap(dec->ctx.res->buf); @@ -1479,8 +1479,8 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, si_vid_clear_buffer(dec->base.context, &dec->ctx); } } - if (encrypted != dec->ws->cs_is_secure(dec->cs)) { - dec->ws->cs_flush(dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); + if (encrypted != dec->ws->cs_is_secure(&dec->cs)) { + dec->ws->cs_flush(&dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); } decode->dpb_size = dec->dpb.res->buf->size; @@ -1617,7 +1617,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, RVID_ERR("Can't allocated context buffer.\n"); si_vid_clear_buffer(dec->base.context, &dec->ctx); - ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); + ptr = dec->ws->buffer_map(dec->ctx.res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); for (i = 0; i < 4; ++i) { rvcn_init_mode_probs((void*)(ptr + i * align(sizeof(rvcn_av1_frame_context_t), 2048))); @@ -1665,14 +1665,14 @@ static void rvcn_dec_message_feedback(struct radeon_decoder *dec) /* flush IB to the hardware */ static int flush(struct radeon_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(&dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(&dec->cs, RDECODE_PKT0(reg >> 2, 0)); + radeon_emit(&dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ @@ -1681,7 +1681,7 @@ static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer { uint64_t addr; - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; @@ -1713,7 +1713,7 @@ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec) /* and map it for CPU access */ ptr = - dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); + dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); /* calc buffer offsets */ dec->msg = ptr; @@ -1964,7 +1964,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder) flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); @@ -1996,7 +1996,7 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder, &radeon_dec_destroy_associated_data); dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); } @@ -2036,12 +2036,12 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder, if (new_size > buf->res->buf->size) { dec->ws->buffer_unmap(buf->res->buf); dec->bs_ptr = NULL; - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) { RVID_ERR("Can't resize bitstream buffer!"); return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); if (!dec->bs_ptr) return; @@ -2191,8 +2191,8 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); - if (!dec->cs) { + + if (!ws->cs_create(&dec->cs, sctx->ctx, ring, NULL, NULL, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -2229,7 +2229,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, void *ptr; buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + ptr = dec->ws->buffer_map(buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; fill_probs_table(ptr); @@ -2297,8 +2297,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, return &dec->base; error: - if (dec->cs) - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(&dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.h b/src/gallium/drivers/radeon/radeon_vcn_dec.h index 5ad847ab716..4100968a75e 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.h +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.h @@ -1040,7 +1040,7 @@ struct radeon_decoder { struct pipe_screen *screen; struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; void *msg; uint32_t *fb; diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c b/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c index 6d8546797ab..69256cd7edb 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c @@ -57,8 +57,8 @@ static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_decoder *dec static void set_reg_jpeg(struct radeon_decoder *dec, unsigned reg, unsigned cond, unsigned type, uint32_t val) { - radeon_emit(dec->cs, RDECODE_PKTJ(reg, cond, type)); - radeon_emit(dec->cs, val); + radeon_emit(&dec->cs, RDECODE_PKTJ(reg, cond, type)); + radeon_emit(&dec->cs, val); } /* send a bitstream buffer command */ @@ -85,7 +85,7 @@ static void send_cmd_bitstream(struct radeon_decoder *dec, struct pb_buffer *buf set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; @@ -117,7 +117,7 @@ static void send_cmd_target(struct radeon_decoder *dec, struct pb_buffer *buf, u set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0); set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; @@ -205,7 +205,7 @@ static void send_cmd_bitstream_direct(struct radeon_decoder *dec, struct pb_buff set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10)); set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; @@ -236,7 +236,7 @@ static void send_cmd_target_direct(struct radeon_decoder *dec, struct pb_buffer set_reg_jpeg(dec, vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0); set_reg_jpeg(dec, vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); addr = dec->ws->buffer_get_virtual_address(buf); addr = addr + off; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index 84f766d2211..305f02ee39a 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -204,7 +204,7 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic static void flush(struct radeon_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL); } static void radeon_enc_flush(struct pipe_video_codec *encoder) @@ -353,7 +353,7 @@ static void radeon_enc_destroy(struct pipe_video_codec *encoder) } si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); FREE(enc); } @@ -364,7 +364,7 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, void *feed struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); if (ptr[1]) *size = ptr[6]; @@ -407,9 +407,8 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, enc->bits_in_shifter = 0; enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc, false); - if (!enc->cs) { + if (!ws->cs_create(&enc->cs, sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc, false)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -462,8 +461,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + enc->ws->cs_destroy(&enc->cs); si_vid_destroy_buffer(&enc->cpb); @@ -474,7 +472,7 @@ error: void radeon_enc_add_buffer(struct radeon_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domain, signed offset) { - enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); addr = addr + offset; @@ -493,14 +491,14 @@ void radeon_enc_set_emulation_prevention(struct radeon_encoder *enc, bool set) void radeon_enc_output_one_byte(struct radeon_encoder *enc, unsigned char byte) { if (enc->byte_index == 0) - enc->cs->current.buf[enc->cs->current.cdw] = 0; - enc->cs->current.buf[enc->cs->current.cdw] |= + enc->cs.current.buf[enc->cs.current.cdw] = 0; + enc->cs.current.buf[enc->cs.current.cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); enc->byte_index++; if (enc->byte_index >= 4) { enc->byte_index = 0; - enc->cs->current.cdw++; + enc->cs.current.cdw++; } } @@ -576,7 +574,7 @@ void radeon_enc_flush_headers(struct radeon_encoder *enc) } if (enc->byte_index > 0) { - enc->cs->current.cdw++; + enc->cs.current.cdw++; enc->byte_index = 0; } } diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h index 8058f9ab4f6..8b798c536c6 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h @@ -125,10 +125,10 @@ #define RENCODE_FEEDBACK_BUFFER_MODE_LINEAR 0 #define RENCODE_FEEDBACK_BUFFER_MODE_CIRCULAR 1 -#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) +#define RADEON_ENC_CS(value) (enc->cs.current.buf[enc->cs.current.cdw++] = (value)) #define RADEON_ENC_BEGIN(cmd) \ { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + uint32_t *begin = &enc->cs.current.buf[enc->cs.current.cdw++]; \ RADEON_ENC_CS(cmd) #define RADEON_ENC_READ(buf, domain, off) \ radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) @@ -137,7 +137,7 @@ #define RADEON_ENC_READWRITE(buf, domain, off) \ radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) #define RADEON_ENC_END() \ - *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + *begin = (&enc->cs.current.buf[enc->cs.current.cdw] - begin) * 4; \ enc->total_task_size += *begin; \ } @@ -512,7 +512,7 @@ struct radeon_encoder { struct pipe_screen *screen; struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; + struct radeon_cmdbuf cs; radeon_enc_get_buffer get_buffer; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c index 1b8cf798254..e24e60e2609 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c @@ -81,7 +81,7 @@ static void radeon_enc_task_info(struct radeon_encoder *enc, bool need_feedback) enc->enc_pic.task_info.allowed_max_num_feedbacks = 0; RADEON_ENC_BEGIN(enc->cmd.task_info); - enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++]; + enc->p_task_size = &enc->cs.current.buf[enc->cs.current.cdw++]; RADEON_ENC_CS(enc->enc_pic.task_info.task_id); RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks); RADEON_ENC_END(); @@ -282,7 +282,7 @@ static void radeon_enc_nalu_sps(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); radeon_enc_code_fixed_bits(enc, 0x00000001, 32); @@ -364,7 +364,7 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; int i; radeon_enc_reset(enc); @@ -455,7 +455,7 @@ static void radeon_enc_nalu_pps(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); radeon_enc_code_fixed_bits(enc, 0x00000001, 32); @@ -490,7 +490,7 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); radeon_enc_code_fixed_bits(enc, 0x00000001, 32); @@ -548,7 +548,7 @@ static void radeon_enc_nalu_vps(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_VPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; int i; radeon_enc_reset(enc); @@ -602,7 +602,7 @@ static void radeon_enc_nalu_aud_hevc(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_AUD); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); radeon_enc_code_fixed_bits(enc, 0x00000001, 32); @@ -647,7 +647,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc) radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); - cdw_start = enc->cs->current.cdw; + cdw_start = enc->cs.current.cdw; if (enc->enc_pic.is_idr) radeon_enc_code_fixed_bits(enc, 0x65, 8); else if (enc->enc_pic.not_referenced) @@ -748,7 +748,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc) instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - cdw_filled = enc->cs->current.cdw - cdw_start; + cdw_filled = enc->cs.current.cdw - cdw_start; for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) RADEON_ENC_CS(0x00000000); @@ -772,7 +772,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); - cdw_start = enc->cs->current.cdw; + cdw_start = enc->cs.current.cdw; radeon_enc_code_fixed_bits(enc, 0x0, 1); radeon_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6); radeon_enc_code_fixed_bits(enc, 0x0, 6); @@ -862,7 +862,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - cdw_filled = enc->cs->current.cdw - cdw_start; + cdw_filled = enc->cs.current.cdw - cdw_start; for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) RADEON_ENC_CS(0x00000000); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c index b8dd69ae802..0ef1ba36f8f 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c @@ -91,7 +91,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); - cdw_start = enc->cs->current.cdw; + cdw_start = enc->cs.current.cdw; radeon_enc_code_fixed_bits(enc, 0x0, 1); radeon_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6); radeon_enc_code_fixed_bits(enc, 0x0, 6); @@ -202,7 +202,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) inst_index++; instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - cdw_filled = enc->cs->current.cdw - cdw_start; + cdw_filled = enc->cs.current.cdw - cdw_start; for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) RADEON_ENC_CS(0x00000000); @@ -246,7 +246,7 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; int i; radeon_enc_reset(enc); @@ -337,7 +337,7 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) { RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); radeon_enc_code_fixed_bits(enc, 0x00000001, 32); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_3_0.c b/src/gallium/drivers/radeon/radeon_vcn_enc_3_0.c index 0e381045b51..6c5871eae0f 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_3_0.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_3_0.c @@ -114,7 +114,7 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) RADEON_ENC_BEGIN(enc->cmd.nalu); RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; radeon_enc_reset(enc); radeon_enc_set_emulation_prevention(enc, false); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 34d1c49d9de..0bd1cdbdbe6 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -206,6 +206,9 @@ struct radeon_cmdbuf { uint64_t used_vram; uint64_t used_gart; uint64_t gpu_address; + + /* Private winsys data. */ + void *priv; }; /* Tiling info for display code, DRI sharing, and other data. */ @@ -482,15 +485,19 @@ struct radeon_winsys { /** * Create a command stream. * + * \param cs The returned structure that is initialized by cs_create. * \param ctx The submission context * \param ring_type The ring type (GFX, DMA, UVD) * \param flush Flush callback function associated with the command stream. * \param user User pointer that will be passed to the flush callback. + * + * \return true on success */ - struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence), - void *flush_ctx, bool stop_exec_on_failure); + bool (*cs_create)(struct radeon_cmdbuf *cs, + struct radeon_winsys_ctx *ctx, enum ring_type ring_type, + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence), + void *flush_ctx, bool stop_exec_on_failure); /** * Add a parallel compute IB to a gfx IB. It will share the buffer list @@ -501,13 +508,16 @@ struct radeon_winsys { * to use a wait packet for synchronization. * * The returned IB is only a stream for writing packets to the new - * IB. Calling other winsys functions with it is not allowed, not even - * "cs_destroy". Use the gfx IB instead. + * IB. The only function that can be used on the compute cs is cs_check_space. * - * \param cs Gfx IB + * \param compute_cs The returned structure of the command stream. + * \param gfx_cs Gfx IB + * + * \return true on success */ - struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, - bool uses_gds_ordered_append); + bool (*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *compute_cs, + struct radeon_cmdbuf *gfx_cs, + bool uses_gds_ordered_append); /** * Set up and enable mid command buffer preemption for the command stream. diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index d9ba9ad88b8..1c154050ed6 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -94,7 +94,7 @@ static bool si_sdma_v4_copy_texture(struct si_context *sctx, struct pipe_resourc /* Linear -> linear sub-window copy. */ if (ssrc->surface.is_linear && sdst->surface.is_linear) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; /* Check if everything fits into the bitfields */ if (!(src_pitch <= (1 << 19) && dst_pitch <= (1 << 19) && src_slice_pitch <= (1 << 28) && @@ -153,7 +153,7 @@ static bool si_sdma_v4_copy_texture(struct si_context *sctx, struct pipe_resourc unsigned linear_slice_pitch = linear == ssrc ? src_slice_pitch : dst_slice_pitch; uint64_t tiled_address = tiled == ssrc ? src_address : dst_address; uint64_t linear_address = linear == ssrc ? src_address : dst_address; - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; linear_address += linear->surface.u.gfx9.offset[linear_level]; @@ -270,7 +270,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, struct pipe_resource /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) || (srcx + copy_width != (1 << 14) && srcy + copy_height != (1 << 14)))) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; si_need_dma_space(sctx, 13, &sdst->buffer, &ssrc->buffer); @@ -407,7 +407,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, struct pipe_resource slice_tile_max < (1 << 22) && linear_pitch <= (1 << 14) && linear_slice_pitch <= (1 << 28) && copy_width_aligned <= (1 << 14) && copy_height <= (1 << 14) && copy_depth <= (1 << 11)) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; uint32_t direction = linear == sdst ? 1u << 31 : 0; si_need_dma_space(sctx, 14, &sdst->buffer, &ssrc->buffer); @@ -483,7 +483,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, struct pipe_resource sctx->family != CHIP_KABINI) || (srcx + copy_width_aligned != (1 << 14) && srcy + copy_height_aligned != (1 << 14) && dstx + copy_width != (1 << 14)))) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; si_need_dma_space(sctx, 15, &sdst->buffer, &ssrc->buffer); @@ -523,7 +523,7 @@ static void cik_sdma_copy(struct pipe_context *ctx, struct pipe_resource *dst, u assert(src->target != PIPE_BUFFER); - if (!sctx->sdma_cs || src->flags & PIPE_RESOURCE_FLAG_SPARSE || + if (!sctx->sdma_cs.priv || src->flags & PIPE_RESOURCE_FLAG_SPARSE || dst->flags & PIPE_RESOURCE_FLAG_SPARSE) goto fallback; diff --git a/src/gallium/drivers/radeonsi/gfx10_query.c b/src/gallium/drivers/radeonsi/gfx10_query.c index a7729632819..f9aa027d16f 100644 --- a/src/gallium/drivers/radeonsi/gfx10_query.c +++ b/src/gallium/drivers/radeonsi/gfx10_query.c @@ -223,7 +223,7 @@ static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery) uint64_t fence_va = query->last->buf->gpu_address; fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem); fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence); - si_cp_release_mem(sctx, sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va, 0xffffffff, PIPE_QUERY_GPU_FINISHED); } @@ -451,7 +451,7 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s va += end - sizeof(struct gfx10_sh_query_buffer_mem); va += offsetof(struct gfx10_sh_query_buffer_mem, fence); - si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); + si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); } void *saved_cs = sctx->cs_shader_state.program; diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 6ae6c6f7643..6ac94e33dda 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -33,11 +33,11 @@ bool si_rings_is_buffer_referenced(struct si_context *sctx, struct pb_buffer *buf, enum radeon_bo_usage usage) { - if (sctx->ws->cs_is_buffer_referenced(sctx->gfx_cs, buf, usage)) { + if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, buf, usage)) { return true; } - if (radeon_emitted(sctx->sdma_cs, 0) && - sctx->ws->cs_is_buffer_referenced(sctx->sdma_cs, buf, usage)) { + if (radeon_emitted(&sctx->sdma_cs, 0) && + sctx->ws->cs_is_buffer_referenced(&sctx->sdma_cs, buf, usage)) { return true; } return false; @@ -60,8 +60,8 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx, struct si_resource rusage = RADEON_USAGE_WRITE; } - if (radeon_emitted(sctx->gfx_cs, sctx->initial_gfx_cs_size) && - sctx->ws->cs_is_buffer_referenced(sctx->gfx_cs, resource->buf, rusage)) { + if (radeon_emitted(&sctx->gfx_cs, sctx->initial_gfx_cs_size) && + sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, resource->buf, rusage)) { if (usage & PIPE_MAP_DONTBLOCK) { si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; @@ -70,8 +70,8 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx, struct si_resource busy = true; } } - if (radeon_emitted(sctx->sdma_cs, 0) && - sctx->ws->cs_is_buffer_referenced(sctx->sdma_cs, resource->buf, rusage)) { + if (radeon_emitted(&sctx->sdma_cs, 0) && + sctx->ws->cs_is_buffer_referenced(&sctx->sdma_cs, resource->buf, rusage)) { if (usage & PIPE_MAP_DONTBLOCK) { si_flush_dma_cs(sctx, PIPE_FLUSH_ASYNC, NULL); return NULL; @@ -87,9 +87,9 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx, struct si_resource } else { /* We will be wait for the GPU. Wait for any offloaded * CS flush to complete to avoid busy-waiting in the winsys. */ - sctx->ws->cs_sync_flush(sctx->gfx_cs); - if (sctx->sdma_cs) - sctx->ws->cs_sync_flush(sctx->sdma_cs); + sctx->ws->cs_sync_flush(&sctx->gfx_cs); + if (sctx->sdma_cs.priv) + sctx->ws->cs_sync_flush(&sctx->sdma_cs); } } @@ -804,18 +804,18 @@ static bool si_resource_commit(struct pipe_context *pctx, struct pipe_resource * * (b) wait for threaded submit to finish, including those that were * triggered by some other, earlier operation. */ - if (radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) && - ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, res->buf, RADEON_USAGE_READWRITE)) { + if (radeon_emitted(&ctx->gfx_cs, ctx->initial_gfx_cs_size) && + ctx->ws->cs_is_buffer_referenced(&ctx->gfx_cs, res->buf, RADEON_USAGE_READWRITE)) { si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } - if (radeon_emitted(ctx->sdma_cs, 0) && - ctx->ws->cs_is_buffer_referenced(ctx->sdma_cs, res->buf, RADEON_USAGE_READWRITE)) { + if (radeon_emitted(&ctx->sdma_cs, 0) && + ctx->ws->cs_is_buffer_referenced(&ctx->sdma_cs, res->buf, RADEON_USAGE_READWRITE)) { si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC, NULL); } - if (ctx->sdma_cs) - ctx->ws->cs_sync_flush(ctx->sdma_cs); - ctx->ws->cs_sync_flush(ctx->gfx_cs); + if (ctx->sdma_cs.priv) + ctx->ws->cs_sync_flush(&ctx->sdma_cs); + ctx->ws->cs_sync_flush(&ctx->gfx_cs); assert(resource->target == PIPE_BUFFER); diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 26a80fd7ecc..47af020c9e4 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -150,7 +150,7 @@ static inline void radeon_opt_set_context_reg_rmw(struct si_context *sctx, unsig enum si_tracked_reg reg, unsigned value, unsigned mask) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; assert((value & ~mask) == 0); value &= mask; @@ -168,7 +168,7 @@ static inline void radeon_opt_set_context_reg_rmw(struct si_context *sctx, unsig static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset, enum si_tracked_reg reg, unsigned value) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 || sctx->tracked_regs.reg_value[reg] != value) { @@ -189,7 +189,7 @@ static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned enum si_tracked_reg reg, unsigned value1, unsigned value2) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (((sctx->tracked_regs.reg_saved >> reg) & 0x3) != 0x3 || sctx->tracked_regs.reg_value[reg] != value1 || @@ -211,7 +211,7 @@ static inline void radeon_opt_set_context_reg3(struct si_context *sctx, unsigned enum si_tracked_reg reg, unsigned value1, unsigned value2, unsigned value3) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (((sctx->tracked_regs.reg_saved >> reg) & 0x7) != 0x7 || sctx->tracked_regs.reg_value[reg] != value1 || @@ -236,7 +236,7 @@ static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned enum si_tracked_reg reg, unsigned value1, unsigned value2, unsigned value3, unsigned value4) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (((sctx->tracked_regs.reg_saved >> reg) & 0xf) != 0xf || sctx->tracked_regs.reg_value[reg] != value1 || @@ -263,7 +263,7 @@ static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset, unsigned *value, unsigned *saved_val, unsigned num) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; int i, j; for (i = 0; i < num; i++) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 791f438f161..abf11665fb8 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -376,7 +376,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); /* Disable profiling on compute queues. */ - if (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics) { + if (cs != &sctx->gfx_cs || !sctx->screen->info.has_graphics) { radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 0); } @@ -391,7 +391,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf * if we are on a compute queue. */ if (sctx->chip_class >= GFX9 && - (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics)) { + (cs != &sctx->gfx_cs || !sctx->screen->info.has_graphics)) { radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, sctx->chip_class >= GFX10 ? 0x20 : 0); } @@ -444,7 +444,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute struct si_shader *shader, const amd_kernel_code_t *code_object, unsigned offset, bool *prefetch) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct ac_shader_config inline_config = {0}; struct ac_shader_config *config; uint64_t shader_va; @@ -491,7 +491,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute sctx->scratch_waves, config->scratch_bytes_per_wave, config->scratch_bytes_per_wave * sctx->scratch_waves); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->scratch_bo, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->scratch_bo, RADEON_USAGE_READWRITE, RADEON_PRIO_SCRATCH_BUFFER); } @@ -502,7 +502,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute shader_va += sizeof(amd_kernel_code_t); } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->bo, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2); @@ -536,7 +536,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, const amd_kernel_code_t *code_object, unsigned user_sgpr) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; unsigned max_private_element_size = @@ -573,7 +573,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_ const struct pipe_grid_info *info, uint64_t kernel_args_va) { struct si_compute *program = sctx->cs_shader_state.program; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; static const enum amd_code_property_mask_t workgroup_count_masks[] = { AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, @@ -618,7 +618,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_ fprintf(stderr, "Error: Failed to allocate dispatch " "packet."); } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, dispatch_buf, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, dispatch_buf, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); dispatch_va = dispatch_buf->gpu_address + dispatch_offset; @@ -674,7 +674,7 @@ static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_co COMPUTE_DBG(sctx->screen, "input %u : %u\n", i, kernel_args[i]); } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, input_buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, input_buffer, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); si_setup_user_sgprs_co_v2(sctx, code_object, info, kernel_args_va); @@ -686,7 +686,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr { struct si_compute *program = sctx->cs_shader_state.program; struct si_shader_selector *sel = &program->sel; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 + 4 * SI_NUM_RESOURCE_SGPRS; unsigned block_size_reg = grid_size_reg + /* 12 bytes = 3 dwords. */ @@ -696,7 +696,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr if (sel->info.uses_grid_size) { if (info->indirect) { for (unsigned i = 0; i < 3; ++i) { - si_cp_copy_data(sctx, sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i, + si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i, COPY_DATA_SRC_MEM, si_resource(info->indirect), info->indirect_offset + 4 * i); } @@ -724,7 +724,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info) { struct si_screen *sscreen = sctx->screen; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; unsigned threads_per_threadgroup = info->block[0] * info->block[1] * info->block[2]; unsigned waves_per_threadgroup = @@ -775,7 +775,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ if (info->indirect) { uint64_t base_va = si_resource(info->indirect)->gpu_address; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(info->indirect), RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1)); @@ -845,7 +845,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info /* If we're using a secure context, determine if cs must be secure or not */ if (unlikely(radeon_uses_secure_bos(sctx->ws))) { bool secure = si_compute_resources_check_encrypted(sctx); - if (secure != sctx->ws->cs_is_secure(sctx->gfx_cs)) { + if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) { si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW | RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); @@ -856,7 +856,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info si_compute_resources_add_all_to_bo_list(sctx); if (!sctx->cs_shader_state.initialized) { - si_emit_initial_compute_regs(sctx, sctx->gfx_cs); + si_emit_initial_compute_regs(sctx, &sctx->gfx_cs); sctx->cs_shader_state.emitted_program = NULL; sctx->cs_shader_state.initialized = true; @@ -880,7 +880,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if (!buffer) { continue; } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_COMPUTE_GLOBAL); } diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 62d022c394d..80365915bec 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -354,7 +354,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_ clear_value_size, coher); } else { assert(clear_value_size == 4); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, offset, aligned_size, *clear_value, 0, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, offset, aligned_size, *clear_value, 0, coher, get_cache_policy(sctx, coher, size)); } diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 01bf52c5a6b..37ef1dee16d 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -905,7 +905,7 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) if (sctx->index_ring) return true; - if (!sctx->prim_discard_compute_cs) { + if (!sctx->prim_discard_compute_cs.priv) { struct radeon_winsys *ws = sctx->ws; unsigned gds_size = VERTEX_COUNTER_GDS_MODE == 1 ? GDS_SIZE_UNORDERED : VERTEX_COUNTER_GDS_MODE == 2 ? 8 : 0; @@ -917,7 +917,7 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) if (!sctx->gds) return false; - ws->cs_add_buffer(sctx->gfx_cs, sctx->gds, RADEON_USAGE_READWRITE, 0, 0); + ws->cs_add_buffer(&sctx->gfx_cs, sctx->gds, RADEON_USAGE_READWRITE, 0, 0); } if (num_oa_counters) { assert(gds_size); @@ -926,12 +926,11 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) if (!sctx->gds_oa) return false; - ws->cs_add_buffer(sctx->gfx_cs, sctx->gds_oa, RADEON_USAGE_READWRITE, 0, 0); + ws->cs_add_buffer(&sctx->gfx_cs, sctx->gds_oa, RADEON_USAGE_READWRITE, 0, 0); } - sctx->prim_discard_compute_cs = - ws->cs_add_parallel_compute_ib(sctx->gfx_cs, num_oa_counters > 0); - if (!sctx->prim_discard_compute_cs) + if (!ws->cs_add_parallel_compute_ib(&sctx->prim_discard_compute_cs, + &sctx->gfx_cs, num_oa_counters > 0)) return false; } @@ -966,7 +965,7 @@ si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe if (!si_initialize_prim_discard_cmdbuf(sctx)) return SI_PRIM_DISCARD_DISABLED; - struct radeon_cmdbuf *gfx_cs = sctx->gfx_cs; + struct radeon_cmdbuf *gfx_cs = &sctx->gfx_cs; unsigned prim = info->mode; unsigned count = total_count; unsigned instance_count = info->instance_count; @@ -1093,7 +1092,7 @@ si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe } /* The compute IB is always chained, but we need to call cs_check_space to add more space. */ - struct radeon_cmdbuf *cs = sctx->prim_discard_compute_cs; + struct radeon_cmdbuf *cs = &sctx->prim_discard_compute_cs; ASSERTED bool compute_has_space = sctx->ws->cs_check_space(cs, need_compute_dw, false); assert(compute_has_space); assert(si_check_ring_space(sctx, out_indexbuf_size)); @@ -1102,7 +1101,7 @@ si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe void si_compute_signal_gfx(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->prim_discard_compute_cs; + struct radeon_cmdbuf *cs = &sctx->prim_discard_compute_cs; unsigned writeback_L2_flags = 0; /* The writeback L2 flags vary with each chip generation. */ @@ -1141,8 +1140,8 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, unsigned base_vertex, uint64_t input_indexbuf_va, unsigned input_indexbuf_num_elements) { - struct radeon_cmdbuf *gfx_cs = sctx->gfx_cs; - struct radeon_cmdbuf *cs = sctx->prim_discard_compute_cs; + struct radeon_cmdbuf *gfx_cs = &sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->prim_discard_compute_cs; unsigned num_prims_per_instance = u_decomposed_prims_for_vertices(info->mode, count); if (!num_prims_per_instance) return; diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 67362f8ad18..18b5ed01a6c 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -140,7 +140,7 @@ void si_cp_dma_wait_for_idle(struct si_context *sctx) * DMA request, however, the CP will see the sync flag and still wait * for all DMAs to complete. */ - si_emit_cp_dma(sctx, sctx->gfx_cs, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); + si_emit_cp_dma(sctx, &sctx->gfx_cs, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); } static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst, @@ -168,10 +168,10 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst /* This must be done after need_cs_space. */ if (!(user_flags & SI_CPDMA_SKIP_BO_LIST_UPDATE)) { if (dst) - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(dst), RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(dst), RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); if (src) - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(src), RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(src), RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); } @@ -278,7 +278,7 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns user_flags, coher, is_first, &dma_flags); va = sctx->scratch_buffer->gpu_address; - si_emit_cp_dma(sctx, sctx->gfx_cs, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, cache_policy); + si_emit_cp_dma(sctx, &sctx->gfx_cs, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, cache_policy); } /** @@ -342,7 +342,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, !(user_flags & SI_CPDMA_SKIP_TMZ))) { bool secure = src && (si_resource(src)->flags & RADEON_FLAG_ENCRYPTED); assert(!secure || (!dst || (si_resource(dst)->flags & RADEON_FLAG_ENCRYPTED))); - if (secure != sctx->ws->cs_is_secure(sctx->gfx_cs)) { + if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) { si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW | RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); } @@ -365,7 +365,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, si_cp_dma_prepare(sctx, dst, src, byte_count, size + skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, sctx->gfx_cs, main_dst_offset, main_src_offset, byte_count, dma_flags, + si_emit_cp_dma(sctx, &sctx->gfx_cs, main_dst_offset, main_src_offset, byte_count, dma_flags, cache_policy); size -= byte_count; @@ -380,7 +380,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, si_cp_dma_prepare(sctx, dst, src, skipped_size, skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, sctx->gfx_cs, dst_offset, src_offset, skipped_size, dma_flags, + si_emit_cp_dma(sctx, &sctx->gfx_cs, dst_offset, src_offset, skipped_size, dma_flags, cache_policy); } @@ -536,15 +536,15 @@ void si_test_gds(struct si_context *sctx) src = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); dst = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 0, 4, 0xabcdef01, 0, SI_COHERENCY_SHADER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, src, 0, 4, 0xabcdef01, 0, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 4, 4, 0x23456789, 0, SI_COHERENCY_SHADER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, src, 4, 4, 0x23456789, 0, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 8, 4, 0x87654321, 0, SI_COHERENCY_SHADER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, src, 8, 4, 0x87654321, 0, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 12, 4, 0xfedcba98, 0, SI_COHERENCY_SHADER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, src, 12, 4, 0xfedcba98, 0, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, 16, 0xdeadbeef, 0, SI_COHERENCY_SHADER, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, 0, 16, 0xdeadbeef, 0, SI_COHERENCY_SHADER, L2_BYPASS); si_cp_dma_copy_buffer(sctx, NULL, src, offset, 0, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); @@ -556,7 +556,7 @@ void si_test_gds(struct si_context *sctx) ? "pass" : "fail"); - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, NULL, offset, 16, 0xc1ea4146, 0, SI_COHERENCY_NONE, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, NULL, offset, 16, 0xc1ea4146, 0, SI_COHERENCY_NONE, L2_BYPASS); si_cp_dma_copy_buffer(sctx, dst, NULL, 0, offset, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); @@ -574,7 +574,7 @@ void si_test_gds(struct si_context *sctx) void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned offset, unsigned size, unsigned dst_sel, unsigned engine, const void *data) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; assert(offset % 4 == 0); assert(size % 4 == 0); @@ -598,10 +598,10 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned { /* cs can point to the compute IB, which has the buffer list in gfx_cs. */ if (dst) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); } if (src) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, src, RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, src, RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); } uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset; diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index 5df3b764096..d48fb14278b 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -162,7 +162,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) if (sctx->shadowed_regs) { /* We need to clear the shadowed reg buffer. */ - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, &sctx->shadowed_regs->b.b, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b, 0, sctx->shadowed_regs->bo_size, 0, 0, SI_COHERENCY_CP, L2_BYPASS); @@ -171,10 +171,10 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) si_create_shadowing_ib_preamble(sctx); /* Initialize shadowed registers as follows. */ - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, sctx->shadowed_regs, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs, RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); si_pm4_emit(sctx, shadowing_preamble); - ac_emulate_clear_state(&sctx->screen->info, sctx->gfx_cs, + ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, radeon_set_context_reg_seq_array); si_pm4_emit(sctx, sctx->cs_preamble_state); @@ -187,7 +187,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) /* Setup preemption. The shadowing preamble will be executed as a preamble IB, * which will load register values from memory on a context switch. */ - sctx->ws->cs_setup_preemption(sctx->gfx_cs, shadowing_preamble->pm4, + sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4, shadowing_preamble->ndw); si_pm4_free_state(sctx, shadowing_preamble, ~0); } diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 725d7b0354b..c242c8e5d25 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -424,20 +424,20 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f) ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, chunk->gfx_end - chunk->gfx_begin, &last_trace_id, map ? 1 : 0, "IB", ctx->chip_class, NULL, NULL); } else { - si_parse_current_ib(f, ctx->gfx_cs, chunk->gfx_begin, chunk->gfx_end, &last_trace_id, + si_parse_current_ib(f, &ctx->gfx_cs, chunk->gfx_begin, chunk->gfx_end, &last_trace_id, map ? 1 : 0, "IB", ctx->chip_class); } } if (chunk->compute_end != chunk->compute_begin) { - assert(ctx->prim_discard_compute_cs); + assert(ctx->prim_discard_compute_cs.priv); if (scs->flushed) { ac_parse_ib(f, scs->compute.ib + chunk->compute_begin, chunk->compute_end - chunk->compute_begin, &last_compute_trace_id, map ? 1 : 0, "Compute IB", ctx->chip_class, NULL, NULL); } else { - si_parse_current_ib(f, ctx->prim_discard_compute_cs, chunk->compute_begin, + si_parse_current_ib(f, &ctx->prim_discard_compute_cs, chunk->compute_begin, chunk->compute_end, &last_compute_trace_id, map ? 1 : 0, "Compute IB", ctx->chip_class); } @@ -461,12 +461,12 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log, bool du assert(ctx->current_saved_cs); struct si_saved_cs *scs = ctx->current_saved_cs; - unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw; + unsigned gfx_cur = ctx->gfx_cs.prev_dw + ctx->gfx_cs.current.cdw; unsigned compute_cur = 0; - if (ctx->prim_discard_compute_cs) + if (ctx->prim_discard_compute_cs.priv) compute_cur = - ctx->prim_discard_compute_cs->prev_dw + ctx->prim_discard_compute_cs->current.cdw; + ctx->prim_discard_compute_cs.prev_dw + ctx->prim_discard_compute_cs.current.cdw; if (!dump_bo_list && gfx_cur == scs->gfx_last_dw && compute_cur == scs->compute_last_dw) return; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8f688fa3650..ec3ee50597e 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -164,7 +164,7 @@ static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size); desc->gpu_list = ptr - first_slot_offset / 4; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); /* The shader pointer should point to slot 0. */ @@ -185,7 +185,7 @@ si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *de if (!desc->buffer) return; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } @@ -906,7 +906,7 @@ void si_update_ps_colorbuf0_slot(struct si_context *sctx) si_set_shader_image_desc(sctx, &view, true, desc, desc + 8); pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RW_IMAGE); buffers->enabled_mask |= 1llu << slot; } else { @@ -1003,7 +1003,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, int i = u_bit_scan64(&mask); radeon_add_to_buffer_list( - sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]), + sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]), buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf); } @@ -1062,14 +1062,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) if (!sctx->vertex_buffer[vb].buffer.resource) continue; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sctx->vertex_buffer[vb].buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } if (!sctx->vb_descriptors_buffer) return; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } @@ -1393,7 +1393,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource } pipe_resource_reference(&buffers->buffers[slot], buffer); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE, buffers->priority); buffers->enabled_mask |= 1llu << slot; } else { @@ -1963,7 +1963,7 @@ static void si_emit_shader_pointer_body(struct si_screen *sscreen, struct radeon static void si_emit_shader_pointer(struct si_context *sctx, struct si_descriptors *desc, unsigned sh_base) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned sh_offset = sh_base + desc->shader_userdata_offset; si_emit_shader_pointer_head(cs, sh_offset, 1); @@ -1976,7 +1976,7 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx, unsigne if (!sh_base) return; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned mask = sctx->shader_pointers_dirty & pointer_mask; while (mask) { @@ -2044,7 +2044,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx) sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); if (sctx->vertex_buffer_pointer_dirty && sctx->num_vertex_elements) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; /* Find the location of the VB descriptor pointer. */ unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR; @@ -2064,7 +2064,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx) if (sctx->vertex_buffer_user_sgprs_dirty && sctx->num_vertex_elements && sctx->screen->num_vbos_in_user_sgprs) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned num_desc = MIN2(sctx->num_vertex_elements, sctx->screen->num_vbos_in_user_sgprs); unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4; @@ -2081,7 +2081,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx) void si_emit_compute_shader_pointers(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel; unsigned base = R_00B900_COMPUTE_USER_DATA_0; diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index 98f37f2c8a4..2fa77871918 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -27,7 +27,7 @@ static void si_dma_emit_wait_idle(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; /* NOP waits for idle. */ if (sctx->chip_class >= GFX7) @@ -38,7 +38,7 @@ static void si_dma_emit_wait_idle(struct si_context *sctx) void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; uint64_t va = dst->gpu_address + offset; if (sctx->chip_class == GFX6) { @@ -65,7 +65,7 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uin void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; unsigned i, ncopy, csize; struct si_resource *sdst = si_resource(dst); @@ -73,7 +73,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, ui assert(size); assert(size % 4 == 0); - if (!cs || dst->flags & PIPE_RESOURCE_FLAG_SPARSE || + if (!cs->priv || dst->flags & PIPE_RESOURCE_FLAG_SPARSE || sctx->screen->debug_flags & DBG(NO_SDMA_CLEARS) || unlikely(radeon_uses_secure_bos(sctx->ws))) { sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4); @@ -129,12 +129,12 @@ void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, uint64_t size) { - struct radeon_cmdbuf *cs = sctx->sdma_cs; + struct radeon_cmdbuf *cs = &sctx->sdma_cs; unsigned i, ncopy, csize; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); - if (!cs || dst->flags & PIPE_RESOURCE_FLAG_SPARSE || src->flags & PIPE_RESOURCE_FLAG_SPARSE || + if (!cs->priv || dst->flags & PIPE_RESOURCE_FLAG_SPARSE || src->flags & PIPE_RESOURCE_FLAG_SPARSE || (ssrc->flags & RADEON_FLAG_ENCRYPTED) != (sdst->flags & RADEON_FLAG_ENCRYPTED)) { si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size); return; @@ -214,8 +214,8 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resour struct si_resource *src) { struct radeon_winsys *ws = ctx->ws; - uint64_t vram = ctx->sdma_cs->used_vram; - uint64_t gtt = ctx->sdma_cs->used_gart; + uint64_t vram = ctx->sdma_cs.used_vram; + uint64_t gtt = ctx->sdma_cs.used_gart; if (dst) { vram += dst->vram_usage; @@ -227,9 +227,9 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resour } /* Flush the GFX IB if DMA depends on it. */ - if (!ctx->sdma_uploads_in_progress && radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) && - ((dst && ws->cs_is_buffer_referenced(ctx->gfx_cs, dst->buf, RADEON_USAGE_READWRITE)) || - (src && ws->cs_is_buffer_referenced(ctx->gfx_cs, src->buf, RADEON_USAGE_WRITE)))) + if (!ctx->sdma_uploads_in_progress && radeon_emitted(&ctx->gfx_cs, ctx->initial_gfx_cs_size) && + ((dst && ws->cs_is_buffer_referenced(&ctx->gfx_cs, dst->buf, RADEON_USAGE_READWRITE)) || + (src && ws->cs_is_buffer_referenced(&ctx->gfx_cs, src->buf, RADEON_USAGE_WRITE)))) si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); bool use_secure_cmd = false; @@ -256,28 +256,28 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resour */ num_dw++; /* for emit_wait_idle below */ if (!ctx->sdma_uploads_in_progress && - (use_secure_cmd != ctx->ws->cs_is_secure(ctx->sdma_cs) || - !ws->cs_check_space(ctx->sdma_cs, num_dw, false) || - ctx->sdma_cs->used_vram + ctx->sdma_cs->used_gart > 64 * 1024 * 1024 || - !radeon_cs_memory_below_limit(ctx->screen, ctx->sdma_cs, vram, gtt))) { + (use_secure_cmd != ctx->ws->cs_is_secure(&ctx->sdma_cs) || + !ws->cs_check_space(&ctx->sdma_cs, num_dw, false) || + ctx->sdma_cs.used_vram + ctx->sdma_cs.used_gart > 64 * 1024 * 1024 || + !radeon_cs_memory_below_limit(ctx->screen, &ctx->sdma_cs, vram, gtt))) { si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC | RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); - assert(ctx->ws->cs_is_secure(ctx->sdma_cs) == use_secure_cmd); - assert((num_dw + ctx->sdma_cs->current.cdw) <= ctx->sdma_cs->current.max_dw); + assert(ctx->ws->cs_is_secure(&ctx->sdma_cs) == use_secure_cmd); + assert((num_dw + ctx->sdma_cs.current.cdw) <= ctx->sdma_cs.current.max_dw); } /* Wait for idle if either buffer has been used in the IB before to * prevent read-after-write hazards. */ - if ((dst && ws->cs_is_buffer_referenced(ctx->sdma_cs, dst->buf, RADEON_USAGE_READWRITE)) || - (src && ws->cs_is_buffer_referenced(ctx->sdma_cs, src->buf, RADEON_USAGE_WRITE))) + if ((dst && ws->cs_is_buffer_referenced(&ctx->sdma_cs, dst->buf, RADEON_USAGE_READWRITE)) || + (src && ws->cs_is_buffer_referenced(&ctx->sdma_cs, src->buf, RADEON_USAGE_WRITE))) si_dma_emit_wait_idle(ctx); unsigned sync = ctx->sdma_uploads_in_progress ? 0 : RADEON_USAGE_SYNCHRONIZED; if (dst) { - ws->cs_add_buffer(ctx->sdma_cs, dst->buf, RADEON_USAGE_WRITE | sync, dst->domains, 0); + ws->cs_add_buffer(&ctx->sdma_cs, dst->buf, RADEON_USAGE_WRITE | sync, dst->domains, 0); } if (src) { - ws->cs_add_buffer(ctx->sdma_cs, src->buf, RADEON_USAGE_READ | sync, src->domains, 0); + ws->cs_add_buffer(&ctx->sdma_cs, src->buf, RADEON_USAGE_READ | sync, src->domains, 0); } /* this function is called before all DMA calls, so increment this. */ @@ -286,7 +286,7 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resour void si_flush_dma_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { - struct radeon_cmdbuf *cs = ctx->sdma_cs; + struct radeon_cmdbuf *cs = &ctx->sdma_cs; struct radeon_saved_cs saved; bool check_vm = (ctx->screen->debug_flags & DBG(CHECK_VM)) != 0; diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 744127f074a..d1521485444 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -74,7 +74,7 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags; unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel); - bool compute_ib = !ctx->has_graphics || cs == ctx->prim_discard_compute_cs; + bool compute_ib = !ctx->has_graphics || cs == &ctx->prim_discard_compute_cs; if (ctx->chip_class >= GFX9 || (compute_ib && ctx->chip_class >= GFX7)) { /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion @@ -87,7 +87,7 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne if (ctx->chip_class == GFX9 && !compute_ib && query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != PIPE_QUERY_OCCLUSION_PREDICATE && query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - struct si_resource *scratch = unlikely(ctx->ws->cs_is_secure(ctx->gfx_cs)) ? + struct si_resource *scratch = unlikely(ctx->ws->cs_is_secure(&ctx->gfx_cs)) ? ctx->eop_bug_scratch_tmz : ctx->eop_bug_scratch; assert(16 * ctx->screen->info.max_render_backends <= scratch->b.b.width0); @@ -96,7 +96,7 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne radeon_emit(cs, scratch->gpu_address); radeon_emit(cs, scratch->gpu_address >> 32); - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, scratch, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } @@ -125,7 +125,7 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne radeon_emit(cs, 0); /* immediate data */ radeon_emit(cs, 0); /* unused */ - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, scratch, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } @@ -138,7 +138,7 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne } if (buf) { - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } } @@ -168,14 +168,14 @@ static void si_add_fence_dependency(struct si_context *sctx, struct pipe_fence_h { struct radeon_winsys *ws = sctx->ws; - if (sctx->sdma_cs) - ws->cs_add_fence_dependency(sctx->sdma_cs, fence, 0); - ws->cs_add_fence_dependency(sctx->gfx_cs, fence, 0); + if (sctx->sdma_cs.priv) + ws->cs_add_fence_dependency(&sctx->sdma_cs, fence, 0); + ws->cs_add_fence_dependency(&sctx->gfx_cs, fence, 0); } static void si_add_syncobj_signal(struct si_context *sctx, struct pipe_fence_handle *fence) { - sctx->ws->cs_add_syncobj_signal(sctx->gfx_cs, fence); + sctx->ws->cs_add_syncobj_signal(&sctx->gfx_cs, fence); } static void si_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **dst, @@ -252,8 +252,8 @@ static void si_fine_fence_set(struct si_context *ctx, struct si_fine_fence *fine } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { uint64_t fence_va = fine->buf->gpu_address + fine->offset; - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - si_cp_release_mem(ctx, ctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, fine->buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + si_cp_release_mem(ctx, &ctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000, PIPE_QUERY_GPU_FINISHED); } else { @@ -486,18 +486,18 @@ static void si_flush_all_queues(struct pipe_context *ctx, } /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ - if (sctx->sdma_cs) + if (sctx->sdma_cs.priv) si_flush_dma_cs(sctx, rflags, fence ? &sdma_fence : NULL); if (force_flush) { sctx->initial_gfx_cs_size = 0; } - if (!radeon_emitted(sctx->gfx_cs, sctx->initial_gfx_cs_size)) { + if (!radeon_emitted(&sctx->gfx_cs, sctx->initial_gfx_cs_size)) { if (fence) ws->fence_reference(&gfx_fence, sctx->last_gfx_fence); if (!(flags & PIPE_FLUSH_DEFERRED)) - ws->cs_sync_flush(sctx->gfx_cs); + ws->cs_sync_flush(&sctx->gfx_cs); } else { /* Instead of flushing, create a deferred fence. Constraints: * - the gallium frontend must allow a deferred flush. @@ -506,7 +506,7 @@ static void si_flush_all_queues(struct pipe_context *ctx, * Thread safety in fence_finish must be ensured by the gallium frontend. */ if (flags & PIPE_FLUSH_DEFERRED && !(flags & PIPE_FLUSH_FENCE_FD) && fence) { - gfx_fence = sctx->ws->cs_get_next_fence(sctx->gfx_cs); + gfx_fence = sctx->ws->cs_get_next_fence(&sctx->gfx_cs); deferred_fence = true; } else { si_flush_gfx_cs(sctx, rflags, fence ? &gfx_fence : NULL); @@ -552,9 +552,9 @@ static void si_flush_all_queues(struct pipe_context *ctx, assert(!fine.buf); finish: if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) { - if (sctx->sdma_cs) - ws->cs_sync_flush(sctx->sdma_cs); - ws->cs_sync_flush(sctx->gfx_cs); + if (sctx->sdma_cs.priv) + ws->cs_sync_flush(&sctx->sdma_cs); + ws->cs_sync_flush(&sctx->gfx_cs); } } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index df554473bd5..e21986fed1b 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -32,7 +32,7 @@ /* initialize */ void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; /* There is no need to flush the DMA IB here, because * si_need_dma_space always flushes the GFX IB if there is @@ -45,7 +45,7 @@ void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) * that have been added (cs_add_buffer) and two counters in the pipe * driver for those that haven't been added yet. */ - if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, ctx->gfx_cs, ctx->vram, ctx->gtt))) { + if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram, ctx->gtt))) { ctx->gtt = 0; ctx->vram = 0; si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); @@ -70,7 +70,7 @@ void si_unref_sdma_uploads(struct si_context *sctx) void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct radeon_winsys *ws = ctx->ws; struct si_screen *sscreen = ctx->screen; const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; @@ -125,7 +125,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h * If the driver flushes the GFX IB internally, and it should never ask * for a fence handle. */ - assert(!radeon_emitted(ctx->sdma_cs, 0) || fence == NULL); + assert(!radeon_emitted(&ctx->sdma_cs, 0) || fence == NULL); /* Update the sdma_uploads list by flushing the uploader. */ u_upload_unmap(ctx->b.const_uploader); @@ -144,11 +144,11 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h si_unref_sdma_uploads(ctx); /* Flush SDMA (preamble IB). */ - if (radeon_emitted(ctx->sdma_cs, 0)) + if (radeon_emitted(&ctx->sdma_cs, 0)) si_flush_dma_cs(ctx, flags, NULL); - if (radeon_emitted(ctx->prim_discard_compute_cs, 0)) { - struct radeon_cmdbuf *compute_cs = ctx->prim_discard_compute_cs; + if (radeon_emitted(&ctx->prim_discard_compute_cs, 0)) { + struct radeon_cmdbuf *compute_cs = &ctx->prim_discard_compute_cs; si_compute_signal_gfx(ctx); /* Make sure compute shaders are idle before leaving the IB, so that @@ -206,9 +206,9 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h if (si_compute_prim_discard_enabled(ctx)) { /* The compute IB can start after the previous gfx IB starts. */ - if (radeon_emitted(ctx->prim_discard_compute_cs, 0) && ctx->last_gfx_fence) { + if (radeon_emitted(&ctx->prim_discard_compute_cs, 0) && ctx->last_gfx_fence) { ctx->ws->cs_add_fence_dependency( - ctx->gfx_cs, ctx->last_gfx_fence, + &ctx->gfx_cs, ctx->last_gfx_fence, RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY | RADEON_DEPENDENCY_START_FENCE); } @@ -290,16 +290,16 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx) si_trace_emit(ctx); - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->current_saved_cs->trace_buf, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->current_saved_cs->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); } static void si_add_gds_to_buffer_list(struct si_context *sctx) { if (sctx->gds) { - sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds, RADEON_USAGE_READWRITE, 0, 0); + sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->gds, RADEON_USAGE_READWRITE, 0, 0); if (sctx->gds_oa) { - sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds_oa, RADEON_USAGE_READWRITE, 0, 0); + sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->gds_oa, RADEON_USAGE_READWRITE, 0, 0); } } } @@ -404,7 +404,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) */ ctx->prim_discard_vertex_count_threshold = UINT_MAX; - is_secure = ctx->ws->cs_is_secure(ctx->gfx_cs); + is_secure = ctx->ws->cs_is_secure(&ctx->gfx_cs); } if (ctx->is_debug) @@ -429,10 +429,10 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) if (ctx->chip_class == GFX10 && ctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) ctx->flags |= SI_CONTEXT_VGT_FLUSH; - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->border_color_buffer, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer, RADEON_USAGE_READ, RADEON_PRIO_BORDER_COLORS); if (ctx->shadowed_regs) { - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->shadowed_regs, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs, RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } @@ -445,12 +445,12 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) } if (!ctx->has_graphics) { - ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw; + ctx->initial_gfx_cs_size = ctx->gfx_cs.current.cdw; return; } if (ctx->tess_rings) { - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, unlikely(is_secure) ? si_resource(ctx->tess_rings_tmz) : si_resource(ctx->tess_rings), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); } @@ -575,8 +575,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) if (!list_is_empty(&ctx->active_queries)) si_resume_queries(ctx); - assert(!ctx->gfx_cs->prev_dw); - ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw; + assert(!ctx->gfx_cs.prev_dw); + ctx->initial_gfx_cs_size = ctx->gfx_cs.current.cdw; ctx->small_prim_cull_info_dirty = ctx->small_prim_cull_info_buf != NULL; ctx->prim_discard_compute_ib_initialized = false; diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index fc82f3cd3b2..a1b516f48d8 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -703,7 +703,7 @@ static struct si_pc_block *lookup_group(struct si_perfcounters *pc, unsigned *in static void si_pc_emit_instance(struct si_context *sctx, int se, int instance) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned value = S_030800_SH_BROADCAST_WRITES(1); if (se >= 0) { @@ -728,7 +728,7 @@ static void si_pc_emit_instance(struct si_context *sctx, int se, int instance) static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); radeon_emit(cs, shaders & 0x7f); @@ -739,7 +739,7 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block unsigned *selectors) { struct si_pc_block_base *regs = block->b->b; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned idx; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; unsigned dw; @@ -830,9 +830,9 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; - si_cp_copy_data(sctx, sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, + si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, COPY_DATA_IMM, NULL, 1); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, @@ -847,7 +847,7 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer * do it again in here. */ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); @@ -866,7 +866,7 @@ static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block, uint64_t va) { struct si_pc_block_base *regs = block->b->b; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned idx; unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; @@ -922,10 +922,10 @@ static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery static void si_inhibit_clockgating(struct si_context *sctx, bool inhibit) { if (sctx->chip_class >= GFX10) { - radeon_set_uconfig_reg(sctx->gfx_cs, R_037390_RLC_PERFMON_CLK_CNTL, + radeon_set_uconfig_reg(&sctx->gfx_cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit)); } else if (sctx->chip_class >= GFX8) { - radeon_set_uconfig_reg(sctx->gfx_cs, R_0372FC_RLC_PERFMON_CLK_CNTL, + radeon_set_uconfig_reg(&sctx->gfx_cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit)); } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 91818288f42..4982f08f9ee 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -284,10 +284,8 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->sh_query_result_shader) sctx->b.delete_compute_state(&sctx->b, sctx->sh_query_result_shader); - if (sctx->gfx_cs) - sctx->ws->cs_destroy(sctx->gfx_cs); - if (sctx->sdma_cs) - sctx->ws->cs_destroy(sctx->sdma_cs); + sctx->ws->cs_destroy(&sctx->gfx_cs); + sctx->ws->cs_destroy(&sctx->sdma_cs); if (sctx->ctx) sctx->ws->ctx_destroy(sctx->ctx); @@ -520,11 +518,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign */ if (sscreen->info.num_rings[RING_DMA] && !(sscreen->debug_flags & DBG(NO_SDMA)) && sscreen->debug_flags & DBG(FORCE_SDMA)) { - sctx->sdma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, (void *)si_flush_dma_cs, sctx, - stop_exec_on_failure); + sctx->ws->cs_create(&sctx->sdma_cs, sctx->ctx, RING_DMA, (void *)si_flush_dma_cs, + sctx, stop_exec_on_failure); } - bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->sdma_cs; + bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->sdma_cs.priv; sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024, 0, PIPE_USAGE_DEFAULT, SI_RESOURCE_FLAG_32BIT | @@ -535,8 +533,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign if (use_sdma_upload) u_upload_enable_flush_explicit(sctx->b.const_uploader); - sctx->gfx_cs = ws->cs_create(sctx->ctx, sctx->has_graphics ? RING_GFX : RING_COMPUTE, - (void *)si_flush_gfx_cs, sctx, stop_exec_on_failure); + ws->cs_create(&sctx->gfx_cs, sctx->ctx, sctx->has_graphics ? RING_GFX : RING_COMPUTE, + (void *)si_flush_gfx_cs, sctx, stop_exec_on_failure); /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); @@ -723,14 +721,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign &sctx->sample_positions); /* The remainder of this function initializes the gfx CS and must be last. */ - assert(sctx->gfx_cs->current.cdw == 0); + assert(sctx->gfx_cs.current.cdw == 0); if (sctx->has_graphics) { si_init_cp_reg_shadowing(sctx); } si_begin_new_gfx_cs(sctx, true); - assert(sctx->gfx_cs->current.cdw == sctx->initial_gfx_cs_size); + assert(sctx->gfx_cs.current.cdw == sctx->initial_gfx_cs_size); /* Initialize per-context buffers. */ if (sctx->wait_mem_scratch) @@ -750,7 +748,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign &clear_value, 4, SI_COHERENCY_SHADER, true); } - sctx->initial_gfx_cs_size = sctx->gfx_cs->current.cdw; + sctx->initial_gfx_cs_size = sctx->gfx_cs.current.cdw; return &sctx->b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); @@ -906,11 +904,11 @@ static void si_test_gds_memory_management(struct si_context *sctx, unsigned allo unsigned alignment, enum radeon_bo_domain domain) { struct radeon_winsys *ws = sctx->ws; - struct radeon_cmdbuf *cs[8]; + struct radeon_cmdbuf cs[8]; struct pb_buffer *gds_bo[ARRAY_SIZE(cs)]; for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { - cs[i] = ws->cs_create(sctx->ctx, RING_COMPUTE, NULL, NULL, false); + ws->cs_create(&cs[i], sctx->ctx, RING_COMPUTE, NULL, NULL, false); gds_bo[i] = ws->buffer_create(ws, alloc_size, alignment, domain, 0); assert(gds_bo[i]); } @@ -923,12 +921,12 @@ static void si_test_gds_memory_management(struct si_context *sctx, unsigned allo * to make the GPU busy for a moment. */ si_cp_dma_clear_buffer( - sctx, cs[i], NULL, 0, alloc_size, 0, + sctx, &cs[i], NULL, 0, alloc_size, 0, SI_CPDMA_SKIP_BO_LIST_UPDATE | SI_CPDMA_SKIP_CHECK_CS_SPACE | SI_CPDMA_SKIP_GFX_SYNC, 0, 0); - ws->cs_add_buffer(cs[i], gds_bo[i], RADEON_USAGE_READWRITE, domain, 0); - ws->cs_flush(cs[i], PIPE_FLUSH_ASYNC, NULL); + ws->cs_add_buffer(&cs[i], gds_bo[i], RADEON_USAGE_READWRITE, domain, 0); + ws->cs_flush(&cs[i], PIPE_FLUSH_ASYNC, NULL); } } exit(0); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2936e571585..743f112aa26 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -902,8 +902,8 @@ struct si_context { struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; - struct radeon_cmdbuf *gfx_cs; /* compute IB if graphics is disabled */ - struct radeon_cmdbuf *sdma_cs; + struct radeon_cmdbuf gfx_cs; /* compute IB if graphics is disabled */ + struct radeon_cmdbuf sdma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; struct si_resource *eop_bug_scratch; @@ -976,7 +976,7 @@ struct si_context { unsigned prim_discard_vertex_count_threshold; struct pb_buffer *gds; struct pb_buffer *gds_oa; - struct radeon_cmdbuf *prim_discard_compute_cs; + struct radeon_cmdbuf prim_discard_compute_cs; unsigned compute_gds_offset; struct si_shader *compute_ib_last_shader; uint32_t compute_rewind_va; @@ -1916,11 +1916,11 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc bool check_mem) { if (check_mem && - !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs, sctx->vram + bo->vram_usage, + !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram + bo->vram_usage, sctx->gtt + bo->gart_usage)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage, priority); } static inline bool si_compute_prim_discard_enabled(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index e4ddbc8d59b..2f63fc02105 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -109,10 +109,10 @@ void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsi void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (state->shader) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, state->shader->bo, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, state->shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); } diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 83eacf28903..17d9228e493 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -832,7 +832,7 @@ static void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, unsigne static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query, struct si_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; switch (query->b.type) { case SI_QUERY_TIME_ELAPSED_SDMA: @@ -869,7 +869,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h default: assert(0); } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } @@ -896,7 +896,7 @@ static void si_query_hw_emit_start(struct si_context *sctx, struct si_query_hw * static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, struct si_resource *buffer, uint64_t va) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint64_t fence_va = 0; switch (query->b.type) { @@ -949,7 +949,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw default: assert(0); } - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (fence_va) { @@ -991,7 +991,7 @@ static void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *q static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf, uint64_t va, uint32_t op) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; if (ctx->chip_class >= GFX9) { radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); @@ -1003,7 +1003,7 @@ static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf, radeon_emit(cs, va); radeon_emit(cs, op | ((va >> 32) & 0xFF)); } - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY); + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY); } static void si_emit_query_predication(struct si_context *ctx) @@ -1567,7 +1567,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; va += params.fence_offset; - si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x80000000, 0x80000000, WAIT_REG_MEM_EQUAL); + si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x80000000, 0x80000000, WAIT_REG_MEM_EQUAL); } sctx->b.launch_grid(&sctx->b, &grid); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ca87c671dda..5758cad76e6 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -68,7 +68,7 @@ static unsigned si_pack_float_12p4(float x) */ static void si_emit_cb_render_state(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_state_blend *blend = sctx->queued.named.blend; /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, * but you never know. */ @@ -687,7 +687,7 @@ static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend static void si_emit_blend_color(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); radeon_emit_array(cs, (uint32_t *)sctx->blend_color.state.color, 4); @@ -720,7 +720,7 @@ static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_s static void si_emit_clip_state(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6 * 4); radeon_emit_array(cs, (uint32_t *)sctx->clip_state.state.ucp, 6 * 4); @@ -748,7 +748,7 @@ static void si_emit_clip_regs(struct si_context *sctx) clipdist_mask &= rs->clip_plane_enable; culldist_mask |= clipdist_mask; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) | S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 && @@ -767,7 +767,7 @@ static void si_emit_clip_regs(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -1041,7 +1041,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state) */ static void si_emit_stencil_ref(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; @@ -1339,7 +1339,7 @@ static void si_emit_db_render_state(struct si_context *sctx) { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned db_shader_control, db_render_control, db_count_control; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; /* DB_RENDER_CONTROL */ if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { @@ -1423,7 +1423,7 @@ static void si_emit_db_render_state(struct si_context *sctx) S_028064_VRS_OVERRIDE_RATE_Y(0)); } - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -2894,7 +2894,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, static void si_emit_framebuffer_state(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct pipe_framebuffer_state *state = &sctx->framebuffer.state; unsigned i, nr_cbufs = state->nr_cbufs; struct si_texture *tex = NULL; @@ -2918,16 +2918,16 @@ static void si_emit_framebuffer_state(struct si_context *sctx) tex = (struct si_texture *)cb->base.texture; radeon_add_to_buffer_list( - sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, + sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER); if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, tex->cmask_buffer, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); } if (tex->dcc_separate_buffer) - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, tex->dcc_separate_buffer, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); /* Compute mutable surface parameters. */ @@ -3119,7 +3119,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) unsigned db_stencil_info = zb->db_stencil_info; unsigned db_htile_surface = zb->db_htile_surface; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA : RADEON_PRIO_DEPTH_BUFFER); @@ -3261,7 +3261,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) static void si_emit_msaa_sample_locs(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned nr_samples = sctx->framebuffer.nr_samples; bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; @@ -3372,7 +3372,7 @@ static bool si_out_of_order_rasterization(struct si_context *sctx) static void si_emit_msaa_config(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; /* 33% faster rendering to linear color buffers */ bool dst_is_linear = sctx->framebuffer.any_dst_linear; @@ -4487,7 +4487,7 @@ static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) static void si_emit_sample_mask(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned mask = sctx->sample_mask; /* Needed for line and polygon smoothing as well as for the Polaris diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 162b61b2f51..d3425e68449 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -404,7 +404,7 @@ static void gfx10_get_bin_sizes(struct si_context *sctx, unsigned cb_target_enab static void si_emit_dpbb_disable(struct si_context *sctx) { - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (sctx->chip_class >= GFX10) { struct uvec2 bin_size = {}; @@ -441,7 +441,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx) radeon_opt_set_context_reg( sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; sctx->last_binning_enabled = false; @@ -526,7 +526,7 @@ void si_emit_dpbb_state(struct si_context *sctx) if (bin_size.y >= 32) bin_size_extend.y = util_logbase2(bin_size.y) - 5; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; radeon_opt_set_context_reg( sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0, S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) | @@ -546,7 +546,7 @@ void si_emit_dpbb_state(struct si_context *sctx) radeon_opt_set_context_reg( sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; sctx->last_binning_enabled = true; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index a0f94b97fd2..597fb579d4c 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -68,7 +68,7 @@ static unsigned si_conv_pipe_prim(unsigned mode) static void si_emit_derived_tess_state(struct si_context *sctx, const struct pipe_draw_info *info, unsigned *num_patches) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_shader *ls_current; struct si_shader_selector *ls; /* The TES pointer will only be used for sctx->last_tcs. @@ -242,7 +242,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, const struct pip assert(*num_patches <= 64); assert(((pervertex_output_patch_size * *num_patches) & ~0x1fffff) == 0); - uint64_t ring_va = (unlikely(sctx->ws->cs_is_secure(sctx->gfx_cs)) ? + uint64_t ring_va = (unlikely(sctx->ws->cs_is_secure(&sctx->gfx_cs)) ? si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); @@ -600,7 +600,7 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode) ALWAYS_INLINE static void si_emit_rasterizer_prim_state(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; enum pipe_prim_type rast_prim = sctx->current_rast_prim; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned initial_cdw = cs->current.cdw; @@ -660,7 +660,7 @@ static void si_emit_vs_state(struct si_context *sctx, const struct pipe_draw_inf } if (sctx->current_vs_state != sctx->last_vs_state) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; /* For the API vertex shader (VS_STATE_INDEXED, LS_OUT_*). */ radeon_set_sh_reg( @@ -703,7 +703,7 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx, const struct pip unsigned instance_count, bool primitive_restart, unsigned min_vertex_count) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned ia_multi_vgt_param; ia_multi_vgt_param = @@ -764,7 +764,7 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches) ge_cntl |= S_03096C_PACKET_TO_ONE_PA(si_is_line_stipple_enabled(sctx)); if (ge_cntl != sctx->last_multi_vgt_param) { - radeon_set_uconfig_reg(sctx->gfx_cs, R_03096C_GE_CNTL, ge_cntl); + radeon_set_uconfig_reg(&sctx->gfx_cs, R_03096C_GE_CNTL, ge_cntl); sctx->last_multi_vgt_param = ge_cntl; } } @@ -776,7 +776,7 @@ static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_dr unsigned instance_count, bool primitive_restart, unsigned min_vertex_count) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned vgt_prim = si_conv_pipe_prim(prim); if (sctx->chip_class >= GFX10) @@ -820,7 +820,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw unsigned index_offset, unsigned instance_count, bool dispatch_prim_discard_cs, unsigned original_index_size) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned sh_base_reg = sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX]; bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; uint32_t index_max_size = 0; @@ -831,7 +831,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw struct si_streamout_target *t = (struct si_streamout_target *)indirect->count_from_stream_output; radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); - si_cp_copy_data(sctx, sctx->gfx_cs, COPY_DATA_REG, NULL, + si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, COPY_DATA_SRC_MEM, t->buf_filled_size, t->buf_filled_size_offset); use_opaque = S_0287F0_USE_OPAQUE(1); @@ -884,7 +884,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw index_va = si_resource(indexbuf)->gpu_address + index_offset; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(indexbuf), RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } } else { @@ -908,7 +908,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit(cs, indirect_va); radeon_emit(cs, indirect_va >> 32); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(indirect->buffer), + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(indirect->buffer), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); unsigned di_src_sel = index_size ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; @@ -937,7 +937,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw if (indirect->indirect_draw_count) { struct si_resource *params_buf = si_resource(indirect->indirect_draw_count); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, params_buf, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, params_buf, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset; @@ -1058,7 +1058,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw if (sctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0)) { index_max_size = (indexbuf->width0 - index_offset) >> util_logbase2(original_index_size); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(indexbuf), + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); uint64_t base_index_va = si_resource(indexbuf)->gpu_address + index_offset; @@ -1113,7 +1113,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl) { - bool compute_ib = !sctx->has_graphics || cs == sctx->prim_discard_compute_cs; + bool compute_ib = !sctx->has_graphics || cs == &sctx->prim_discard_compute_cs; assert(sctx->chip_class <= GFX9); @@ -1158,7 +1158,7 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx) si_cp_write_data(sctx, sctx->barrier_buf, sctx->barrier_buf_offset, 4, V_370_MEM, V_370_ME, &signal); - sctx->last_pkt3_write_data = &sctx->gfx_cs->current.buf[sctx->gfx_cs->current.cdw - 5]; + sctx->last_pkt3_write_data = &sctx->gfx_cs.current.buf[sctx->gfx_cs.current.cdw - 5]; /* Only the last occurence of WRITE_DATA will be executed. * The packet will be enabled in si_flush_gfx_cs. @@ -1168,7 +1168,7 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx) void gfx10_emit_cache_flush(struct si_context *ctx) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; uint32_t gcr_cntl = 0; unsigned cb_db_event = 0; unsigned flags = ctx->flags; @@ -1310,7 +1310,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx) EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number, SI_NOT_QUERY); - si_cp_wait_mem(ctx, ctx->gfx_cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); + si_cp_wait_mem(ctx, &ctx->gfx_cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); } /* Ignore fields that only modify the behavior of other fields. */ @@ -1347,7 +1347,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx) void si_emit_cache_flush(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint32_t flags = sctx->flags; if (!sctx->has_graphics) { @@ -1536,7 +1536,7 @@ void si_emit_cache_flush(struct si_context *sctx) /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) * WB must be set on GFX8+ when TC_ACTION is set. */ - si_emit_surface_sync(sctx, sctx->gfx_cs, + si_emit_surface_sync(sctx, &sctx->gfx_cs, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); cp_coher_cntl = 0; @@ -1553,21 +1553,21 @@ void si_emit_cache_flush(struct si_context *sctx) * WB doesn't work without NC. */ si_emit_surface_sync( - sctx, sctx->gfx_cs, + sctx, &sctx->gfx_cs, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; sctx->num_L2_writebacks++; } if (flags & SI_CONTEXT_INV_VCACHE) { /* Invalidate per-CU VMEM L1. */ - si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); + si_emit_surface_sync(sctx, &sctx->gfx_cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; } } /* If TC flushes haven't cleared this... */ if (cp_coher_cntl) - si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl); + si_emit_surface_sync(sctx, &sctx->gfx_cs, cp_coher_cntl); if (is_barrier) si_prim_discard_signal_next_compute_ib_start(sctx); @@ -1607,7 +1607,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) } sctx->vb_descriptors_gpu_list = ptr; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); sctx->vertex_buffer_pointer_dirty = true; sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; @@ -1669,7 +1669,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) desc[3] = rsrc_word3; if (first_vb_use_mask & (1 << i)) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(vb->buffer.resource), + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(vb->buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } } @@ -1796,7 +1796,7 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i static bool si_all_vs_resources_read_only(struct si_context *sctx, struct pipe_resource *indexbuf) { struct radeon_winsys *ws = sctx->ws; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; /* Index buffer. */ if (indexbuf && ws->cs_is_buffer_referenced(cs, si_resource(indexbuf)->buf, RADEON_USAGE_WRITE)) @@ -2265,7 +2265,7 @@ static void si_draw_vbo(struct pipe_context *ctx, /* If we're using a secure context, determine if cs must be secure or not */ if (unlikely(radeon_uses_secure_bos(sctx->ws))) { bool secure = si_gfx_resources_check_encrypted(sctx); - if (secure != sctx->ws->cs_is_secure(sctx->gfx_cs)) { + if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) { si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW | RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); } @@ -2448,7 +2448,7 @@ static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elem void si_trace_emit(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint32_t trace_id = ++sctx->current_saved_cs->trace_id; si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8d8fee64bdf..73fcf719671 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -566,7 +566,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) static void si_emit_shader_es(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.es->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -584,7 +584,7 @@ static void si_emit_shader_es(struct si_context *sctx) SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, shader->vgt_vertex_reuse_block_cntl); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -729,7 +729,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector * static void si_emit_shader_gs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -783,7 +783,7 @@ static void si_emit_shader_gs(struct si_context *sctx) shader->vgt_vertex_reuse_block_cntl); } - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -929,7 +929,7 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value) if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 || sctx->tracked_regs.reg_value[reg] != value) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; if (sctx->chip_class == GFX10) { /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */ @@ -976,7 +976,7 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl, SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ @@ -986,7 +986,7 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -997,7 +997,7 @@ static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx) static void gfx10_emit_shader_ngg_tess_nogs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -1011,7 +1011,7 @@ static void gfx10_emit_shader_ngg_tess_nogs(struct si_context *sctx) static void gfx10_emit_shader_ngg_notess_gs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -1025,7 +1025,7 @@ static void gfx10_emit_shader_ngg_notess_gs(struct si_context *sctx) static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -1308,7 +1308,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader static void si_emit_shader_vs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.vs->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -1357,7 +1357,7 @@ static void si_emit_shader_vs(struct si_context *sctx) SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); } - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ @@ -1536,7 +1536,7 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader) static void si_emit_shader_ps(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.ps->shader; - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; if (!shader) return; @@ -1559,7 +1559,7 @@ static void si_emit_shader_ps(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, shader->ctx_reg.ps.cb_shader_mask); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -3378,11 +3378,11 @@ static void si_emit_spi_map(struct si_context *sctx) /* R_028644_SPI_PS_INPUT_CNTL_0 */ /* Dota 2: Only ~16% of SPI map updates set different values. */ /* Talos: Only ~9% of SPI map updates set different values. */ - unsigned initial_cdw = sctx->gfx_cs->current.cdw; + unsigned initial_cdw = sctx->gfx_cs.current.cdw; radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, spi_ps_input_cntl, sctx->tracked_regs.spi_ps_input_cntl, num_interp); - if (initial_cdw != sctx->gfx_cs->current.cdw) + if (initial_cdw != sctx->gfx_cs.current.cdw) sctx->context_roll = true; } @@ -3506,7 +3506,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (sctx->shadowed_regs) { /* These registers will be shadowed, so set them only once. */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; assert(sctx->chip_class >= GFX7); @@ -3787,11 +3787,11 @@ static void si_init_tess_factor_ring(struct si_context *sctx) if (sctx->shadowed_regs) { /* These registers will be shadowed, so set them only once. */ /* TODO: tmz + shadowed_regs support */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; assert(sctx->chip_class >= GFX7); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(sctx->tess_rings), + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(sctx->tess_rings), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); si_emit_vgt_flush(cs); @@ -4139,12 +4139,12 @@ bool si_update_shaders(struct si_context *sctx) static void si_emit_scratch_state(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, sctx->spi_tmpring_size); if (sctx->scratch_buffer) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, sctx->scratch_buffer, RADEON_USAGE_READWRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->scratch_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SCRATCH_BUFFER); } } diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 346a603d795..f52e4b1b70d 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -213,7 +213,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ static void gfx10_emit_streamout_begin(struct si_context *sctx) { struct si_streamout_target **t = sctx->streamout.targets; - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned last_target = 0; for (unsigned i = 0; i < sctx->streamout.num_targets; i++) { @@ -231,7 +231,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx) uint64_t va = 0; if (append) { - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; @@ -260,7 +260,7 @@ static void gfx10_emit_streamout_end(struct si_context *sctx) uint64_t va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; - si_cp_release_mem(sctx, sctx->gfx_cs, V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, + si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_GDS, t[i]->buf_filled_size, va, EOP_DATA_GDS(i, 1), 0); @@ -272,7 +272,7 @@ static void gfx10_emit_streamout_end(struct si_context *sctx) static void si_flush_vgt_streamout(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ @@ -299,7 +299,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx) static void si_emit_streamout_begin(struct si_context *sctx) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_streamout_target **t = sctx->streamout.targets; uint16_t *stride_in_dw = sctx->streamout.stride_in_dw; unsigned i; @@ -331,7 +331,7 @@ static void si_emit_streamout_begin(struct si_context *sctx) radeon_emit(cs, va); /* src address lo */ radeon_emit(cs, va >> 32); /* src address hi */ - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_READ, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); } else { /* Start from the beginning. */ @@ -355,7 +355,7 @@ void si_emit_streamout_end(struct si_context *sctx) return; } - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct si_streamout_target **t = sctx->streamout.targets; unsigned i; uint64_t va; @@ -375,7 +375,7 @@ void si_emit_streamout_end(struct si_context *sctx) radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */ - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, t[i]->buf_filled_size, RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE); /* Zero the buffer size. The counters (primitives generated, @@ -402,13 +402,13 @@ static void si_emit_streamout_enable(struct si_context *sctx) { assert(!sctx->screen->use_ngg_streamout); - radeon_set_context_reg_seq(sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2); - radeon_emit(sctx->gfx_cs, S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) | + radeon_set_context_reg_seq(&sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2); + radeon_emit(&sctx->gfx_cs, S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) | S_028B94_RAST_STREAM(0) | S_028B94_STREAMOUT_1_EN(si_get_strmout_en(sctx)) | S_028B94_STREAMOUT_2_EN(si_get_strmout_en(sctx)) | S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx))); - radeon_emit(sctx->gfx_cs, + radeon_emit(&sctx->gfx_cs, sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask); } diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 9d62b2c85f2..9dce8079613 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -316,7 +316,7 @@ static void si_emit_guardband(struct si_context *ctx) * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ - unsigned initial_cdw = ctx->gfx_cs->current.cdw; + unsigned initial_cdw = ctx->gfx_cs.current.cdw; radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), fui(guardband_x), fui(discard_x)); @@ -328,7 +328,7 @@ static void si_emit_guardband(struct si_context *ctx) ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(rs->half_pixel_center) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode)); - if (initial_cdw != ctx->gfx_cs->current.cdw) + if (initial_cdw != ctx->gfx_cs.current.cdw) ctx->context_roll = true; si_update_ngg_small_prim_precision(ctx); @@ -336,7 +336,7 @@ static void si_emit_guardband(struct si_context *ctx) static void si_emit_scissors(struct si_context *ctx) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct pipe_scissor_state *states = ctx->scissors; bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; @@ -439,7 +439,7 @@ static void si_set_viewport_states(struct pipe_context *pctx, unsigned start_slo static void si_emit_one_viewport(struct si_context *ctx, struct pipe_viewport_state *state) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; radeon_emit(cs, fui(state->scale[0])); radeon_emit(cs, fui(state->translate[0])); @@ -451,7 +451,7 @@ static void si_emit_one_viewport(struct si_context *ctx, struct pipe_viewport_st static void si_emit_viewports(struct si_context *ctx) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct pipe_viewport_state *states = ctx->viewports.states; if (ctx->screen->use_ngg_culling) { @@ -473,9 +473,9 @@ static void si_emit_viewports(struct si_context *ctx) if (ctx->small_prim_cull_info_dirty) { /* This will end up in SGPR6 as (value << 8), shifted by the hw. */ - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->small_prim_cull_info_buf, + radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->small_prim_cull_info_buf, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); - radeon_set_sh_reg(ctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS, + radeon_set_sh_reg(&ctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS, ctx->small_prim_cull_info_address >> 8); ctx->small_prim_cull_info_dirty = false; } @@ -509,7 +509,7 @@ static inline void si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, b static void si_emit_depth_ranges(struct si_context *ctx) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; + struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct pipe_viewport_state *states = ctx->viewports.states; bool clip_halfz = ctx->queued.named.rasterizer->clip_halfz; bool window_space = ctx->vs_disables_clipping_viewport; @@ -596,7 +596,7 @@ static void si_emit_window_rectangles(struct si_context *sctx) * * If CLIPRECT_RULE & (1 << number), the pixel is rasterized. */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; static const unsigned outside[4] = { /* outside rectangle 0 */ V_02820C_OUT | V_02820C_IN_1 | V_02820C_IN_2 | V_02820C_IN_21 | V_02820C_IN_3 | diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index 0ca8dce62a0..7135a958496 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -111,7 +111,7 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned cs_dwords_per_thread = test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0; - if (test_sdma && !sctx->sdma_cs) + if (test_sdma && !sctx->sdma_cs.priv) continue; if (sctx->chip_class == GFX6) { @@ -198,7 +198,7 @@ void si_test_dma_perf(struct si_screen *sscreen) si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0, SI_COHERENCY_NONE, cache_policy); } else { - si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, size, clear_value, 0, + si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, 0, size, clear_value, 0, SI_COHERENCY_NONE, cache_policy); } } else if (test_sdma) { diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 16157d443f7..9656b826d96 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -52,7 +52,7 @@ bool si_prepare_for_dma_blit(struct si_context *sctx, struct si_texture *dst, un unsigned dstx, unsigned dsty, unsigned dstz, struct si_texture *src, unsigned src_level, const struct pipe_box *src_box) { - if (!sctx->sdma_cs) + if (!sctx->sdma_cs.priv) return false; if (dst->surface.bpe != src->surface.bpe) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 46e560a6dca..90c598316f0 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -276,7 +276,7 @@ void *amdgpu_bo_map(struct pb_buffer *buf, { struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_winsys_bo *real; - struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs; + struct amdgpu_cs *cs = rcs ? amdgpu_cs(rcs) : NULL; assert(!(bo->base.usage & RADEON_FLAG_SPARSE)); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 25be8a1695c..632138fde01 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -489,7 +489,8 @@ amdgpu_do_add_real_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo } static int -amdgpu_lookup_or_add_real_buffer(struct amdgpu_cs *acs, struct amdgpu_winsys_bo *bo) +amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs *acs, + struct amdgpu_winsys_bo *bo) { struct amdgpu_cs_context *cs = acs->csc; unsigned hash; @@ -504,14 +505,15 @@ amdgpu_lookup_or_add_real_buffer(struct amdgpu_cs *acs, struct amdgpu_winsys_bo cs->buffer_indices_hashlist[hash] = idx; if (bo->base.placement & RADEON_DOMAIN_VRAM) - acs->main.base.used_vram += bo->base.size; + rcs->used_vram += bo->base.size; else if (bo->base.placement & RADEON_DOMAIN_GTT) - acs->main.base.used_gart += bo->base.size; + rcs->used_gart += bo->base.size; return idx; } -static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_cs *acs, +static int amdgpu_lookup_or_add_slab_buffer(struct radeon_cmdbuf *rcs, + struct amdgpu_cs *acs, struct amdgpu_winsys_bo *bo) { struct amdgpu_cs_context *cs = acs->csc; @@ -523,7 +525,7 @@ static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_cs *acs, if (idx >= 0) return idx; - real_idx = amdgpu_lookup_or_add_real_buffer(acs, bo->u.slab.real); + real_idx = amdgpu_lookup_or_add_real_buffer(rcs, acs, bo->u.slab.real); if (real_idx < 0) return -1; @@ -560,7 +562,8 @@ static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_cs *acs, return idx; } -static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_cs *acs, +static int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs, + struct amdgpu_cs *acs, struct amdgpu_winsys_bo *bo) { struct amdgpu_cs_context *cs = acs->csc; @@ -607,9 +610,9 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_cs *acs, list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { if (bo->base.placement & RADEON_DOMAIN_VRAM) - acs->main.base.used_vram += backing->bo->base.size; + rcs->used_vram += backing->bo->base.size; else if (bo->base.placement & RADEON_DOMAIN_GTT) - acs->main.base.used_gart += backing->bo->base.size; + rcs->used_gart += backing->bo->base.size; } simple_mtx_unlock(&bo->lock); @@ -643,7 +646,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, if (!(bo->base.usage & RADEON_FLAG_SPARSE)) { if (!bo->bo) { - index = amdgpu_lookup_or_add_slab_buffer(acs, bo); + index = amdgpu_lookup_or_add_slab_buffer(rcs, acs, bo); if (index < 0) return 0; @@ -653,14 +656,14 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, usage &= ~RADEON_USAGE_SYNCHRONIZED; index = buffer->u.slab.real_idx; } else { - index = amdgpu_lookup_or_add_real_buffer(acs, bo); + index = amdgpu_lookup_or_add_real_buffer(rcs, acs, bo); if (index < 0) return 0; } buffer = &cs->real_buffers[index]; } else { - index = amdgpu_lookup_or_add_sparse_buffer(acs, bo); + index = amdgpu_lookup_or_add_sparse_buffer(rcs, acs, bo); if (index < 0) return 0; @@ -679,7 +682,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib, - enum ring_type ring_type) + struct amdgpu_cs *cs) { struct pb_buffer *pb; uint8_t *mapped; @@ -691,7 +694,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, * is the largest power of two that fits into the size field of the * INDIRECT_BUFFER packet. */ - if (amdgpu_cs_has_chaining(amdgpu_cs_from_ib(ib))) + if (amdgpu_cs_has_chaining(cs)) buffer_size = 4 *util_next_power_of_two(ib->max_ib_size); else buffer_size = 4 *util_next_power_of_two(4 * ib->max_ib_size); @@ -706,9 +709,9 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, ws->info.gart_page_size, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING | - (ring_type == RING_GFX || - ring_type == RING_COMPUTE || - ring_type == RING_DMA ? + (cs->ring_type == RING_GFX || + cs->ring_type == RING_COMPUTE || + cs->ring_type == RING_DMA ? RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC : 0)); if (!pb) return false; @@ -746,29 +749,19 @@ static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type) } } -static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws, struct amdgpu_cs *cs, - enum ib_type ib_type) +static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws, + struct radeon_cmdbuf *rcs, + struct amdgpu_ib *ib, + struct amdgpu_cs *cs) { /* Small IBs are better than big IBs, because the GPU goes idle quicker * and there is less waiting for buffers and fences. Proof: * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1 */ - struct amdgpu_ib *ib = NULL; - struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib_type]; + struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib->ib_type]; /* This is the minimum size of a contiguous IB. */ unsigned ib_size = 4 * 1024 * 4; - switch (ib_type) { - case IB_PARALLEL_COMPUTE: - ib = &cs->compute_ib; - break; - case IB_MAIN: - ib = &cs->main; - break; - default: - unreachable("unhandled IB type"); - } - /* Always allocate at least the size of the biggest cs_check_space call, * because precisely the last call might have requested this size. */ @@ -777,20 +770,20 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws, struct amdgpu_cs *cs, if (!amdgpu_cs_has_chaining(cs)) { ib_size = MAX2(ib_size, 4 * MIN2(util_next_power_of_two(ib->max_ib_size), - amdgpu_ib_max_submit_dwords(ib_type))); + amdgpu_ib_max_submit_dwords(ib->ib_type))); } ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32; - ib->base.prev_dw = 0; - ib->base.num_prev = 0; - ib->base.current.cdw = 0; - ib->base.current.buf = NULL; + rcs->prev_dw = 0; + rcs->num_prev = 0; + rcs->current.cdw = 0; + rcs->current.buf = NULL; /* Allocate a new buffer for IBs if the current buffer is all used. */ if (!ib->big_ib_buffer || ib->used_ib_space + ib_size > ib->big_ib_buffer->size) { - if (!amdgpu_ib_new_buffer(ws, ib, cs->ring_type)) + if (!amdgpu_ib_new_buffer(ws, ib, cs)) return false; } @@ -801,33 +794,34 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *ws, struct amdgpu_cs *cs, ib->ptr_ib_size = &info->ib_bytes; ib->ptr_ib_size_inside_ib = false; - amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer, + amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer, RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); - ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); + rcs->current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); ib_size = ib->big_ib_buffer->size - ib->used_ib_space; - ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs); - ib->base.gpu_address = info->va_start; + rcs->current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs); + rcs->gpu_address = info->va_start; return true; } -static void amdgpu_set_ib_size(struct amdgpu_ib *ib) +static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) { if (ib->ptr_ib_size_inside_ib) { - *ib->ptr_ib_size = ib->base.current.cdw | + *ib->ptr_ib_size = rcs->current.cdw | S_3F2_CHAIN(1) | S_3F2_VALID(1); } else { - *ib->ptr_ib_size = ib->base.current.cdw; + *ib->ptr_ib_size = rcs->current.cdw; } } -static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib) +static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *rcs, + struct amdgpu_ib *ib) { - amdgpu_set_ib_size(ib); - ib->used_ib_space += ib->base.current.cdw * 4; + amdgpu_set_ib_size(rcs, ib); + ib->used_ib_space += rcs->current.cdw * 4; ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_alignment); - ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + ib->base.current.cdw); + ib->max_ib_size = MAX2(ib->max_ib_size, rcs->prev_dw + rcs->current.cdw); } static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, @@ -944,8 +938,9 @@ static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs) } -static struct radeon_cmdbuf * -amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, +static bool +amdgpu_cs_create(struct radeon_cmdbuf *rcs, + struct radeon_winsys_ctx *rwctx, enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), @@ -957,7 +952,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, cs = CALLOC_STRUCT(amdgpu_cs); if (!cs) { - return NULL; + return false; } util_queue_fence_init(&cs->flush_completed); @@ -979,47 +974,52 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) { FREE(cs); - return NULL; + return false; } if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) { amdgpu_destroy_cs_context(&cs->csc1); FREE(cs); - return NULL; + return false; } /* Set the first submission context as current. */ cs->csc = &cs->csc1; cs->cst = &cs->csc2; - if (!amdgpu_get_new_ib(ctx->ws, cs, IB_MAIN)) { + cs->main.rcs = rcs; + rcs->priv = cs; + + if (!amdgpu_get_new_ib(ctx->ws, rcs, &cs->main, cs)) { amdgpu_destroy_cs_context(&cs->csc2); amdgpu_destroy_cs_context(&cs->csc1); FREE(cs); - return NULL; + rcs->priv = NULL; + return false; } p_atomic_inc(&ctx->ws->num_cs); - return &cs->main.base; + return true; } -static struct radeon_cmdbuf * -amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib, +static bool +amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *compute_cs, + struct radeon_cmdbuf *gfx_cs, bool uses_gds_ordered_append) { - struct amdgpu_cs *cs = (struct amdgpu_cs*)ib; + struct amdgpu_cs *cs = amdgpu_cs(gfx_cs); struct amdgpu_winsys *ws = cs->ctx->ws; if (cs->ring_type != RING_GFX) - return NULL; + return false; /* only one secondary IB can be added */ if (cs->compute_ib.ib_mapped) - return NULL; + return false; /* Allocate the compute IB. */ - if (!amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE)) - return NULL; + if (!amdgpu_get_new_ib(ws, compute_cs, &cs->compute_ib, cs)) + return false; if (uses_gds_ordered_append) { cs->csc1.ib[IB_PARALLEL_COMPUTE].flags |= @@ -1027,15 +1027,17 @@ amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib, cs->csc2.ib[IB_PARALLEL_COMPUTE].flags |= AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID; } - return &cs->compute_ib.base; + + cs->compute_ib.rcs = compute_cs; + compute_cs->priv = cs; + return true; } static bool amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, unsigned preamble_num_dw) { - struct amdgpu_ib *ib = amdgpu_ib(rcs); - struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib); + struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ctx->ws; struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2}; unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment); @@ -1092,8 +1094,8 @@ static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs) static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, bool force_chaining) { - struct amdgpu_ib *ib = amdgpu_ib(rcs); - struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib); + struct amdgpu_cs *cs = amdgpu_cs(rcs); + struct amdgpu_ib *ib = rcs == cs->main.rcs ? &cs->main : &cs->compute_ib; unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw; unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs); unsigned need_byte_size = (dw + cs_epilog_dw) * 4; @@ -1138,7 +1140,7 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, rcs->max_prev = new_max_prev; } - if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib, cs->ring_type)) + if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib, cs)) return false; assert(ib->used_ib_space == 0); @@ -1161,7 +1163,7 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, assert((rcs->current.cdw & 7) == 0); assert(rcs->current.cdw <= rcs->current.max_dw); - amdgpu_set_ib_size(ib); + amdgpu_set_ib_size(rcs, ib); ib->ptr_ib_size = new_ptr_ib_size; ib->ptr_ib_size_inside_ib = true; @@ -1171,14 +1173,14 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw, rcs->prev[rcs->num_prev].max_dw = rcs->current.cdw; /* no modifications */ rcs->num_prev++; - ib->base.prev_dw += ib->base.current.cdw; - ib->base.current.cdw = 0; + rcs->prev_dw += rcs->current.cdw; + rcs->current.cdw = 0; - ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); - ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - cs_epilog_dw; - ib->base.gpu_address = va; + rcs->current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); + rcs->current.max_dw = ib->big_ib_buffer->size / 4 - cs_epilog_dw; + rcs->gpu_address = va; - amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer, + amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer, RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); return true; @@ -1764,8 +1766,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, /* Also pad secondary IBs. */ if (cs->compute_ib.ib_mapped) { - while (cs->compute_ib.base.current.cdw & ib_pad_dw_mask) - radeon_emit(&cs->compute_ib.base, PKT3_NOP_PAD); + while (cs->compute_ib.rcs->current.cdw & ib_pad_dw_mask) + radeon_emit(cs->compute_ib.rcs, PKT3_NOP_PAD); } break; case RING_UVD: @@ -1794,17 +1796,17 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, } /* If the CS is not empty or overflowed.... */ - if (likely(radeon_emitted(&cs->main.base, 0) && - cs->main.base.current.cdw <= cs->main.base.current.max_dw && + if (likely(radeon_emitted(rcs, 0) && + rcs->current.cdw <= rcs->current.max_dw && !cs->noop && !(flags & RADEON_FLUSH_NOOP))) { struct amdgpu_cs_context *cur = cs->csc; /* Set IB sizes. */ - amdgpu_ib_finalize(ws, &cs->main); + amdgpu_ib_finalize(ws, rcs, &cs->main); if (cs->compute_ib.ib_mapped) - amdgpu_ib_finalize(ws, &cs->compute_ib); + amdgpu_ib_finalize(ws, cs->compute_ib.rcs, &cs->compute_ib); /* Create a fence. */ amdgpu_fence_reference(&cur->fence, NULL); @@ -1858,17 +1860,17 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, amdgpu_cs_context_cleanup(cs->csc); } - amdgpu_get_new_ib(ws, cs, IB_MAIN); + amdgpu_get_new_ib(ws, rcs, &cs->main, cs); if (cs->compute_ib.ib_mapped) - amdgpu_get_new_ib(ws, cs, IB_PARALLEL_COMPUTE); + amdgpu_get_new_ib(ws, cs->compute_ib.rcs, &cs->compute_ib, cs); if (cs->preamble_ib_bo) { amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); } - cs->main.base.used_gart = 0; - cs->main.base.used_vram = 0; + rcs->used_gart = 0; + rcs->used_vram = 0; if (cs->ring_type == RING_GFX) ws->num_gfx_IBs++; @@ -1882,14 +1884,18 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs) { struct amdgpu_cs *cs = amdgpu_cs(rcs); + if (!cs) + return; + amdgpu_cs_sync_flush(rcs); util_queue_fence_destroy(&cs->flush_completed); p_atomic_dec(&cs->ctx->ws->num_cs); pb_reference(&cs->preamble_ib_bo, NULL); pb_reference(&cs->main.big_ib_buffer, NULL); - FREE(cs->main.base.prev); + FREE(rcs->prev); pb_reference(&cs->compute_ib.big_ib_buffer, NULL); - FREE(cs->compute_ib.base.prev); + if (cs->compute_ib.rcs) + FREE(cs->compute_ib.rcs->prev); amdgpu_destroy_cs_context(&cs->csc1); amdgpu_destroy_cs_context(&cs->csc2); amdgpu_fence_reference(&cs->next_fence, NULL); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index d3dbe607445..2652ba0d50f 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -63,7 +63,7 @@ enum ib_type { }; struct amdgpu_ib { - struct radeon_cmdbuf base; + struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */ /* A buffer out of which new IBs are allocated. */ struct pb_buffer *big_ib_buffer; @@ -130,7 +130,7 @@ struct amdgpu_cs_context { struct amdgpu_cs { struct amdgpu_ib main; /* must be first because this is inherited */ - struct amdgpu_ib compute_ib; /* optional parallel compute IB */ + struct amdgpu_ib compute_ib; /* optional parallel compute IB */ struct amdgpu_ctx *ctx; enum ring_type ring_type; struct drm_amdgpu_cs_chunk_fence fence_chunk; @@ -210,35 +210,17 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo); -static inline struct amdgpu_ib * -amdgpu_ib(struct radeon_cmdbuf *base) -{ - return (struct amdgpu_ib *)base; -} - static inline struct amdgpu_cs * -amdgpu_cs(struct radeon_cmdbuf *base) +amdgpu_cs(struct radeon_cmdbuf *rcs) { - assert(amdgpu_ib(base)->ib_type == IB_MAIN); - return (struct amdgpu_cs*)base; + struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv; + assert(!cs || cs->main.ib_type == IB_MAIN); + return cs; } #define get_container(member_ptr, container_type, container_member) \ (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member)) -static inline struct amdgpu_cs * -amdgpu_cs_from_ib(struct amdgpu_ib *ib) -{ - switch (ib->ib_type) { - case IB_MAIN: - return get_container(ib, struct amdgpu_cs, main); - case IB_PARALLEL_COMPUTE: - return get_container(ib, struct amdgpu_cs, compute_ib); - default: - unreachable("bad ib_type"); - } -} - static inline bool amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 3174464079f..d7f892bc1a5 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -501,7 +501,7 @@ static void *radeon_bo_map(struct pb_buffer *buf, enum pipe_map_flags usage) { struct radeon_bo *bo = (struct radeon_bo*)buf; - struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs; + struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL; /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 1a8ceb29165..c1c307283e6 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -162,8 +162,9 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc) } -static struct radeon_cmdbuf * -radeon_drm_cs_create(struct radeon_winsys_ctx *ctx, +static bool +radeon_drm_cs_create(struct radeon_cmdbuf *rcs, + struct radeon_winsys_ctx *ctx, enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), @@ -175,7 +176,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx, cs = CALLOC_STRUCT(radeon_drm_cs); if (!cs) { - return NULL; + return false; } util_queue_fence_init(&cs->flush_completed); @@ -185,23 +186,26 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx, if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { FREE(cs); - return NULL; + return false; } if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { radeon_destroy_cs_context(&cs->csc1); FREE(cs); - return NULL; + return false; } /* Set the first command buffer as current. */ cs->csc = &cs->csc1; cs->cst = &cs->csc2; - cs->base.current.buf = cs->csc->buf; - cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf); cs->ring_type = ring_type; + memset(rcs, 0, sizeof(*rcs)); + rcs->current.buf = cs->csc->buf; + rcs->current.max_dw = ARRAY_SIZE(cs->csc->buf); + rcs->priv = cs; + p_atomic_inc(&ws->num_cs); - return &cs->base; + return true; } int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo) @@ -387,9 +391,9 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; if (added_domains & RADEON_DOMAIN_VRAM) - cs->base.used_vram += bo->base.size; + rcs->used_vram += bo->base.size; else if (added_domains & RADEON_DOMAIN_GTT) - cs->base.used_gart += bo->base.size; + rcs->used_gart += bo->base.size; return index; } @@ -406,8 +410,8 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); bool status = - cs->base.used_gart < cs->ws->info.gart_size * 0.8 && - cs->base.used_vram < cs->ws->info.vram_size * 0.8; + rcs->used_gart < cs->ws->info.gart_size * 0.8 && + rcs->used_vram < cs->ws->info.vram_size * 0.8; if (status) { cs->csc->num_validated_relocs = cs->csc->num_relocs; @@ -429,11 +433,11 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs) RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } else { radeon_cs_context_cleanup(cs->csc); - cs->base.used_vram = 0; - cs->base.used_gart = 0; + rcs->used_vram = 0; + rcs->used_gart = 0; - assert(cs->base.current.cdw == 0); - if (cs->base.current.cdw != 0) { + assert(rcs->current.cdw == 0); + if (rcs->current.cdw != 0) { fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); } } @@ -570,10 +574,10 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= GFX6) { while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0xf0000000); /* NOP packet */ + radeon_emit(rcs, 0xf0000000); /* NOP packet */ } else { while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0x00000000); /* NOP packet */ + radeon_emit(rcs, 0x00000000); /* NOP packet */ } break; case RING_GFX: @@ -582,15 +586,15 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, */ if (cs->ws->info.gfx_ib_pad_with_type2) { while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ + radeon_emit(rcs, 0x80000000); /* type2 nop packet */ } else { while (rcs->current.cdw & 7) - radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */ + radeon_emit(rcs, 0xffff1000); /* type3 nop packet */ } break; case RING_UVD: while (rcs->current.cdw & 15) - radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ + radeon_emit(rcs, 0x80000000); /* type2 nop packet */ break; default: break; @@ -636,13 +640,13 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ - if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && + if (rcs->current.cdw && rcs->current.cdw <= rcs->current.max_dw && !cs->ws->noop_cs && !(flags & RADEON_FLUSH_NOOP)) { unsigned i, num_relocs; num_relocs = cs->cst->num_relocs; - cs->cst->chunks[0].length_dw = cs->base.current.cdw; + cs->cst->chunks[0].length_dw = rcs->current.cdw; for (i = 0; i < num_relocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ @@ -706,10 +710,10 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, } /* Prepare a new CS. */ - cs->base.current.buf = cs->csc->buf; - cs->base.current.cdw = 0; - cs->base.used_vram = 0; - cs->base.used_gart = 0; + rcs->current.buf = cs->csc->buf; + rcs->current.cdw = 0; + rcs->used_vram = 0; + rcs->used_gart = 0; if (cs->ring_type == RING_GFX) cs->ws->num_gfx_IBs++; @@ -722,6 +726,9 @@ static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + if (!cs) + return; + radeon_drm_cs_sync_flush(rcs); util_queue_fence_destroy(&cs->flush_completed); radeon_cs_context_cleanup(&cs->csc1); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index b07ffc19ec1..1090dfa4090 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -70,7 +70,6 @@ struct radeon_cs_context { }; struct radeon_drm_cs { - struct radeon_cmdbuf base; enum ring_type ring_type; /* We flip between these two CS. While one is being consumed @@ -97,9 +96,9 @@ struct radeon_drm_cs { int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo); static inline struct radeon_drm_cs * -radeon_drm_cs(struct radeon_cmdbuf *base) +radeon_drm_cs(struct radeon_cmdbuf *rcs) { - return (struct radeon_drm_cs*)base; + return (struct radeon_drm_cs*)rcs->priv; } static inline bool