radv: emit stream outputs for vertex and tessellation stages

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Samuel Pitoiset
2018-10-05 17:54:22 +02:00
parent 19f1b49236
commit 6c21645046

View File

@@ -2492,6 +2492,140 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
return LLVMBuildLoad(ctx->ac.builder, output, "");
}
/**
 * Emit the buffer store for a single streamout output.
 *
 * \param ctx               shader context used for all LLVM building
 * \param so_buffers        per-buffer values, indexed by output->buffer
 * \param so_write_offsets  per-buffer write offsets, indexed by
 *                          output->buffer
 * \param output            output to store (location, buffer, offset and
 *                          component mask)
 *
 * util_bitcount(component_mask) consecutive channels are loaded, starting
 * at the lowest bit set in the mask. Nothing is emitted when that count is
 * zero or larger than four.
 */
static void
radv_emit_stream_output(struct radv_shader_context *ctx,
			LLVMValueRef const *so_buffers,
			LLVMValueRef const *so_write_offsets,
			const struct radv_stream_output *output)
{
	const unsigned count = util_bitcount(output->component_mask);
	LLVMValueRef comps[4];
	LLVMValueRef packed;

	/* The explicit check keeps release (NDEBUG) builds safe as well. */
	assert(count && count <= 4);
	if (count == 0 || count > 4)
		return;

	/* Index of the first enabled component. */
	const unsigned first = ffs(output->component_mask) - 1;

	/* Every skipped component shifts the destination offset by 4. */
	const unsigned dst_offset = output->offset + first * 4;

	/* Fetch each enabled component and view it as an integer. */
	for (unsigned c = 0; c < count; c++) {
		LLVMValueRef value =
			radv_load_output(ctx, output->location, first + c);

		comps[c] = ac_to_integer(&ctx->ac, value);
	}

	if (count == 1) {
		/* A single component is stored as a plain i32. */
		packed = comps[0];
	} else {
		/* Two components are packed as v2i32, three or four as
		 * v4i32. For fewer than four components the last slot is
		 * filled with an undefined value (three components are
		 * aligned to four).
		 */
		if (count != 4)
			comps[3] = LLVMGetUndef(ctx->ac.i32);

		packed = ac_build_gather_values(&ctx->ac, comps,
						util_next_power_of_two(count));
	}

	ac_build_buffer_store_dword(&ctx->ac, so_buffers[output->buffer],
				    packed, count,
				    so_write_offsets[output->buffer],
				    ctx->ac.i32_0, dst_offset,
				    1, 1, true, false);
}
static void
radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
{
struct ac_build_if_state if_ctx;
int i;
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
assert(ctx->streamout_config);
LLVMValueRef so_vtx_count =
ac_build_bfe(&ctx->ac, ctx->streamout_config,
LLVMConstInt(ctx->ac.i32, 16, false),
LLVMConstInt(ctx->ac.i32, 7, false), false);
LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
tid, so_vtx_count, "");
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data.
*/
ac_nir_build_if(&if_ctx, ctx, can_emit);
{
/* The buffer offset is computed as follows:
* ByteOffset = streamout_offset[buffer_id]*4 +
* (streamout_write_index + thread_id)*stride[buffer_id] +
* attrib_offset
*/
LLVMValueRef so_write_index = ctx->streamout_write_idx;
/* Compute (streamout_write_index + thread_id). */
so_write_index =
LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
/* Load the descriptor and compute the write offset for each
* enabled buffer.
*/
LLVMValueRef so_write_offset[4] = {};
LLVMValueRef so_buffers[4] = {};
LLVMValueRef buf_ptr = ctx->streamout_buffers;
for (i = 0; i < 4; i++) {
uint16_t stride = ctx->shader_info->info.so.strides[i];
if (!stride)
continue;
LLVMValueRef offset =
LLVMConstInt(ctx->ac.i32, i, false);
so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac,
buf_ptr, offset);
LLVMValueRef so_offset = ctx->streamout_offset[i];
so_offset = LLVMBuildMul(ctx->ac.builder, so_offset,
LLVMConstInt(ctx->ac.i32, 4, false), "");
so_write_offset[i] =
ac_build_imad(&ctx->ac, so_write_index,
LLVMConstInt(ctx->ac.i32,
stride * 4, false),
so_offset);
}
/* Write streamout data. */
for (i = 0; i < ctx->shader_info->info.so.num_outputs; i++) {
struct radv_stream_output *output =
&ctx->shader_info->info.so.outputs[i];
if (stream != output->stream)
continue;
radv_emit_stream_output(ctx, so_buffers,
so_write_offset, output);
}
}
ac_nir_build_endif(&if_ctx);
}
static void
handle_vs_outputs_post(struct radv_shader_context *ctx,
bool export_prim_id, bool export_layer_id,
@@ -2589,6 +2723,9 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
viewport_index_value = radv_load_output(ctx, VARYING_SLOT_VIEWPORT, 0);
}
if (ctx->shader_info->info.so.num_outputs)
radv_emit_streamout(ctx, 0);
if (outinfo->writes_pointsize ||
outinfo->writes_layer ||
outinfo->writes_viewport_index) {