i965: Port gen7+ 3DSTATE_SOL to genxml.
Emit 3DSTATE_SOL on Gen7+ using the brw_batch_emit helper, which uses the pack structs from genxml. v2: - Add helpers to assign struct brw_address (Kristian) v3: - Rename MOCS -> SOBufferMOCS - Do not re-declare MOCS macros (Ken). - Style and code reorganization (Ken). Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:

committed by
Kenneth Graunke

parent
c5d6ee6ccb
commit
ddc6f4d069
@@ -111,7 +111,6 @@ i965_FILES = \
|
||||
gen8_hs_state.c \
|
||||
gen8_multisample_state.c \
|
||||
gen8_ps_state.c \
|
||||
gen8_sol_state.c \
|
||||
gen8_surface_state.c \
|
||||
gen8_viewport_state.c \
|
||||
gen8_vs_state.c \
|
||||
|
@@ -135,7 +135,6 @@ extern const struct brw_tracked_state gen7_l3_state;
|
||||
extern const struct brw_tracked_state gen7_ps_state;
|
||||
extern const struct brw_tracked_state gen7_push_constant_space;
|
||||
extern const struct brw_tracked_state gen7_sf_clip_viewport;
|
||||
extern const struct brw_tracked_state gen7_sol_state;
|
||||
extern const struct brw_tracked_state gen7_te_state;
|
||||
extern const struct brw_tracked_state gen7_tes_push_constants;
|
||||
extern const struct brw_tracked_state gen7_urb;
|
||||
@@ -299,11 +298,6 @@ void gen8_upload_ps_state(struct brw_context *brw,
|
||||
void gen8_upload_ps_extra(struct brw_context *brw,
|
||||
const struct brw_wm_prog_data *prog_data);
|
||||
|
||||
/* gen7_sol_state.c */
|
||||
void gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
|
||||
const struct brw_vue_map *vue_map);
|
||||
void gen8_upload_3dstate_so_buffers(struct brw_context *brw);
|
||||
|
||||
/* gen8_surface_state.c */
|
||||
|
||||
void gen8_init_vtable_surface_functions(struct brw_context *brw);
|
||||
|
@@ -35,313 +35,6 @@
|
||||
#include "intel_buffer_objects.h"
|
||||
#include "main/transformfeedback.h"
|
||||
|
||||
static void
|
||||
upload_3dstate_so_buffers(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
int i;
|
||||
|
||||
/* Set up the up to 4 output buffers. These are the ranges defined in the
|
||||
* gl_transform_feedback_object.
|
||||
*/
|
||||
for (i = 0; i < 4; i++) {
|
||||
struct intel_buffer_object *bufferobj =
|
||||
intel_buffer_object(xfb_obj->Buffers[i]);
|
||||
struct brw_bo *bo;
|
||||
uint32_t start, end;
|
||||
uint32_t stride;
|
||||
|
||||
if (!xfb_obj->Buffers[i]) {
|
||||
/* The pitch of 0 in this command indicates that the buffer is
|
||||
* unbound and won't be written to.
|
||||
*/
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
|
||||
OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
stride = linked_xfb_info->Buffers[i].Stride * 4;
|
||||
|
||||
start = xfb_obj->Offset[i];
|
||||
assert(start % 4 == 0);
|
||||
end = ALIGN(start + xfb_obj->Size[i], 4);
|
||||
bo = intel_bufferobj_buffer(brw, bufferobj, start, end - start);
|
||||
assert(end <= bo->size);
|
||||
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
|
||||
OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Outputs the 3DSTATE_SO_DECL_LIST command.
|
||||
*
|
||||
* The data output is a series of 64-bit entries containing a SO_DECL per
|
||||
* stream. We only have one stream of rendering coming out of the GS unit, so
|
||||
* we only emit stream 0 (low 16 bits) SO_DECLs.
|
||||
*/
|
||||
void
|
||||
gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
|
||||
const struct brw_vue_map *vue_map)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
uint16_t so_decl[MAX_VERTEX_STREAMS][128];
|
||||
int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
|
||||
int next_offset[BRW_MAX_SOL_BUFFERS] = {0, 0, 0, 0};
|
||||
int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
|
||||
int max_decls = 0;
|
||||
STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
|
||||
|
||||
memset(so_decl, 0, sizeof(so_decl));
|
||||
|
||||
/* Construct the list of SO_DECLs to be emitted. The formatting of the
|
||||
* command is feels strange -- each dword pair contains a SO_DECL per stream.
|
||||
*/
|
||||
for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
|
||||
int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
|
||||
uint16_t decl = 0;
|
||||
int varying = linked_xfb_info->Outputs[i].OutputRegister;
|
||||
const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
|
||||
unsigned component_mask = (1 << components) - 1;
|
||||
unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
|
||||
unsigned decl_buffer_slot = buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
|
||||
assert(stream_id < MAX_VERTEX_STREAMS);
|
||||
|
||||
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
|
||||
* gl_Layer is stored in VARYING_SLOT_PSIZ.y
|
||||
* gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
|
||||
*/
|
||||
if (varying == VARYING_SLOT_PSIZ) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 3;
|
||||
} else if (varying == VARYING_SLOT_LAYER) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 1;
|
||||
} else if (varying == VARYING_SLOT_VIEWPORT) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 2;
|
||||
} else {
|
||||
component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
|
||||
}
|
||||
|
||||
buffer_mask[stream_id] |= 1 << buffer;
|
||||
|
||||
decl |= decl_buffer_slot;
|
||||
if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
|
||||
decl |= vue_map->varying_to_slot[VARYING_SLOT_PSIZ] <<
|
||||
SO_DECL_REGISTER_INDEX_SHIFT;
|
||||
} else {
|
||||
assert(vue_map->varying_to_slot[varying] >= 0);
|
||||
decl |= vue_map->varying_to_slot[varying] <<
|
||||
SO_DECL_REGISTER_INDEX_SHIFT;
|
||||
}
|
||||
decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
|
||||
|
||||
/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
|
||||
* array. Instead, it simply increments DstOffset for the following
|
||||
* input by the number of components that should be skipped.
|
||||
*
|
||||
* Our hardware is unusual in that it requires us to program SO_DECLs
|
||||
* for fake "hole" components, rather than simply taking the offset
|
||||
* for each real varying. Each hole can have size 1, 2, 3, or 4; we
|
||||
* program as many size = 4 holes as we can, then a final hole to
|
||||
* accommodate the final 1, 2, or 3 remaining.
|
||||
*/
|
||||
int skip_components =
|
||||
linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
|
||||
|
||||
next_offset[buffer] += skip_components;
|
||||
|
||||
while (skip_components >= 4) {
|
||||
so_decl[stream_id][decls[stream_id]++] =
|
||||
SO_DECL_HOLE_FLAG | 0xf | decl_buffer_slot;
|
||||
skip_components -= 4;
|
||||
}
|
||||
if (skip_components > 0)
|
||||
so_decl[stream_id][decls[stream_id]++] =
|
||||
SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1) |
|
||||
decl_buffer_slot;
|
||||
|
||||
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
|
||||
|
||||
next_offset[buffer] += components;
|
||||
|
||||
so_decl[stream_id][decls[stream_id]++] = decl;
|
||||
|
||||
if (decls[stream_id] > max_decls)
|
||||
max_decls = decls[stream_id];
|
||||
}
|
||||
|
||||
BEGIN_BATCH(max_decls * 2 + 3);
|
||||
OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (max_decls * 2 + 1));
|
||||
|
||||
OUT_BATCH((buffer_mask[0] << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
|
||||
(buffer_mask[1] << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
|
||||
(buffer_mask[2] << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
|
||||
(buffer_mask[3] << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
|
||||
|
||||
OUT_BATCH((decls[0] << SO_NUM_ENTRIES_0_SHIFT) |
|
||||
(decls[1] << SO_NUM_ENTRIES_1_SHIFT) |
|
||||
(decls[2] << SO_NUM_ENTRIES_2_SHIFT) |
|
||||
(decls[3] << SO_NUM_ENTRIES_3_SHIFT));
|
||||
|
||||
for (int i = 0; i < max_decls; i++) {
|
||||
/* Stream 1 | Stream 0 */
|
||||
OUT_BATCH(((uint32_t) so_decl[1][i]) << 16 | so_decl[0][i]);
|
||||
/* Stream 3 | Stream 2 */
|
||||
OUT_BATCH(((uint32_t) so_decl[3][i]) << 16 | so_decl[2][i]);
|
||||
}
|
||||
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
static bool
|
||||
query_active(struct gl_query_object *q)
|
||||
{
|
||||
return q && q->Active;
|
||||
}
|
||||
|
||||
static void
|
||||
upload_3dstate_streamout(struct brw_context *brw, bool active,
|
||||
const struct brw_vue_map *vue_map)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0;
|
||||
int i;
|
||||
|
||||
if (active) {
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
int urb_entry_read_offset = 0;
|
||||
int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
|
||||
urb_entry_read_offset;
|
||||
|
||||
dw1 |= SO_FUNCTION_ENABLE;
|
||||
dw1 |= SO_STATISTICS_ENABLE;
|
||||
|
||||
/* BRW_NEW_RASTERIZER_DISCARD */
|
||||
if (ctx->RasterDiscard) {
|
||||
if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
|
||||
dw1 |= SO_RENDERING_DISABLE;
|
||||
} else {
|
||||
perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
|
||||
"query active relies on the clipper.");
|
||||
}
|
||||
}
|
||||
|
||||
/* _NEW_LIGHT */
|
||||
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
|
||||
dw1 |= SO_REORDER_TRAILING;
|
||||
|
||||
if (brw->gen < 8) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (xfb_obj->Buffers[i]) {
|
||||
dw1 |= SO_BUFFER_ENABLE(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* We always read the whole vertex. This could be reduced at some
|
||||
* point by reading less and offsetting the register index in the
|
||||
* SO_DECLs.
|
||||
*/
|
||||
dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET);
|
||||
dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH);
|
||||
|
||||
dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET);
|
||||
dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH);
|
||||
|
||||
dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET);
|
||||
dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH);
|
||||
|
||||
dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET);
|
||||
dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH);
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
/* Set buffer pitches; 0 means unbound. */
|
||||
if (xfb_obj->Buffers[0])
|
||||
dw3 |= linked_xfb_info->Buffers[0].Stride * 4;
|
||||
if (xfb_obj->Buffers[1])
|
||||
dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16;
|
||||
if (xfb_obj->Buffers[2])
|
||||
dw4 |= linked_xfb_info->Buffers[2].Stride * 4;
|
||||
if (xfb_obj->Buffers[3])
|
||||
dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16;
|
||||
}
|
||||
}
|
||||
|
||||
const int dwords = brw->gen >= 8 ? 5 : 3;
|
||||
|
||||
BEGIN_BATCH(dwords);
|
||||
OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (dwords - 2));
|
||||
OUT_BATCH(dw1);
|
||||
OUT_BATCH(dw2);
|
||||
if (dwords > 3) {
|
||||
OUT_BATCH(dw3);
|
||||
OUT_BATCH(dw4);
|
||||
}
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
static void
|
||||
upload_sol_state(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
bool active = _mesa_is_xfb_active_and_unpaused(ctx);
|
||||
|
||||
if (active) {
|
||||
if (brw->gen >= 8)
|
||||
gen8_upload_3dstate_so_buffers(brw);
|
||||
else
|
||||
upload_3dstate_so_buffers(brw);
|
||||
|
||||
/* BRW_NEW_VUE_MAP_GEOM_OUT */
|
||||
gen7_upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out);
|
||||
}
|
||||
|
||||
/* Finally, set up the SOL stage. This command must always follow updates to
|
||||
* the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
|
||||
* MMIO register updates (current performed by the kernel at each batch
|
||||
* emit).
|
||||
*/
|
||||
upload_3dstate_streamout(brw, active, &brw->vue_map_geom_out);
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_sol_state = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_LIGHT,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_BLORP |
|
||||
BRW_NEW_RASTERIZER_DISCARD |
|
||||
BRW_NEW_VUE_MAP_GEOM_OUT |
|
||||
BRW_NEW_TRANSFORM_FEEDBACK,
|
||||
},
|
||||
.emit = upload_sol_state,
|
||||
};
|
||||
|
||||
void
|
||||
gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
|
||||
struct gl_transform_feedback_object *obj)
|
||||
|
@@ -1,95 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file gen8_sol_state.c
|
||||
*
|
||||
* Controls the stream output logic (SOL) stage of the gen8 hardware, which is
|
||||
* used to implement GL_EXT_transform_feedback.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_defines.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_buffer_objects.h"
|
||||
#include "main/transformfeedback.h"
|
||||
|
||||
void
|
||||
gen8_upload_3dstate_so_buffers(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
struct brw_transform_feedback_object *brw_obj =
|
||||
(struct brw_transform_feedback_object *) xfb_obj;
|
||||
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
|
||||
|
||||
/* Set up the up to 4 output buffers. These are the ranges defined in the
|
||||
* gl_transform_feedback_object.
|
||||
*/
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct intel_buffer_object *bufferobj =
|
||||
intel_buffer_object(xfb_obj->Buffers[i]);
|
||||
|
||||
if (!bufferobj) {
|
||||
BEGIN_BATCH(8);
|
||||
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
|
||||
OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t start = xfb_obj->Offset[i];
|
||||
assert(start % 4 == 0);
|
||||
uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
|
||||
struct brw_bo *bo =
|
||||
intel_bufferobj_buffer(brw, bufferobj, start, end - start);
|
||||
assert(end <= bo->size);
|
||||
|
||||
BEGIN_BATCH(8);
|
||||
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
|
||||
OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) |
|
||||
GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE |
|
||||
GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE |
|
||||
(mocs_wb << 22));
|
||||
OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
|
||||
OUT_BATCH(xfb_obj->Size[i] / 4 - 1);
|
||||
OUT_RELOC64(brw_obj->offset_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
i * sizeof(uint32_t));
|
||||
if (brw_obj->zero_offsets)
|
||||
OUT_BATCH(0); /* Zero out the offset and write that to offset_bo */
|
||||
else
|
||||
OUT_BATCH(0xFFFFFFFF); /* Use offset_bo as the "Stream Offset." */
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
brw_obj->zero_offsets = false;
|
||||
}
|
@@ -31,11 +31,13 @@
|
||||
#include "brw_util.h"
|
||||
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_buffer_objects.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
#include "main/fbobject.h"
|
||||
#include "main/framebuffer.h"
|
||||
#include "main/stencil.h"
|
||||
#include "main/transformfeedback.h"
|
||||
|
||||
UNUSED static void *
|
||||
emit_dwords(struct brw_context *brw, unsigned n)
|
||||
@@ -80,6 +82,28 @@ __gen_combine_address(struct brw_context *brw, void *location,
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct brw_address
|
||||
render_bo(struct brw_bo *bo, uint32_t offset)
|
||||
{
|
||||
return (struct brw_address) {
|
||||
.bo = bo,
|
||||
.offset = offset,
|
||||
.read_domains = I915_GEM_DOMAIN_RENDER,
|
||||
.write_domain = I915_GEM_DOMAIN_RENDER,
|
||||
};
|
||||
}
|
||||
|
||||
static inline struct brw_address
|
||||
instruction_bo(struct brw_bo *bo, uint32_t offset)
|
||||
{
|
||||
return (struct brw_address) {
|
||||
.bo = bo,
|
||||
.offset = offset,
|
||||
.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
|
||||
.write_domain = I915_GEM_DOMAIN_INSTRUCTION,
|
||||
};
|
||||
}
|
||||
|
||||
#include "genxml/genX_pack.h"
|
||||
|
||||
#define _brw_cmd_length(cmd) cmd ## _length
|
||||
@@ -94,11 +118,12 @@ __gen_combine_address(struct brw_context *brw, void *location,
|
||||
_brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
|
||||
_dst = NULL)
|
||||
|
||||
#define brw_batch_emitn(brw, cmd, n) ({ \
|
||||
#define brw_batch_emitn(brw, cmd, n, ...) ({ \
|
||||
uint32_t *_dw = emit_dwords(brw, n); \
|
||||
struct cmd template = { \
|
||||
_brw_cmd_header(cmd), \
|
||||
.DWordLength = n - _brw_cmd_length_bias(cmd), \
|
||||
__VA_ARGS__ \
|
||||
}; \
|
||||
_brw_cmd_pack(cmd)(brw, _dw, &template); \
|
||||
_dw + 1; /* Array starts at dw[1] */ \
|
||||
@@ -860,6 +885,316 @@ static const struct brw_tracked_state genX(sbe_state) = {
|
||||
},
|
||||
.emit = genX(upload_sbe),
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Outputs the 3DSTATE_SO_DECL_LIST command.
|
||||
*
|
||||
* The data output is a series of 64-bit entries containing a SO_DECL per
|
||||
* stream. We only have one stream of rendering coming out of the GS unit, so
|
||||
* we only emit stream 0 (low 16 bits) SO_DECLs.
|
||||
*/
|
||||
static void
|
||||
genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
|
||||
const struct brw_vue_map *vue_map)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
|
||||
int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
|
||||
int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
|
||||
int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
|
||||
int max_decls = 0;
|
||||
STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
|
||||
|
||||
memset(so_decl, 0, sizeof(so_decl));
|
||||
|
||||
/* Construct the list of SO_DECLs to be emitted. The formatting of the
|
||||
* command feels strange -- each dword pair contains a SO_DECL per stream.
|
||||
*/
|
||||
for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
|
||||
int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
|
||||
struct GENX(SO_DECL) decl = {0};
|
||||
int varying = linked_xfb_info->Outputs[i].OutputRegister;
|
||||
const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
|
||||
unsigned component_mask = (1 << components) - 1;
|
||||
unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
|
||||
unsigned decl_buffer_slot = buffer;
|
||||
assert(stream_id < MAX_VERTEX_STREAMS);
|
||||
|
||||
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
|
||||
* gl_Layer is stored in VARYING_SLOT_PSIZ.y
|
||||
* gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
|
||||
*/
|
||||
if (varying == VARYING_SLOT_PSIZ) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 3;
|
||||
} else if (varying == VARYING_SLOT_LAYER) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 1;
|
||||
} else if (varying == VARYING_SLOT_VIEWPORT) {
|
||||
assert(components == 1);
|
||||
component_mask <<= 2;
|
||||
} else {
|
||||
component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
|
||||
}
|
||||
|
||||
buffer_mask[stream_id] |= 1 << buffer;
|
||||
|
||||
decl.OutputBufferSlot = decl_buffer_slot;
|
||||
if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
|
||||
decl.RegisterIndex = vue_map->varying_to_slot[VARYING_SLOT_PSIZ];
|
||||
} else {
|
||||
assert(vue_map->varying_to_slot[varying] >= 0);
|
||||
decl.RegisterIndex = vue_map->varying_to_slot[varying];
|
||||
}
|
||||
decl.ComponentMask = component_mask;
|
||||
|
||||
/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
|
||||
* array. Instead, it simply increments DstOffset for the following
|
||||
* input by the number of components that should be skipped.
|
||||
*
|
||||
* Our hardware is unusual in that it requires us to program SO_DECLs
|
||||
* for fake "hole" components, rather than simply taking the offset
|
||||
* for each real varying. Each hole can have size 1, 2, 3, or 4; we
|
||||
* program as many size = 4 holes as we can, then a final hole to
|
||||
* accommodate the final 1, 2, or 3 remaining.
|
||||
*/
|
||||
int skip_components =
|
||||
linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
|
||||
|
||||
next_offset[buffer] += skip_components;
|
||||
|
||||
while (skip_components >= 4) {
|
||||
struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
|
||||
d->HoleFlag = 1;
|
||||
d->OutputBufferSlot = decl_buffer_slot;
|
||||
d->ComponentMask = 0xf;
|
||||
skip_components -= 4;
|
||||
}
|
||||
|
||||
if (skip_components > 0) {
|
||||
struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
|
||||
d->HoleFlag = 1;
|
||||
d->OutputBufferSlot = decl_buffer_slot;
|
||||
d->ComponentMask = (1 << skip_components) - 1;
|
||||
}
|
||||
|
||||
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
|
||||
|
||||
next_offset[buffer] += components;
|
||||
|
||||
so_decl[stream_id][decls[stream_id]++] = decl;
|
||||
|
||||
if (decls[stream_id] > max_decls)
|
||||
max_decls = decls[stream_id];
|
||||
}
|
||||
|
||||
uint32_t *dw;
|
||||
dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls,
|
||||
.StreamtoBufferSelects0 = buffer_mask[0],
|
||||
.StreamtoBufferSelects1 = buffer_mask[1],
|
||||
.StreamtoBufferSelects2 = buffer_mask[2],
|
||||
.StreamtoBufferSelects3 = buffer_mask[3],
|
||||
.NumEntries0 = decls[0],
|
||||
.NumEntries1 = decls[1],
|
||||
.NumEntries2 = decls[2],
|
||||
.NumEntries3 = decls[3]);
|
||||
|
||||
for (int i = 0; i < max_decls; i++) {
|
||||
GENX(SO_DECL_ENTRY_pack)(
|
||||
brw, dw + 2 + i * 2,
|
||||
&(struct GENX(SO_DECL_ENTRY)) {
|
||||
.Stream0Decl = so_decl[0][i],
|
||||
.Stream1Decl = so_decl[1][i],
|
||||
.Stream2Decl = so_decl[2][i],
|
||||
.Stream3Decl = so_decl[3][i],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
genX(upload_3dstate_so_buffers)(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
#if GEN_GEN < 8
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
#else
|
||||
struct brw_transform_feedback_object *brw_obj =
|
||||
(struct brw_transform_feedback_object *) xfb_obj;
|
||||
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
|
||||
#endif
|
||||
|
||||
/* Set up the up to 4 output buffers. These are the ranges defined in the
|
||||
* gl_transform_feedback_object.
|
||||
*/
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct intel_buffer_object *bufferobj =
|
||||
intel_buffer_object(xfb_obj->Buffers[i]);
|
||||
|
||||
if (!bufferobj) {
|
||||
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
|
||||
sob.SOBufferIndex = i;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t start = xfb_obj->Offset[i];
|
||||
assert(start % 4 == 0);
|
||||
uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
|
||||
struct brw_bo *bo =
|
||||
intel_bufferobj_buffer(brw, bufferobj, start, end - start);
|
||||
assert(end <= bo->size);
|
||||
|
||||
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
|
||||
sob.SOBufferIndex = i;
|
||||
|
||||
sob.SurfaceBaseAddress = render_bo(bo, start);
|
||||
#if GEN_GEN < 8
|
||||
sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
|
||||
sob.SurfaceEndAddress = render_bo(bo, end);
|
||||
#else
|
||||
sob.SOBufferEnable = true;
|
||||
sob.StreamOffsetWriteEnable = true;
|
||||
sob.StreamOutputBufferOffsetAddressEnable = true;
|
||||
sob.SOBufferMOCS = mocs_wb;
|
||||
|
||||
sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
|
||||
sob.StreamOutputBufferOffsetAddress =
|
||||
instruction_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
|
||||
|
||||
if (brw_obj->zero_offsets) {
|
||||
/* Zero out the offset and write that to offset_bo */
|
||||
sob.StreamOffset = 0;
|
||||
} else {
|
||||
/* Use offset_bo as the "Stream Offset." */
|
||||
sob.StreamOffset = 0xFFFFFFFF;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
brw_obj->zero_offsets = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool
|
||||
query_active(struct gl_query_object *q)
|
||||
{
|
||||
return q && q->Active;
|
||||
}
|
||||
|
||||
static void
|
||||
genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
|
||||
const struct brw_vue_map *vue_map)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
struct gl_transform_feedback_object *xfb_obj =
|
||||
ctx->TransformFeedback.CurrentObject;
|
||||
|
||||
brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
|
||||
if (active) {
|
||||
int urb_entry_read_offset = 0;
|
||||
int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
|
||||
urb_entry_read_offset;
|
||||
|
||||
sos.SOFunctionEnable = true;
|
||||
sos.SOStatisticsEnable = true;
|
||||
|
||||
/* BRW_NEW_RASTERIZER_DISCARD */
|
||||
if (ctx->RasterDiscard) {
|
||||
if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
|
||||
sos.RenderingDisable = true;
|
||||
} else {
|
||||
perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
|
||||
"query active relies on the clipper.");
|
||||
}
|
||||
}
|
||||
|
||||
/* _NEW_LIGHT */
|
||||
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
|
||||
sos.ReorderMode = TRAILING;
|
||||
|
||||
#if GEN_GEN < 8
|
||||
sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
|
||||
sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
|
||||
sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
|
||||
sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
|
||||
#else
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
xfb_obj->program->sh.LinkedTransformFeedback;
|
||||
/* Set buffer pitches; 0 means unbound. */
|
||||
if (xfb_obj->Buffers[0])
|
||||
sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
|
||||
if (xfb_obj->Buffers[1])
|
||||
sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
|
||||
if (xfb_obj->Buffers[2])
|
||||
sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
|
||||
if (xfb_obj->Buffers[3])
|
||||
sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
|
||||
#endif
|
||||
|
||||
/* We always read the whole vertex. This could be reduced at some
|
||||
* point by reading less and offsetting the register index in the
|
||||
* SO_DECLs.
|
||||
*/
|
||||
sos.Stream0VertexReadOffset = urb_entry_read_offset;
|
||||
sos.Stream0VertexReadLength = urb_entry_read_length - 1;
|
||||
sos.Stream1VertexReadOffset = urb_entry_read_offset;
|
||||
sos.Stream1VertexReadLength = urb_entry_read_length - 1;
|
||||
sos.Stream2VertexReadOffset = urb_entry_read_offset;
|
||||
sos.Stream2VertexReadLength = urb_entry_read_length - 1;
|
||||
sos.Stream3VertexReadOffset = urb_entry_read_offset;
|
||||
sos.Stream3VertexReadLength = urb_entry_read_length - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
genX(upload_sol)(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_TRANSFORM_FEEDBACK */
|
||||
bool active = _mesa_is_xfb_active_and_unpaused(ctx);
|
||||
|
||||
if (active) {
|
||||
genX(upload_3dstate_so_buffers)(brw);
|
||||
|
||||
/* BRW_NEW_VUE_MAP_GEOM_OUT */
|
||||
genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
|
||||
}
|
||||
|
||||
/* Finally, set up the SOL stage. This command must always follow updates to
|
||||
* the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
|
||||
* MMIO register updates (current performed by the kernel at each batch
|
||||
* emit).
|
||||
*/
|
||||
genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
|
||||
}
|
||||
|
||||
static const struct brw_tracked_state genX(sol_state) = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_LIGHT,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_BLORP |
|
||||
BRW_NEW_RASTERIZER_DISCARD |
|
||||
BRW_NEW_VUE_MAP_GEOM_OUT |
|
||||
BRW_NEW_TRANSFORM_FEEDBACK,
|
||||
},
|
||||
.emit = genX(upload_sol),
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@@ -1178,7 +1513,7 @@ genX(init_atoms)(struct brw_context *brw)
|
||||
&gen7_te_state,
|
||||
&gen7_ds_state,
|
||||
&gen7_gs_state,
|
||||
&gen7_sol_state,
|
||||
&genX(sol_state),
|
||||
&genX(clip_state),
|
||||
&genX(sbe_state),
|
||||
&genX(sf_state),
|
||||
@@ -1265,7 +1600,7 @@ genX(init_atoms)(struct brw_context *brw)
|
||||
&gen7_te_state,
|
||||
&gen8_ds_state,
|
||||
&gen8_gs_state,
|
||||
&gen7_sol_state,
|
||||
&genX(sol_state),
|
||||
&genX(clip_state),
|
||||
&genX(raster_state),
|
||||
&genX(sbe_state),
|
||||
|
Reference in New Issue
Block a user