i965/gen7: Enable HiZ
This patch modifies all batches needed for HiZ. The batch length for 3DSTATE_HIER_DEPTH_BUFFER is also corrected from 4 to 3.

Performance: +6.7% on Citybench.
  num-frames: 400
  resolution: 1918x1031
  avg-hiz-off: 127.90 fps
  avg-hiz-on: 136.50 fps
  kernel: git://people.freedesktop.org/~anholt/linux.git branch=gen7-reset-sol sha=23360e4

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
This commit is contained in:
@@ -39,6 +39,23 @@ upload_clip_state(struct brw_context *brw)
|
|||||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||||
const struct gl_fragment_program *fprog = brw->fragment_program;
|
const struct gl_fragment_program *fprog = brw->fragment_program;
|
||||||
|
|
||||||
|
if (brw->hiz.op) {
|
||||||
|
/* HiZ operations emit a rectangle primitive, which requires clipping to
|
||||||
|
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
|
||||||
|
* Section 1.3 3D Primitives Overview:
|
||||||
|
* RECTLIST:
|
||||||
|
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
|
||||||
|
* Mode should be set to a value other than CLIPMODE_NORMAL.
|
||||||
|
*/
|
||||||
|
BEGIN_BATCH(4);
|
||||||
|
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
|
||||||
|
OUT_BATCH(0);
|
||||||
|
OUT_BATCH(0);
|
||||||
|
OUT_BATCH(0);
|
||||||
|
ADVANCE_BATCH();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* _NEW_BUFFERS */
|
/* _NEW_BUFFERS */
|
||||||
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
|
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
|
||||||
|
|
||||||
@@ -116,7 +133,8 @@ const struct brw_tracked_state gen7_clip_state = {
|
|||||||
_NEW_LIGHT |
|
_NEW_LIGHT |
|
||||||
_NEW_TRANSFORM),
|
_NEW_TRANSFORM),
|
||||||
.brw = (BRW_NEW_CONTEXT |
|
.brw = (BRW_NEW_CONTEXT |
|
||||||
BRW_NEW_FRAGMENT_PROGRAM),
|
BRW_NEW_FRAGMENT_PROGRAM |
|
||||||
|
BRW_NEW_HIZ),
|
||||||
.cache = 0
|
.cache = 0
|
||||||
},
|
},
|
||||||
.emit = upload_clip_state,
|
.emit = upload_clip_state,
|
||||||
|
@@ -38,11 +38,16 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||||||
/* _NEW_BUFFERS */
|
/* _NEW_BUFFERS */
|
||||||
struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
|
struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
|
||||||
struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
|
struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
|
||||||
struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
|
struct intel_mipmap_tree *depth_mt = NULL,
|
||||||
|
*stencil_mt = NULL,
|
||||||
|
*hiz_mt = NULL;
|
||||||
|
|
||||||
if (drb)
|
if (drb)
|
||||||
depth_mt = drb->mt;
|
depth_mt = drb->mt;
|
||||||
|
|
||||||
|
if (depth_mt)
|
||||||
|
hiz_mt = depth_mt->hiz_mt;
|
||||||
|
|
||||||
if (srb) {
|
if (srb) {
|
||||||
stencil_mt = srb->mt;
|
stencil_mt = srb->mt;
|
||||||
if (stencil_mt->stencil_mt)
|
if (stencil_mt->stencil_mt)
|
||||||
@@ -97,7 +102,7 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||||||
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
|
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
|
||||||
OUT_BATCH(((region->pitch * region->cpp) - 1) |
|
OUT_BATCH(((region->pitch * region->cpp) - 1) |
|
||||||
(brw_depthbuffer_format(brw) << 18) |
|
(brw_depthbuffer_format(brw) << 18) |
|
||||||
(0 << 22) /* no HiZ buffer */ |
|
((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
|
||||||
((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
|
((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
|
||||||
((ctx->Depth.Mask != 0) << 28) |
|
((ctx->Depth.Mask != 0) << 28) |
|
||||||
(BRW_SURFACE_2D << 29));
|
(BRW_SURFACE_2D << 29));
|
||||||
@@ -112,12 +117,22 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||||||
ADVANCE_BATCH();
|
ADVANCE_BATCH();
|
||||||
}
|
}
|
||||||
|
|
||||||
BEGIN_BATCH(4);
|
if (hiz_mt == NULL) {
|
||||||
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (4 - 2));
|
/* Exactly 3 dwords are emitted below (header + 2 zero dwords), matching
 * the (3 - 2) length encoded in the packet header.
 */
BEGIN_BATCH(3);
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
ADVANCE_BATCH();
|
ADVANCE_BATCH();
|
||||||
|
} else {
|
||||||
|
/* Exactly 3 dwords are emitted below (header, pitch, buffer reloc),
 * matching the (3 - 2) length encoded in the packet header.
 */
BEGIN_BATCH(3);
|
||||||
|
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
|
||||||
|
OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
|
||||||
|
OUT_RELOC(hiz_mt->region->bo,
|
||||||
|
I915_GEM_DOMAIN_RENDER,
|
||||||
|
I915_GEM_DOMAIN_RENDER,
|
||||||
|
0);
|
||||||
|
ADVANCE_BATCH();
|
||||||
|
}
|
||||||
|
|
||||||
if (stencil_mt == NULL) {
|
if (stencil_mt == NULL) {
|
||||||
BEGIN_BATCH(3);
|
BEGIN_BATCH(3);
|
||||||
|
@@ -137,7 +137,8 @@ const struct brw_tracked_state gen7_sbe_state = {
|
|||||||
_NEW_PROGRAM |
|
_NEW_PROGRAM |
|
||||||
_NEW_TRANSFORM),
|
_NEW_TRANSFORM),
|
||||||
.brw = (BRW_NEW_CONTEXT |
|
.brw = (BRW_NEW_CONTEXT |
|
||||||
BRW_NEW_FRAGMENT_PROGRAM),
|
BRW_NEW_FRAGMENT_PROGRAM |
|
||||||
|
BRW_NEW_HIZ),
|
||||||
.cache = CACHE_NEW_VS_PROG
|
.cache = CACHE_NEW_VS_PROG
|
||||||
},
|
},
|
||||||
.emit = upload_sbe_state,
|
.emit = upload_sbe_state,
|
||||||
@@ -153,7 +154,17 @@ upload_sf_state(struct brw_context *brw)
|
|||||||
/* _NEW_BUFFERS */
|
/* _NEW_BUFFERS */
|
||||||
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
|
bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
|
||||||
|
|
||||||
dw1 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
|
dw1 = GEN6_SF_STATISTICS_ENABLE;
|
||||||
|
|
||||||
|
/* Enable viewport transform only if no HiZ operation is in progress
|
||||||
|
*
|
||||||
|
* From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
|
||||||
|
* Primitives Overview":
|
||||||
|
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
|
||||||
|
* use of screen-space coordinates).
|
||||||
|
*/
|
||||||
|
if (!brw->hiz.op)
|
||||||
|
dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
|
||||||
|
|
||||||
/* _NEW_BUFFERS */
|
/* _NEW_BUFFERS */
|
||||||
dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
|
dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
|
||||||
@@ -287,7 +298,8 @@ const struct brw_tracked_state gen7_sf_state = {
|
|||||||
_NEW_SCISSOR |
|
_NEW_SCISSOR |
|
||||||
_NEW_BUFFERS |
|
_NEW_BUFFERS |
|
||||||
_NEW_POINT),
|
_NEW_POINT),
|
||||||
.brw = (BRW_NEW_CONTEXT),
|
.brw = (BRW_NEW_CONTEXT |
|
||||||
|
BRW_NEW_HIZ),
|
||||||
.cache = CACHE_NEW_VS_PROG
|
.cache = CACHE_NEW_VS_PROG
|
||||||
},
|
},
|
||||||
.emit = upload_sf_state,
|
.emit = upload_sf_state,
|
||||||
|
@@ -49,6 +49,23 @@ upload_wm_state(struct brw_context *brw)
|
|||||||
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
|
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
|
||||||
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
|
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
|
||||||
|
|
||||||
|
switch (brw->hiz.op) {
|
||||||
|
case BRW_HIZ_OP_NONE:
|
||||||
|
break;
|
||||||
|
case BRW_HIZ_OP_DEPTH_CLEAR:
|
||||||
|
dw1 |= GEN7_WM_DEPTH_CLEAR;
|
||||||
|
break;
|
||||||
|
case BRW_HIZ_OP_DEPTH_RESOLVE:
|
||||||
|
dw1 |= GEN7_WM_DEPTH_RESOLVE;
|
||||||
|
break;
|
||||||
|
case BRW_HIZ_OP_HIZ_RESOLVE:
|
||||||
|
dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/* _NEW_LINE */
|
/* _NEW_LINE */
|
||||||
if (ctx->Line.StippleFlag)
|
if (ctx->Line.StippleFlag)
|
||||||
dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
|
dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
|
||||||
@@ -89,6 +106,7 @@ const struct brw_tracked_state gen7_wm_state = {
|
|||||||
.mesa = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
|
.mesa = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
|
||||||
_NEW_COLOR | _NEW_BUFFERS),
|
_NEW_COLOR | _NEW_BUFFERS),
|
||||||
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
||||||
|
BRW_NEW_HIZ |
|
||||||
BRW_NEW_BATCH),
|
BRW_NEW_BATCH),
|
||||||
.cache = 0,
|
.cache = 0,
|
||||||
},
|
},
|
||||||
|
@@ -717,7 +717,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
|
|||||||
|
|
||||||
intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
|
intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
|
||||||
intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
|
intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
|
||||||
intelScreen->hw_has_hiz = intelScreen->gen == 6; /* Not yet for gen7. */
|
intelScreen->hw_has_hiz = intelScreen->gen >= 6;
|
||||||
intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_UNKNOWN;
|
intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_UNKNOWN;
|
||||||
|
|
||||||
intel_override_hiz(intelScreen);
|
intel_override_hiz(intelScreen);
|
||||||
|
Reference in New Issue
Block a user