i965: get rid of duplicated values from gen_device_info

Now that gen_device_info is mutable, we can update its values directly and
drop all the copies we had in brw_context.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Lionel Landwerlin
2016-09-23 00:41:23 +03:00
parent bc24590f0c
commit 94d0e7dc08
26 changed files with 71 additions and 79 deletions

View File

@@ -116,6 +116,7 @@ static void
brw_emit_gpgpu_walker(struct brw_context *brw)
{
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const GLuint *num_groups = brw->compute.num_work_groups;
uint32_t indirect_flag;
@@ -148,7 +149,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
OUT_BATCH(0); /* Indirect Data Length */
OUT_BATCH(0); /* Indirect Data Start Address */
}
assert(thread_width_max <= brw->max_cs_threads);
assert(thread_width_max <= devinfo->max_cs_threads);
OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
OUT_BATCH(0); /* Thread Group ID Starting X */

View File

@@ -806,19 +806,17 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct intel_screen *screen = brw->screen;
const struct gen_device_info *devinfo = &screen->devinfo;
struct gen_device_info *devinfo = &brw->screen->devinfo;
/* FINISHME: Do this for all platforms that the kernel supports */
if (brw->is_cherryview &&
screen->subslice_total > 0 && screen->eu_total > 0) {
/* Logical CS threads = EUs per subslice * 7 threads per EU */
brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
/* Fuse configurations may give more threads than expected, never less. */
if (brw->max_cs_threads < devinfo->max_cs_threads)
brw->max_cs_threads = devinfo->max_cs_threads;
} else {
brw->max_cs_threads = devinfo->max_cs_threads;
if (max_cs_threads > devinfo->max_cs_threads)
devinfo->max_cs_threads = max_cs_threads;
}
/* Maximum number of scalar compute shader invocations that can be run in
@@ -830,7 +828,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
* threads. With SIMD32 and 64 threads, Haswell still provides twice the
* number of invocations required for ARB_compute_shader.
*/
const unsigned max_threads = MIN2(64, brw->max_cs_threads);
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
@@ -1078,17 +1076,7 @@ brwCreateContext(gl_api api,
if (brw->gen >= 6)
brw_blorp_init(brw);
brw->max_vs_threads = devinfo->max_vs_threads;
brw->max_hs_threads = devinfo->max_hs_threads;
brw->max_ds_threads = devinfo->max_ds_threads;
brw->max_gs_threads = devinfo->max_gs_threads;
brw->max_wm_threads = devinfo->max_wm_threads;
brw->urb.size = devinfo->urb.size;
brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
if (brw->gen == 6)
brw->urb.gs_present = false;

View File

@@ -1017,17 +1017,6 @@ struct brw_context
*/
int num_samples;
/**
* Platform specific constants containing the maximum number of threads
* for each pipeline stage.
*/
unsigned max_vs_threads;
unsigned max_hs_threads;
unsigned max_ds_threads;
unsigned max_gs_threads;
unsigned max_wm_threads;
unsigned max_cs_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
@@ -1040,12 +1029,6 @@ struct brw_context
bool constrained;
GLuint min_vs_entries; /* Minimum number of VS entries */
GLuint max_vs_entries; /* Maximum number of VS entries */
GLuint max_hs_entries; /* Maximum number of HS entries */
GLuint max_ds_entries; /* Maximum number of DS entries */
GLuint max_gs_entries; /* Maximum number of GS entries */
GLuint nr_vs_entries;
GLuint nr_hs_entries;
GLuint nr_ds_entries;

View File

@@ -57,6 +57,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
struct brw_compute_program *cp,
struct brw_cs_prog_key *key)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const GLuint *program;
void *mem_ctx = ralloc_context(NULL);
@@ -84,7 +85,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
prog_data.base.total_shared = prog->Comp.SharedSize;
}
assign_cs_binding_table_offsets(&brw->screen->devinfo, prog,
assign_cs_binding_table_offsets(devinfo, prog,
&cp->program.Base, &prog_data);
/* Allocate the references to the uniforms that will end up in the
@@ -166,7 +167,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
* number of threads per subslice.
*/
const unsigned scratch_ids_per_subslice =
brw->is_haswell ? 16 * 8 : brw->max_cs_threads;
brw->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
brw_alloc_stage_scratch(brw, &brw->cs.base,
prog_data.base.total_scratch,

View File

@@ -98,6 +98,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct brw_gs_prog_key *key)
{
struct brw_compiler *compiler = brw->screen->compiler;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_prog_data prog_data;
bool start_busy = false;
@@ -105,7 +106,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
memset(&prog_data, 0, sizeof(prog_data));
assign_gs_binding_table_offsets(&brw->screen->devinfo, prog,
assign_gs_binding_table_offsets(devinfo, prog,
&gp->program.Base, &prog_data);
/* Allocate the references to the uniforms that will end up in the
@@ -139,7 +140,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
((1 << gp->program.Base.CullDistanceArraySize) - 1) <<
gp->program.Base.ClipDistanceArraySize;
brw_compute_vue_map(&brw->screen->devinfo,
brw_compute_vue_map(devinfo,
&prog_data.base.vue_map, outputs_written,
prog->SeparateShader);
@@ -184,7 +185,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch,
brw->max_gs_threads);
devinfo->max_gs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
key, sizeof(*key),

View File

@@ -298,7 +298,7 @@ brw_codegen_tcs_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch,
brw->max_hs_threads);
devinfo->max_hs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
key, sizeof(*key),

View File

@@ -217,7 +217,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch,
brw->max_ds_threads);
devinfo->max_ds_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
key, sizeof(*key),

View File

@@ -91,6 +91,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
struct brw_vs_prog_key *key)
{
const struct brw_compiler *compiler = brw->screen->compiler;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
GLuint program_size;
const GLuint *program;
struct brw_vs_prog_data prog_data;
@@ -112,7 +113,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
mem_ctx = ralloc_context(NULL);
brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
&brw->screen->devinfo,
devinfo,
prog, &vp->program.Base,
&prog_data.base.base, 0);
@@ -160,7 +161,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
((1 << vp->program.Base.CullDistanceArraySize) - 1) <<
vp->program.Base.ClipDistanceArraySize;
brw_compute_vue_map(&brw->screen->devinfo,
brw_compute_vue_map(devinfo,
&prog_data.base.vue_map, outputs_written,
prog ? prog->SeparateShader ||
prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]
@@ -222,7 +223,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, &brw->vs.base,
prog_data.base.base.total_scratch,
brw->max_vs_threads);
devinfo->max_vs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
key, sizeof(struct brw_vs_prog_key),

View File

@@ -39,6 +39,7 @@
static void
brw_upload_vs_unit(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct brw_stage_state *stage_state = &brw->vs.base;
struct brw_vs_unit_state *vs;
@@ -137,7 +138,7 @@ brw_upload_vs_unit(struct brw_context *brw)
vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
1, brw->max_vs_threads) - 1;
1, devinfo->max_vs_threads) - 1;
if (brw->gen == 5)
vs->vs5.sampler_count = 0; /* hardware requirement */

View File

@@ -79,6 +79,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
void *mem_ctx = ralloc_context(NULL);
struct brw_wm_prog_data prog_data;
@@ -97,7 +98,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
if (!prog)
prog_data.base.use_alt_mode = true;
assign_fs_binding_table_offsets(&brw->screen->devinfo, prog,
assign_fs_binding_table_offsets(devinfo, prog,
&fp->program.Base, key, &prog_data);
/* Allocate the references to the uniforms that will end up in the
@@ -172,7 +173,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
brw_alloc_stage_scratch(brw, &brw->wm.base,
prog_data.base.total_scratch,
brw->max_wm_threads);
devinfo->max_wm_threads);
if (unlikely(INTEL_DEBUG & DEBUG_WM))
fprintf(stderr, "\n");

View File

@@ -75,6 +75,7 @@ brw_color_buffer_write_enabled(struct brw_context *brw)
static void
brw_upload_wm_unit(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct gl_fragment_program *fp = brw->fragment_program;
@@ -178,7 +179,7 @@ brw_upload_wm_unit(struct brw_context *brw)
wm->wm5.program_uses_killpixel =
prog_data->uses_kill || ctx->Color.AlphaEnabled;
wm->wm5.max_threads = brw->max_wm_threads - 1;
wm->wm5.max_threads = devinfo->max_wm_threads - 1;
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) ||
@@ -270,4 +271,3 @@ const struct brw_tracked_state brw_wm_unit = {
},
.emit = brw_upload_wm_unit,
};

View File

@@ -69,6 +69,8 @@ const struct brw_tracked_state gen6_gs_push_constants = {
static void
upload_gs_state_for_tf(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(brw->ff_gs.prog_offset);
@@ -76,7 +78,7 @@ upload_gs_state_for_tf(struct brw_context *brw)
OUT_BATCH(0); /* no scratch space */
OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
OUT_BATCH(((devinfo->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_SO_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE);
@@ -91,6 +93,7 @@ upload_gs_state_for_tf(struct brw_context *brw)
static void
upload_gs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* BRW_NEW_GEOMETRY_PROGRAM */
bool active = brw->geometry_program;
/* BRW_NEW_GS_PROG_DATA */
@@ -153,7 +156,7 @@ upload_gs_state(struct brw_context *brw)
(prog_data->base.dispatch_grf_start_reg <<
GEN6_GS_DISPATCH_START_GRF_SHIFT));
OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
OUT_BATCH(((devinfo->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_SO_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE);

View File

@@ -52,6 +52,7 @@ gen6_upload_urb(struct brw_context *brw, unsigned vs_size,
{
int nr_vs_entries, nr_gs_entries;
int total_urb_size = brw->urb.size * 1024; /* in bytes */
const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* Calculate how many entries fit in each stage's section of the URB */
if (gs_present) {
@@ -63,17 +64,17 @@ gen6_upload_urb(struct brw_context *brw, unsigned vs_size,
}
/* Then clamp to the maximum allowed by the hardware */
if (nr_vs_entries > brw->urb.max_vs_entries)
nr_vs_entries = brw->urb.max_vs_entries;
if (nr_vs_entries > devinfo->urb.max_vs_entries)
nr_vs_entries = devinfo->urb.max_vs_entries;
if (nr_gs_entries > brw->urb.max_gs_entries)
nr_gs_entries = brw->urb.max_gs_entries;
if (nr_gs_entries > devinfo->urb.max_gs_entries)
nr_gs_entries = devinfo->urb.max_gs_entries;
/* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);
assert(brw->urb.nr_vs_entries >= brw->urb.min_vs_entries);
assert(brw->urb.nr_vs_entries >= devinfo->urb.min_vs_entries);
assert(brw->urb.nr_vs_entries % 4 == 0);
assert(brw->urb.nr_gs_entries % 4 == 0);
assert(vs_size <= 5);

View File

@@ -74,6 +74,7 @@ const struct brw_tracked_state gen6_vs_push_constants = {
static void
upload_vs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->vs.base;
uint32_t floating_point_mode = 0;
@@ -138,7 +139,7 @@ upload_vs_state(struct brw_context *brw)
(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
OUT_BATCH(((devinfo->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();

View File

@@ -80,6 +80,7 @@ gen6_upload_wm_state(struct brw_context *brw,
bool line_stipple_enable, bool polygon_stipple_enable,
bool statistic_enable)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
/* We can't fold this into gen6_upload_wm_push_constants(), because
@@ -130,7 +131,7 @@ gen6_upload_wm_state(struct brw_context *brw,
dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
dw5 |= (devinfo->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
if (prog_data->dispatch_8)
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;

View File

@@ -97,7 +97,7 @@ brw_upload_cs_state(struct brw_context *brw)
const uint32_t vfe_gpgpu_mode =
brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
OUT_BATCH(SET_FIELD(brw->max_cs_threads * subslices - 1,
OUT_BATCH(SET_FIELD(devinfo->max_cs_threads * subslices - 1,
MEDIA_VFE_STATE_MAX_THREADS) |
SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
@@ -163,7 +163,7 @@ brw_upload_cs_state(struct brw_context *brw)
brw->gen >= 8 ?
SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
assert(cs_prog_data->threads <= brw->max_cs_threads);
assert(cs_prog_data->threads <= devinfo->max_cs_threads);
const uint32_t slm_size =
encode_slm_size(devinfo->gen, prog_data->total_shared);

View File

@@ -61,6 +61,7 @@ const struct brw_tracked_state gen7_tes_push_constants = {
static void
gen7_upload_ds_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
@@ -70,7 +71,7 @@ gen7_upload_ds_state(struct brw_context *brw)
const struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
const unsigned thread_count = (brw->max_ds_threads - 1) <<
const unsigned thread_count = (devinfo->max_ds_threads - 1) <<
(brw->is_haswell ? HSW_DS_MAX_THREADS_SHIFT : GEN7_DS_MAX_THREADS_SHIFT);
if (active) {

View File

@@ -29,6 +29,7 @@
static void
upload_gs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->gs.base;
const int max_threads_shift = brw->is_haswell ?
HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT;
@@ -107,7 +108,7 @@ upload_gs_state(struct brw_context *brw)
* Bridge this will get the order close to correct but not perfect.
*/
uint32_t dw5 =
((brw->max_gs_threads - 1) << max_threads_shift) |
((devinfo->max_gs_threads - 1) << max_threads_shift) |
(brw->gs.prog_data->control_data_header_size_hwords <<
GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
((brw->gs.prog_data->invocations - 1) <<

View File

@@ -64,6 +64,7 @@ const struct brw_tracked_state gen7_tcs_push_constants = {
static void
gen7_upload_hs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
@@ -77,7 +78,7 @@ gen7_upload_hs_state(struct brw_context *brw)
GEN7_HS_SAMPLER_COUNT) |
SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
(brw->max_hs_threads - 1));
(devinfo->max_hs_threads - 1));
OUT_BATCH(GEN7_HS_ENABLE |
GEN7_HS_STATISTICS_ENABLE |
SET_FIELD(brw->tcs.prog_data->instances - 1,

View File

@@ -276,14 +276,14 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
* greater than or equal to 192."
*/
unsigned vs_min_entries =
tess_present && brw->gen == 8 ? 192 : brw->urb.min_vs_entries;
tess_present && brw->gen == 8 ? 192 : devinfo->urb.min_vs_entries;
/* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up */
vs_min_entries = ALIGN(vs_min_entries, vs_granularity);
unsigned vs_chunks =
DIV_ROUND_UP(vs_min_entries * vs_entry_size_bytes, chunk_size_bytes);
unsigned vs_wants =
DIV_ROUND_UP(brw->urb.max_vs_entries * vs_entry_size_bytes,
DIV_ROUND_UP(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
chunk_size_bytes) - vs_chunks;
unsigned gs_chunks = 0;
@@ -299,7 +299,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
*/
gs_chunks = DIV_ROUND_UP(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
chunk_size_bytes);
gs_wants = DIV_ROUND_UP(brw->urb.max_gs_entries * gs_entry_size_bytes,
gs_wants = DIV_ROUND_UP(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
chunk_size_bytes) - gs_chunks;
}
@@ -320,7 +320,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
DIV_ROUND_UP(devinfo->urb.min_ds_entries * ds_entry_size_bytes,
chunk_size_bytes);
ds_wants =
DIV_ROUND_UP(brw->urb.max_ds_entries * ds_entry_size_bytes,
DIV_ROUND_UP(devinfo->urb.max_ds_entries * ds_entry_size_bytes,
chunk_size_bytes) - ds_chunks;
}
@@ -377,10 +377,10 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
/* Since we rounded up when computing *_wants, this may be slightly more
* than the maximum allowed amount, so correct for that.
*/
nr_vs_entries = MIN2(nr_vs_entries, brw->urb.max_vs_entries);
nr_hs_entries = MIN2(nr_hs_entries, brw->urb.max_hs_entries);
nr_ds_entries = MIN2(nr_ds_entries, brw->urb.max_ds_entries);
nr_gs_entries = MIN2(nr_gs_entries, brw->urb.max_gs_entries);
nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
nr_hs_entries = MIN2(nr_hs_entries, devinfo->urb.max_hs_entries);
nr_ds_entries = MIN2(nr_ds_entries, devinfo->urb.max_ds_entries);
nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
/* Ensure that we program a multiple of the granularity. */
nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);

View File

@@ -32,6 +32,7 @@
static void
upload_vs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->vs.base;
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
@@ -66,7 +67,7 @@ upload_vs_state(struct brw_context *brw)
(prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
OUT_BATCH(((devinfo->max_vs_threads - 1) << max_threads_shift) |
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();

View File

@@ -149,6 +149,7 @@ gen7_upload_ps_state(struct brw_context *brw,
bool enable_dual_src_blend, unsigned sample_mask,
unsigned fast_clear_op)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint32_t dw2, dw4, dw5, ksp0, ksp2;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
@@ -171,7 +172,7 @@ gen7_upload_ps_state(struct brw_context *brw,
if (brw->is_haswell)
dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK);
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
dw4 |= (devinfo->max_wm_threads - 1) << max_threads_shift;
if (prog_data->base.nr_params > 0)
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;

View File

@@ -29,6 +29,7 @@
static void
gen8_upload_ds_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
@@ -64,7 +65,7 @@ gen8_upload_ds_state(struct brw_context *brw)
OUT_BATCH(GEN7_DS_ENABLE |
GEN7_DS_STATISTICS_ENABLE |
(brw->max_ds_threads - 1) << HSW_DS_MAX_THREADS_SHIFT |
(devinfo->max_ds_threads - 1) << HSW_DS_MAX_THREADS_SHIFT |
(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ?
GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) |
(tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?

View File

@@ -29,6 +29,7 @@
static void
gen8_upload_gs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const struct brw_stage_state *stage_state = &brw->gs.base;
/* BRW_NEW_GEOMETRY_PROGRAM */
@@ -97,9 +98,9 @@ gen8_upload_gs_state(struct brw_context *brw)
}
if (brw->gen < 9)
dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
dw7 |= (devinfo->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
else
dw8 |= brw->max_gs_threads - 1;
dw8 |= devinfo->max_gs_threads - 1;
/* DW7 */
OUT_BATCH(dw7);

View File

@@ -29,6 +29,7 @@
static void
gen8_upload_hs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
@@ -44,7 +45,7 @@ gen8_upload_hs_state(struct brw_context *brw)
GEN7_HS_BINDING_TABLE_ENTRY_COUNT));
OUT_BATCH(GEN7_HS_ENABLE |
GEN7_HS_STATISTICS_ENABLE |
(brw->max_hs_threads - 1) << GEN8_HS_MAX_THREADS_SHIFT |
(devinfo->max_hs_threads - 1) << GEN8_HS_MAX_THREADS_SHIFT |
SET_FIELD(brw->tcs.prog_data->instances - 1,
GEN7_HS_INSTANCE_COUNT));
OUT_BATCH(stage_state->prog_offset);

View File

@@ -32,6 +32,7 @@
static void
upload_vs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const struct brw_stage_state *stage_state = &brw->vs.base;
uint32_t floating_point_mode = 0;
@@ -71,7 +72,7 @@ upload_vs_state(struct brw_context *brw)
uint32_t simd8_enable = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ?
GEN8_VS_SIMD8_ENABLE : 0;
OUT_BATCH(((brw->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) |
OUT_BATCH(((devinfo->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) |
GEN6_VS_STATISTICS_ENABLE |
simd8_enable |
GEN6_VS_ENABLE);