/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "common/gen_l3_config.h"

/**
 * This file implements some lightweight memcpy/memset operations on the GPU
 * using a vertex buffer and streamout.
 */
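
/*
 * Illustrative usage sketch (hypothetical caller and variable names): given
 * two anv_addresses covering `n` bytes each, with n a multiple of 4,
 *
 *    genX(cmd_buffer_so_memcpy)(cmd_buffer, dst_addr, src_addr, n);
 *
 * records a streamout-based copy of those n bytes into the command buffer.
 */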

/**
 * Returns the greatest common divisor of a and b that is a power of two.
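 * For example, gcd_pow2_u64(16, 24) returns 8.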
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one. If both are 0 then we will
    * hit the assert above.
    */
   return 1ull << MIN2(a_log2, b_log2);
}

void
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                           struct anv_address dst, struct anv_address src,
                           uint32_t size)
{
   if (size == 0)
      return;

   /* The maximum copy block size is 4 32-bit components at a time. */
   assert(size % 4 == 0);
   unsigned bs = gcd_pow2_u64(16, size);
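
   /* Pick the vertex format whose fetch width matches the block size; each
    * "vertex" the VF reads covers bs bytes of the source buffer.
    */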
   enum isl_format format;
   switch (bs) {
   case 4: format = ISL_FORMAT_R32_UINT; break;
   case 8: format = ISL_FORMAT_R32G32_UINT; break;
   case 16: format = ISL_FORMAT_R32G32B32A32_UINT; break;
   default:
      unreachable("Invalid size");
   }
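
   /* If no L3 configuration has been programmed yet, set up the device
    * default before emitting any 3D state.
    */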
   if (!cmd_buffer->state.current_l3_config) {
      const struct gen_l3_config *cfg =
         gen_get_default_l3_config(&cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }
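
   /* Track this use of the source buffer for the Gen8+ vertex-buffer cache
    * flush workaround, then flush any pending pipe controls.
    */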
   genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, 32, src, size);
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(flush_pipeline_select_3d)(cmd_buffer);
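
   /* Bind the source as vertex buffer 32 (a slot reserved for this copy
    * path) so the VF unit fetches it in bs-byte elements.
    */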
   uint32_t *dw;
   dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(3DSTATE_VERTEX_BUFFERS));
   GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, dw + 1,
      &(struct GENX(VERTEX_BUFFER_STATE)) {
         .VertexBufferIndex = 32, /* Reserved for this */
         .AddressModifyEnable = true,
         .BufferStartingAddress = src,
         .BufferPitch = bs,
         .MOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
#if (GEN_GEN >= 8)
         .BufferSize = size,
#else
         .EndAddress = anv_address_add(src, size - 1),
#endif
      });
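
   /* A single vertex element reads one bs-byte block per vertex; components
    * beyond the block size are stored as zero.
    */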
   dw = anv_batch_emitn(&cmd_buffer->batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
   GENX(VERTEX_ELEMENT_STATE_pack)(&cmd_buffer->batch, dw + 1,
      &(struct GENX(VERTEX_ELEMENT_STATE)) {
         .VertexBufferIndex = 32,
         .Valid = true,
         .SourceElementFormat = format,
         .SourceElementOffset = 0,
         .Component0Control = (bs >= 4) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
         .Component1Control = (bs >= 8) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
         .Component2Control = (bs >= 12) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
         .Component3Control = (bs >= 16) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
      });

#if GEN_GEN >= 8
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), sgvs);
#endif

   /* Disable all shader stages */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), vs);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), hs);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), te);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), ds);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), gs);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS), ps);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = 1;
      sbe.VertexURBEntryReadLength = 1;
#if GEN_GEN >= 8
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#endif

#if GEN_GEN >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   /* Emit URB setup. We tell it that the VS is active because we want it to
    * allocate space for the VS. Even though one isn't run, we need VUEs to
    * store the data that VF is going to pass to SOL.
    */
   const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };

   genX(emit_urb_setup)(cmd_buffer->device, &cmd_buffer->batch,
                        cmd_buffer->state.current_l3_config,
                        VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
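
   /* Bind the destination as streamout buffer 0; SOL writes each vertex's
    * bs-byte payload there back to back.
    */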
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
#if GEN_GEN < 12
      sob.SOBufferIndex = 0;
#else
      sob._3DCommandOpcode = 0;
      sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD;
#endif
      sob.MOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo);
      sob.SurfaceBaseAddress = dst;

#if GEN_GEN >= 8
      sob.SOBufferEnable = true;
      sob.SurfaceSize = size / 4 - 1;
#else
      sob.SurfacePitch = bs;
      sob.SurfaceEndAddress = anv_address_add(dst, size);
#endif

#if GEN_GEN >= 8
      /* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
       * the end position of the stream. We need to reset this value to 0 at
       * the beginning of the run or else SOL will start at the offset from
       * the previous draw.
       */
      sob.StreamOffsetWriteEnable = true;
      sob.StreamOffset = 0;
#endif
   }

#if GEN_GEN <= 7
   /* The hardware can do this for us on BDW+ (see above) */
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), load) {
      load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
      load.DataDWord = 0;
   }
#endif
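
   /* Declare the single streamout output: the first bs/4 dwords of VUE
    * register 0 go to buffer 0.
    */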
   dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(3DSTATE_SO_DECL_LIST),
                        .StreamtoBufferSelects0 = (1 << 0),
                        .NumEntries0 = 1);
   GENX(SO_DECL_ENTRY_pack)(&cmd_buffer->batch, dw + 3,
      &(struct GENX(SO_DECL_ENTRY)) {
         .Stream0Decl = {
            .OutputBufferSlot = 0,
            .RegisterIndex = 0,
            .ComponentMask = (1 << (bs / 4)) - 1,
         },
      });
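
   /* Enable SOL but disable rasterization; this draw exists only to move
    * data into the destination buffer.
    */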
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so) {
      so.SOFunctionEnable = true;
      so.RenderingDisable = true;
      so.Stream0VertexReadOffset = 0;
      so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
#if GEN_GEN >= 8
      so.Buffer0SurfacePitch = bs;
#else
      so.SOBufferEnable0 = true;
#endif
   }

#if GEN_GEN >= 8
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
   }
#endif
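
   /* Keep this internal draw out of the VF pipeline statistics counters. */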
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vf) {
      vf.StatisticsEnable = false;
   }

#if GEN_GEN >= 12
   /* Disable Primitive Replication. */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif
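
   /* One point is emitted per bs-byte block; this 3DPRIMITIVE is what
    * actually performs the copy.
    */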
   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
      prim.VertexCountPerInstance = size / bs;
      prim.StartVertexLocation = 0;
      prim.InstanceCount = 1;
      prim.StartInstanceLocation = 0;
      prim.BaseVertexLocation = 0;
   }

   genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(cmd_buffer, SEQUENTIAL,
                                                       1ull << 32);

   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
}