i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear

The reference for MOVNTDQA says:

    For WC memory type, the nontemporal hint may be implemented by
    loading a temporary internal buffer with the equivalent of an
    aligned cache line without filling this data to the cache.
    [...] Subsequent MOVNTDQA reads to unread portions of the WC
    cache line will receive data from the temporary internal
    buffer if data is available.

This hidden, cache-line-sized temporary buffer can improve read
performance from write-combining (WC) maps.

v2: Add mfence at start of tiled_to_linear for streaming loads (Chris)
v3: add Android build support (Tapani)
v4: squash 'fix i915: Fix streaming loads for intel_tiled_memcpy';
    separate sse41 into its own static library (Tapani)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Reviewed-by: Matt Turner <mattst88@gmail.com> (v2)
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
This commit is contained in:
Scott D Phillips
2018-09-24 08:33:06 +03:00
committed by Tapani Pälli
parent 91d3a5d1a8
commit 11b1afdc92
9 changed files with 426 additions and 90 deletions

View File

@@ -51,6 +51,42 @@ I965_PERGEN_LIBS := \
libmesa_i965_gen10 \
libmesa_i965_gen11
# ---------------------------------------
# Build libmesa_intel_tiled_memcpy
# ---------------------------------------
include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_intel_tiled_memcpy
LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES)
LOCAL_SRC_FILES := $(intel_tiled_memcpy_FILES)
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
# ---------------------------------------
# Build libmesa_intel_tiled_memcpy_sse41
# ---------------------------------------
include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_intel_tiled_memcpy_sse41
LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES)
LOCAL_SRC_FILES := $(intel_tiled_memcpy_sse41_FILES)
ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
LOCAL_CFLAGS += \
-DUSE_SSE41 -msse4.1 -mstackrealign
endif
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
# ---------------------------------------
# Build libmesa_i965_gen4
# ---------------------------------------
@@ -289,6 +325,8 @@ LOCAL_SRC_FILES := \
LOCAL_WHOLE_STATIC_LIBRARIES := \
$(MESA_DRI_WHOLE_STATIC_LIBRARIES) \
$(I965_PERGEN_LIBS) \
libmesa_intel_tiled_memcpy \
libmesa_intel_tiled_memcpy_sse41 \
libmesa_intel_dev \
libmesa_intel_common \
libmesa_isl \

View File

@@ -92,8 +92,20 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
noinst_LTLIBRARIES = \
libi965_dri.la \
libintel_tiled_memcpy.la \
libintel_tiled_memcpy_sse41.la \
$(I965_PERGEN_LIBS)
libintel_tiled_memcpy_la_SOURCES = \
$(intel_tiled_memcpy_FILES)
libintel_tiled_memcpy_la_CFLAGS = \
$(AM_CFLAGS)
libintel_tiled_memcpy_sse41_la_SOURCES = \
$(intel_tiled_memcpy_sse41_FILES)
libintel_tiled_memcpy_sse41_la_CFLAGS = \
$(AM_CFLAGS) $(SSE41_CFLAGS)
libi965_dri_la_SOURCES = \
$(i965_FILES) \
$(i965_oa_GENERATED_FILES)
@@ -104,6 +116,8 @@ libi965_dri_la_LIBADD = \
$(top_builddir)/src/intel/compiler/libintel_compiler.la \
$(top_builddir)/src/intel/blorp/libblorp.la \
$(I965_PERGEN_LIBS) \
libintel_tiled_memcpy.la \
libintel_tiled_memcpy_sse41.la \
$(LIBDRM_LIBS)
BUILT_SOURCES = $(i965_oa_GENERATED_FILES)

View File

@@ -110,11 +110,17 @@ i965_FILES = \
intel_tex_image.c \
intel_tex_obj.h \
intel_tex_validate.c \
intel_tiled_memcpy.c \
intel_tiled_memcpy.h \
intel_upload.c \
libdrm_macros.h
intel_tiled_memcpy_FILES = \
intel_tiled_memcpy_normal.c \
intel_tiled_memcpy.h
intel_tiled_memcpy_sse41_FILES = \
intel_tiled_memcpy_sse41.c \
intel_tiled_memcpy_sse41.h
i965_gen4_FILES = \
genX_blorp_exec.c \
genX_state_upload.c

View File

@@ -566,6 +566,31 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
}
}
#if defined(INLINE_SSE41)
/**
 * memcpy-compatible copy that uses SSE4.1 streaming loads (MOVNTDQA) for
 * the two span sizes it special-cases.
 *
 *  - count == 16: one 16-byte streaming load, then one unaligned store.
 *  - count == 64: four 16-byte streaming loads are issued first, then the
 *    four unaligned stores.
 *  - any other count: asserted to be below 64 and handed to memcpy().
 *
 * The tiled-to-linear callers pass this function in the same argument slot
 * where the RGBA8 path passes rgba8_copy_aligned_src.
 * NOTE(review): MOVNTDQA needs a 16-byte aligned source; presumably that
 * slot guarantees it -- confirm against xtiled_to_linear/ytiled_to_linear.
 *
 * \param dest   destination buffer (stored to with unaligned stores)
 * \param src    source buffer
 * \param count  number of bytes to copy
 *
 * \return dest
 */
static ALWAYS_INLINE void *
_memcpy_streaming_load(void *dest, const void *src, size_t count)
{
   switch (count) {
   case 16: {
      __m128i chunk = _mm_stream_load_si128((__m128i *)src);

      _mm_storeu_si128((__m128i *)dest, chunk);
      return dest;
   }
   case 64: {
      __m128i *in = (__m128i *)src;
      __m128i *out = (__m128i *)dest;

      /* Issue all four loads before any store, as the original does. */
      __m128i chunk0 = _mm_stream_load_si128(in + 0);
      __m128i chunk1 = _mm_stream_load_si128(in + 1);
      __m128i chunk2 = _mm_stream_load_si128(in + 2);
      __m128i chunk3 = _mm_stream_load_si128(in + 3);

      _mm_storeu_si128(out + 0, chunk0);
      _mm_storeu_si128(out + 1, chunk1);
      _mm_storeu_si128(out + 2, chunk2);
      _mm_storeu_si128(out + 3, chunk3);
      return dest;
   }
   default:
      assert(count < 64); /* and (count < 16) for ytiled */
      return memcpy(dest, src, count);
   }
}
#endif
static mem_copy_fn
choose_copy_function(mem_copy_fn_type copy_type)
{
@@ -574,6 +599,10 @@ choose_copy_function(mem_copy_fn_type copy_type)
return memcpy;
case INTEL_COPY_RGBA8:
return rgba8_copy;
#if defined(INLINE_SSE41)
case INTEL_COPY_STREAMING_LOAD:
return _memcpy_streaming_load;
#endif
default:
assert(!"unreachable");
}
@@ -696,6 +725,12 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(INLINE_SSE41)
else if (mem_copy == _memcpy_streaming_load)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
} else {
@@ -706,6 +741,12 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(INLINE_SSE41)
else if (mem_copy == _memcpy_streaming_load)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
}
@@ -740,6 +781,12 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(INLINE_SSE41)
else if (copy_type == INTEL_COPY_STREAMING_LOAD)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
} else {
@@ -750,6 +797,12 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(INLINE_SSE41)
else if (copy_type == INTEL_COPY_STREAMING_LOAD)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
}
@@ -768,14 +821,14 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* 'dst' is the address of (0, 0) in the destination tiled texture.
* 'src' is the address of (xt1, yt1) in the source linear texture.
*/
void
linear_to_tiled(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
uint32_t dst_pitch, int32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
static void
intel_linear_to_tiled(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
uint32_t dst_pitch, int32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
tile_copy_fn tile_copy;
uint32_t xt0, xt3;
@@ -859,14 +912,14 @@ linear_to_tiled(uint32_t xt1, uint32_t xt2,
* 'dst' is the address of (xt1, yt1) in the destination linear texture.
* 'src' is the address of (0, 0) in the source tiled texture.
*/
void
tiled_to_linear(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
static void
intel_tiled_to_linear(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
tile_copy_fn tile_copy;
uint32_t xt0, xt3;
@@ -889,6 +942,15 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
unreachable("unsupported tiling");
}
#if defined(INLINE_SSE41)
if (copy_type == INTEL_COPY_STREAMING_LOAD) {
/* The hidden cacheline sized register used by movntdqa can apparently
* give you stale data, so do an mfence to invalidate it.
*/
_mm_mfence();
}
#endif
/* Round out to tile boundaries. */
xt0 = ALIGN_DOWN(xt1, tw);
xt3 = ALIGN_UP (xt2, tw);
@@ -938,69 +1000,3 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
}
}
}
/**
 * Pick the copy type to use for a tiled-format / client-format pair.
 *
 * Only two copy types are ever reported as valid: INTEL_COPY_MEMCPY (a
 * direct copy) and INTEL_COPY_RGBA8 (an RGBA <-> BGRA swap).  The swap is
 * its own inverse and a direct copy is symmetric, so the same answer serves
 * both tiled -> linear and linear -> tiled transfers.
 *
 * \param[in]  tiledFormat  The format of the tiled image
 * \param[in]  format       The GL format of the client data
 * \param[in]  type         The GL type of the client data
 * \param[out] copy_type    Always written: the selected copy type, or
 *                          INTEL_COPY_INVALID when the combination is not
 *                          supported
 * \param[out] cpp          Size in bytes of one pixel of the tiled format
 *                          (1 or 4).  Only meaningful when the function
 *                          returns true.
 *
 * \return true if the format and type combination are valid
 */
bool
intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
                      mem_copy_fn_type *copy_type, uint32_t *cpp)
{
   const bool client_is_rgba = (format == GL_RGBA);
   const bool client_is_bgra = (format == GL_BGRA);

   *copy_type = INTEL_COPY_INVALID;

   /* Invalid type/format combination */
   if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
       !client_is_rgba && !client_is_bgra)
      return false;

   switch (tiledFormat) {
   case MESA_FORMAT_L_UNORM8:
      if (format == GL_LUMINANCE) {
         *cpp = 1;
         *copy_type = INTEL_COPY_MEMCPY;
      }
      break;

   case MESA_FORMAT_A_UNORM8:
      if (format == GL_ALPHA) {
         *cpp = 1;
         *copy_type = INTEL_COPY_MEMCPY;
      }
      break;

   case MESA_FORMAT_B8G8R8A8_UNORM:
   case MESA_FORMAT_B8G8R8X8_UNORM:
   case MESA_FORMAT_B8G8R8A8_SRGB:
   case MESA_FORMAT_B8G8R8X8_SRGB:
      *cpp = 4;
      if (client_is_bgra)
         *copy_type = INTEL_COPY_MEMCPY;
      else if (client_is_rgba)
         *copy_type = INTEL_COPY_RGBA8;
      break;

   case MESA_FORMAT_R8G8B8A8_UNORM:
   case MESA_FORMAT_R8G8B8X8_UNORM:
   case MESA_FORMAT_R8G8B8A8_SRGB:
   case MESA_FORMAT_R8G8B8X8_SRGB:
      *cpp = 4;
      /* Copying from RGBA to BGRA is the same as BGRA to RGBA, so the
       * swapping copy covers the BGRA client case here.
       */
      if (client_is_bgra)
         *copy_type = INTEL_COPY_RGBA8;
      else if (client_is_rgba)
         *copy_type = INTEL_COPY_MEMCPY;
      break;

   default:
      /* Unhandled tiled format: copy_type stays INTEL_COPY_INVALID. */
      break;
   }

   return *copy_type != INTEL_COPY_INVALID;
}

View File

@@ -38,11 +38,21 @@
/**
 * Identifies which copy routine the tiled copy code should use for each
 * span of bytes.
 */
typedef enum {
/* Direct copy (standard memcpy). */
INTEL_COPY_MEMCPY = 0,
/* RGBA <-> BGRA converting copy (rgba8_copy). */
INTEL_COPY_RGBA8,
/* SSE4.1 streaming-load copy; choose_copy_function() only handles this
 * value when INLINE_SSE41 is defined.
 */
INTEL_COPY_STREAMING_LOAD,
/* No usable copy routine; intel_get_memcpy_type() leaves this value in
 * place when the format/type combination is not supported.
 */
INTEL_COPY_INVALID,
} mem_copy_fn_type;
/* Signature of a span copy routine.  It has the same shape as memcpy():
 * copy n bytes from src to dest and return a pointer.
 */
typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n);
/* Pointer to a tiled-to-linear copy entry point; the parameter list matches
 * tiled_to_linear().
 * NOTE(review): presumably lets a caller choose between tiled_to_linear()
 * and tiled_to_linear_sse41() at run time -- confirm against the callers.
 */
typedef void (*tiled_to_linear_fn)
(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type);
void
linear_to_tiled(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
@@ -61,8 +71,69 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
enum isl_tiling tiling,
mem_copy_fn_type copy_type);
bool intel_get_memcpy_type(mesa_format tiledFormat, GLenum format,
GLenum type, mem_copy_fn_type *copy_type,
uint32_t *cpp);
/**
 * Pick the copy type to use for a tiled-format / client-format pair.
 *
 * Only two copy types are ever reported as valid: INTEL_COPY_MEMCPY (a
 * direct copy) and INTEL_COPY_RGBA8 (an RGBA <-> BGRA swap).  The swap is
 * its own inverse and a direct copy is symmetric, so the same answer serves
 * both tiled -> linear and linear -> tiled transfers.
 *
 * \param[in]  tiledFormat  The format of the tiled image
 * \param[in]  format       The GL format of the client data
 * \param[in]  type         The GL type of the client data
 * \param[out] copy_type    Always written: the selected copy type, or
 *                          INTEL_COPY_INVALID when the combination is not
 *                          supported
 * \param[out] cpp          Size in bytes of one pixel of the tiled format
 *                          (1 or 4).  Only meaningful when the function
 *                          returns true.
 *
 * \return true if the format and type combination are valid
 */
static MAYBE_UNUSED bool
intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
                      mem_copy_fn_type *copy_type, uint32_t *cpp)
{
   const bool fmt_is_rgba = (format == GL_RGBA);
   const bool fmt_is_bgra = (format == GL_BGRA);

   const bool tiled_is_bgra8 =
      tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM ||
      tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM ||
      tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB ||
      tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB;
   const bool tiled_is_rgba8 =
      tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM ||
      tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM ||
      tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB ||
      tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB;

   *copy_type = INTEL_COPY_INVALID;

   /* Invalid type/format combination */
   if (type == GL_UNSIGNED_INT_8_8_8_8_REV && !fmt_is_rgba && !fmt_is_bgra)
      return false;

   /* Single-channel 8-bit formats only ever need a direct copy. */
   if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) ||
       (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) {
      *cpp = 1;
      *copy_type = INTEL_COPY_MEMCPY;
      return true;
   }

   if (tiled_is_bgra8 || tiled_is_rgba8) {
      *cpp = 4;
      if (fmt_is_bgra || fmt_is_rgba) {
         /* Matching channel order is a direct copy.  Otherwise the swapping
          * copy is used; RGBA -> BGRA and BGRA -> RGBA are the same
          * operation, so one routine covers both directions.
          */
         const bool same_order = tiled_is_bgra8 ? fmt_is_bgra : fmt_is_rgba;

         *copy_type = same_order ? INTEL_COPY_MEMCPY : INTEL_COPY_RGBA8;
      }
   }

   return *copy_type != INTEL_COPY_INVALID;
}
#endif /* INTEL_TILED_MEMCPY */

View File

@@ -0,0 +1,59 @@
/*
* Mesa 3-D graphics library
*
* Copyright 2012 Intel Corporation
* Copyright 2013 Google
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chad Versace <chad.versace@linux.intel.com>
* Frank Henigman <fjhenigman@google.com>
*/
#include "intel_tiled_memcpy.c"
/**
 * Exported linear-to-tiled copy entry point.
 *
 * Forwards every argument unchanged to intel_linear_to_tiled(), the static
 * implementation pulled in by the #include of intel_tiled_memcpy.c above.
 * This file does not define INLINE_SSE41 before that include.
 *
 * Per the comment on the implementation, 'dst' is the address of (0, 0) in
 * the destination tiled texture and 'src' is the address of (xt1, yt1) in
 * the source linear texture.
 */
void
linear_to_tiled(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
uint32_t dst_pitch, int32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch,
has_swizzling, tiling, copy_type);
}
/**
 * Exported tiled-to-linear copy entry point.
 *
 * Forwards every argument unchanged to intel_tiled_to_linear(), the static
 * implementation pulled in by the #include of intel_tiled_memcpy.c above.
 * This file does not define INLINE_SSE41 before that include, so the code
 * guarded by that macro (the streaming-load copy) is not compiled here.
 *
 * Per the comment on the implementation, 'dst' is the address of (xt1, yt1)
 * in the destination linear texture and 'src' is the address of (0, 0) in
 * the source tiled texture.
 */
void
tiled_to_linear(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch,
has_swizzling, tiling, copy_type);
}

View File

@@ -0,0 +1,61 @@
/*
* Mesa 3-D graphics library
*
* Copyright 2012 Intel Corporation
* Copyright 2013 Google
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chad Versace <chad.versace@linux.intel.com>
* Frank Henigman <fjhenigman@google.com>
*/
#define INLINE_SSE41
#include "intel_tiled_memcpy_sse41.h"
#include "intel_tiled_memcpy.c"
/**
 * Linear-to-tiled copy entry point of the SSE4.1 build.
 *
 * Forwards every argument unchanged to intel_linear_to_tiled(), the static
 * implementation pulled in by the #include of intel_tiled_memcpy.c above,
 * which this file compiles with INLINE_SSE41 defined.
 *
 * NOTE(review): the INLINE_SSE41-guarded code visible in this change is all
 * on the tiled-to-linear side; presumably this variant exists so the SSE4.1
 * library exposes a matching pair -- confirm.
 */
void
linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
uint32_t dst_pitch, int32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch,
has_swizzling, tiling, copy_type);
}
/**
 * Tiled-to-linear copy entry point of the SSE4.1 build.
 *
 * Forwards every argument unchanged to intel_tiled_to_linear(), the static
 * implementation pulled in by the #include of intel_tiled_memcpy.c above.
 * Because this file defines INLINE_SSE41 before that include, the
 * implementation is compiled with its INTEL_COPY_STREAMING_LOAD handling
 * (streaming-load copy plus the leading mfence) enabled.
 */
void
tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type)
{
intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch,
has_swizzling, tiling, copy_type);
}

View File

@@ -0,0 +1,59 @@
/*
* Mesa 3-D graphics library
*
* Copyright 2012 Intel Corporation
* Copyright 2013 Google
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chad Versace <chad.versace@linux.intel.com>
* Frank Henigman <fjhenigman@google.com>
*/
#ifndef INTEL_TILED_MEMCPY_SSE41_H
#define INTEL_TILED_MEMCPY_SSE41_H
#include <stdint.h>
#include "main/mtypes.h"
#include "isl/isl.h"
#include "intel_tiled_memcpy.h"
/**
 * Linear-to-tiled copy built from intel_tiled_memcpy.c with INLINE_SSE41
 * defined.  Takes the same parameters as linear_to_tiled().
 */
void
linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
uint32_t dst_pitch, int32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type);
/**
 * Tiled-to-linear copy built from intel_tiled_memcpy.c with INLINE_SSE41
 * defined, which enables handling of INTEL_COPY_STREAMING_LOAD.  Takes the
 * same parameters as tiled_to_linear().
 */
void
tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2,
uint32_t yt1, uint32_t yt2,
char *dst, const char *src,
int32_t dst_pitch, uint32_t src_pitch,
bool has_swizzling,
enum isl_tiling tiling,
mem_copy_fn_type copy_type);
#endif /* INTEL_TILED_MEMCPY_SSE41_H */

View File

@@ -129,12 +129,20 @@ files_i965 = files(
'intel_tex_image.c',
'intel_tex_obj.h',
'intel_tex_validate.c',
'intel_tiled_memcpy.c',
'intel_tiled_memcpy.h',
'intel_upload.c',
'libdrm_macros.h',
)
files_intel_tiled_memcpy = files(
'intel_tiled_memcpy_normal.c',
'intel_tiled_memcpy.h',
)
files_intel_tiled_memcpy_sse41 = files(
'intel_tiled_memcpy_sse41.c',
'intel_tiled_memcpy_sse41.h',
)
i965_gen_libs = []
foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
i965_gen_libs += static_library(
@@ -176,6 +184,30 @@ i965_oa_sources = custom_target(
],
)
intel_tiled_memcpy = static_library(
'intel_tiled_memcpy',
[files_intel_tiled_memcpy],
include_directories : [
inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
],
c_args : [c_vis_args, no_override_init_args, '-msse2'],
)
if with_sse41
intel_tiled_memcpy_sse41 = static_library(
'intel_tiled_memcpy_sse41',
[files_intel_tiled_memcpy_sse41],
include_directories : [
inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
],
link_args : [ '-Wl,--exclude-libs=ALL' ],
c_args : [c_vis_args, no_override_init_args, '-Wl,--exclude-libs=ALL', '-msse2', sse41_args],
)
else
intel_tiled_memcpy_sse41 = []
endif
libi965 = static_library(
'i965',
[files_i965, i965_oa_sources, ir_expression_operation_h,
@@ -187,7 +219,7 @@ libi965 = static_library(
cpp_args : [cpp_vis_args, c_sse2_args],
link_with : [
i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
libblorp,
libblorp, intel_tiled_memcpy, intel_tiled_memcpy_sse41
],
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
)