Import Thomas Hellstrom's SSE memcpy code from the via X.org driver.
Add a TextureMemCpy callback, called from texstore.c when copying texture data via the memcpy_texture() path. Enable this code in the via driver - 100% speedup in texdown.c results.
This commit is contained in:
@@ -107,6 +107,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
|
||||
driver->DeleteTexture = _mesa_delete_texture_object;
|
||||
driver->NewTextureImage = _mesa_new_texture_image;
|
||||
driver->FreeTexImageData = _mesa_free_texture_image_data;
|
||||
driver->TextureMemCpy = _mesa_memcpy;
|
||||
driver->IsTextureResident = NULL;
|
||||
driver->PrioritizeTexture = NULL;
|
||||
driver->ActiveTexture = NULL;
|
||||
|
@@ -12,6 +12,7 @@ DRIVER_SOURCES = \
|
||||
via_fb.c \
|
||||
via_tex.c \
|
||||
via_ioctl.c \
|
||||
via_memcpy.c \
|
||||
via_render.c \
|
||||
via_screen.c \
|
||||
via_span.c \
|
||||
|
138
src/mesa/drivers/dri/unichrome/via_memcpy.c
Normal file
138
src/mesa/drivers/dri/unichrome/via_memcpy.c
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Thomas' original gutted for mesa by Keith Whitwell
|
||||
*/
|
||||
|
||||
#include "via_tex.h"
|
||||
|
||||
|
||||
/* Prefetch mnemonic (the non-temporal "nta" hint form).  It is a string
 * literal so that it can be pasted in front of an address operand inside
 * the asm templates of PREFETCH1() and SSE_CPY().
 */
#define SSE_PREFETCH " prefetchnta "
|
||||
/* Store fence, issued once at the end of via_sse_memcpy() after the
 * non-temporal stores.  The "memory" clobber additionally stops the
 * compiler from moving memory accesses across it.  Note that the macro
 * body already ends in ';'.
 */
#define FENCE __asm__ __volatile__ ("sfence":::"memory");
|
||||
|
||||
|
||||
/* Issue an initial burst of prefetch hints for the start of a source buffer.
 *
 * arch_prefetch -- string literal holding the prefetch mnemonic
 *                  (e.g. SSE_PREFETCH); pasted before each address operand.
 * from          -- source pointer; passed to the asm in a general register.
 *
 * Hints are issued at 32-byte steps for offsets 0..192, then 256 and 288.
 * NOTE(review): offset 224 is absent from the sequence (192 is followed
 * directly by 256) -- possibly an omission; confirm before changing.
 * The local labels "1:" and "2:" are not the target of any jump in this
 * template.
 */
#define PREFETCH1(arch_prefetch,from) \
|
||||
__asm__ __volatile__ ( \
|
||||
"1: " arch_prefetch "(%0)\n" \
|
||||
arch_prefetch "32(%0)\n" \
|
||||
arch_prefetch "64(%0)\n" \
|
||||
arch_prefetch "96(%0)\n" \
|
||||
arch_prefetch "128(%0)\n" \
|
||||
arch_prefetch "160(%0)\n" \
|
||||
arch_prefetch "192(%0)\n" \
|
||||
arch_prefetch "256(%0)\n" \
|
||||
arch_prefetch "288(%0)\n" \
|
||||
"2:\n" \
|
||||
: : "r" (from) );
|
||||
|
||||
|
||||
|
||||
/* Copy n bytes from 'from' to 'to' with x86 string-move instructions.
 *
 * to, from -- pointer lvalues.  They are tied to EDI/ESI as in/out
 *             operands, so on exit both variables have been advanced past
 *             the bytes that were copied.
 * n        -- byte count.  The "q" constraint places it in a
 *             byte-addressable register because its low byte is tested
 *             via %b2.
 *
 * Sequence: ECX = n >> 2 and "rep movsl" copies that many dwords; then one
 * word is copied if bit 1 of n is set, and one byte if bit 0 is set.
 * ECX and memory are declared clobbered.
 *
 * NOTE(review): the template uses 32-bit register names and casts the
 * pointers through (long); this looks like it assumes 32-bit x86 --
 * confirm before building for other targets.
 */
#define small_memcpy(to,from,n) \
|
||||
{ \
|
||||
__asm__ __volatile__( \
|
||||
"movl %2,%%ecx\n\t" \
|
||||
"sarl $2,%%ecx\n\t" \
|
||||
"rep ; movsl\n\t" \
|
||||
"testb $2,%b2\n\t" \
|
||||
"je 1f\n\t" \
|
||||
"movsw\n" \
|
||||
"1:\ttestb $1,%b2\n\t" \
|
||||
"je 2f\n\t" \
|
||||
"movsb\n" \
|
||||
"2:" \
|
||||
:"=&D" (to), "=&S" (from) \
|
||||
:"q" (n),"0" ((long) to),"1" ((long) from) \
|
||||
: "%ecx","memory"); \
|
||||
}
|
||||
|
||||
|
||||
/* Copy lcnt chunks of 64 bytes from 'from' to 'to' through xmm0..xmm3.
 *
 * prefetch -- string pasted in front of "320(%1)" and "352(%1)", i.e. a
 *             prefetch mnemonic for data 320 and 352 bytes ahead of the
 *             current source position.  via_sse_memcpy() also passes "#"
 *             here; presumably that turns those two lines into assembler
 *             comments and so disables the prefetch -- confirm against the
 *             assembler in use.
 * from     -- source pointer lvalue (ESI); advanced by 64 per iteration.
 * to       -- destination pointer lvalue (EDI); advanced by 64 per
 *             iteration.
 * dummy    -- scratch int lvalue that receives the loop counter register.
 * lcnt     -- number of 64-byte chunks.  The loop decrements and then
 *             tests, so callers must pass a non-zero count.
 *
 * The first branch is taken when the source address is not a multiple of
 * 16 and loads with movups; the second branch loads with movaps.  Both
 * branches store with movntps.  Only the source alignment is examined:
 * NOTE(review): movntps is documented as requiring a 16-byte aligned memory
 * operand, so the caller has to guarantee that 'to' is aligned.
 * NOTE(review): xmm0-xmm3 are written but are not named in the clobber
 * list.
 */
#define SSE_CPY(prefetch,from,to,dummy,lcnt) \
|
||||
if ((unsigned long) from & 15) { \
|
||||
__asm__ __volatile__ ( \
|
||||
"1:\n" \
|
||||
prefetch "320(%1)\n" \
|
||||
" movups (%1), %%xmm0\n" \
|
||||
" movups 16(%1), %%xmm1\n" \
|
||||
" movntps %%xmm0, (%0)\n" \
|
||||
" movntps %%xmm1, 16(%0)\n" \
|
||||
prefetch "352(%1)\n" \
|
||||
" movups 32(%1), %%xmm2\n" \
|
||||
" movups 48(%1), %%xmm3\n" \
|
||||
" movntps %%xmm2, 32(%0)\n" \
|
||||
" movntps %%xmm3, 48(%0)\n" \
|
||||
" addl $64,%0\n" \
|
||||
" addl $64,%1\n" \
|
||||
" decl %2\n" \
|
||||
" jne 1b\n" \
|
||||
:"=&D"(to), "=&S"(from), "=&r"(dummy) \
|
||||
:"0" (to), "1" (from), "2" (lcnt): "memory"); \
|
||||
} else { \
|
||||
__asm__ __volatile__ ( \
|
||||
"2:\n" \
|
||||
prefetch "320(%1)\n" \
|
||||
" movaps (%1), %%xmm0\n" \
|
||||
" movaps 16(%1), %%xmm1\n" \
|
||||
" movntps %%xmm0, (%0)\n" \
|
||||
" movntps %%xmm1, 16(%0)\n" \
|
||||
prefetch "352(%1)\n" \
|
||||
" movaps 32(%1), %%xmm2\n" \
|
||||
" movaps 48(%1), %%xmm3\n" \
|
||||
" movntps %%xmm2, 32(%0)\n" \
|
||||
" movntps %%xmm3, 48(%0)\n" \
|
||||
" addl $64,%0\n" \
|
||||
" addl $64,%1\n" \
|
||||
" decl %2\n" \
|
||||
" jne 2b\n" \
|
||||
:"=&D"(to), "=&S"(from), "=&r"(dummy) \
|
||||
:"0" (to), "1" (from), "2" (lcnt): "memory"); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*/
|
||||
void via_sse_memcpy(void *to,
|
||||
const void *from,
|
||||
size_t sz)
|
||||
|
||||
{
|
||||
int dummy;
|
||||
int lcnt = sz >> 6;
|
||||
int rest = sz & 63;
|
||||
|
||||
PREFETCH1(SSE_PREFETCH,from);
|
||||
|
||||
if (lcnt > 5) {
|
||||
lcnt -= 5;
|
||||
SSE_CPY(SSE_PREFETCH,from,to,dummy,lcnt);
|
||||
lcnt = 5;
|
||||
}
|
||||
if (lcnt) {
|
||||
SSE_CPY("#",from,to,dummy,lcnt);
|
||||
}
|
||||
if (rest) small_memcpy(to, from, rest);
|
||||
FENCE;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -948,6 +948,11 @@ void viaInitTextureFuncs(struct dd_function_table * functions)
|
||||
functions->DeleteTexture = _mesa_delete_texture_object;
|
||||
functions->FreeTexImageData = viaFreeTextureImageData;
|
||||
|
||||
if (getenv("VIA_NO_SSE"))
|
||||
functions->TextureMemCpy = _mesa_memcpy;
|
||||
else
|
||||
functions->TextureMemCpy = via_sse_memcpy;
|
||||
|
||||
functions->UpdateTexturePalette = 0;
|
||||
functions->IsTextureResident = viaIsTextureResident;
|
||||
}
|
||||
|
@@ -34,4 +34,6 @@ GLboolean viaUpdateTextureState(GLcontext *ctx);
|
||||
void viaInitTextureFuncs(struct dd_function_table * functions);
|
||||
GLboolean viaSwapOutWork( struct via_context *vmesa );
|
||||
|
||||
void via_sse_memcpy( void *to, const void *from, size_t sz );
|
||||
|
||||
#endif
|
||||
|
@@ -514,6 +514,19 @@ struct dd_function_table {
|
||||
*/
|
||||
void (*FreeTexImageData)( GLcontext *ctx, struct gl_texture_image *tImage );
|
||||
|
||||
/**
|
||||
* Note: no context argument. This function doesn't initially look
|
||||
* like it belongs here, except that the driver is the only entity
|
||||
* that knows for sure how the texture memory is allocated - via
|
||||
* the above callbacks. There is then an argument that the driver
|
||||
* knows what memcpy paths might be fast. Typically this is invoked with
|
||||
*
|
||||
* to -- a pointer into texture memory allocated by NewTextureImage() above.
|
||||
* from -- a pointer into client memory or a mesa temporary.
|
||||
* sz -- nr bytes to copy.
|
||||
*/
|
||||
void (*TextureMemCpy)( void *to, const void *from, size_t sz );
|
||||
|
||||
/**
|
||||
* Called by glAreTextureResident().
|
||||
*/
|
||||
|
@@ -671,7 +671,8 @@ _mesa_swizzle_ubyte_image(GLcontext *ctx,
|
||||
* 1D, 2D and 3D images supported.
|
||||
*/
|
||||
static void
|
||||
memcpy_texture(GLuint dimensions,
|
||||
memcpy_texture(GLcontext *ctx,
|
||||
GLuint dimensions,
|
||||
const struct gl_texture_format *dstFormat,
|
||||
GLvoid *dstAddr,
|
||||
GLint dstXoffset, GLint dstYoffset, GLint dstZoffset,
|
||||
@@ -701,7 +702,7 @@ memcpy_texture(GLuint dimensions,
|
||||
dstImageStride == bytesPerImage) ||
|
||||
(srcDepth == 1))) {
|
||||
/* one big memcpy */
|
||||
_mesa_memcpy(dstImage, srcImage, bytesPerTexture);
|
||||
ctx->Driver.TextureMemCpy(dstImage, srcImage, bytesPerTexture);
|
||||
}
|
||||
else {
|
||||
GLint img, row;
|
||||
@@ -709,7 +710,7 @@ memcpy_texture(GLuint dimensions,
|
||||
const GLubyte *srcRow = srcImage;
|
||||
GLubyte *dstRow = dstImage;
|
||||
for (row = 0; row < srcHeight; row++) {
|
||||
_mesa_memcpy(dstRow, srcRow, bytesPerRow);
|
||||
ctx->Driver.TextureMemCpy(dstRow, srcRow, bytesPerRow);
|
||||
dstRow += dstRowStride;
|
||||
srcRow += srcRowStride;
|
||||
}
|
||||
@@ -776,7 +777,7 @@ _mesa_texstore_rgba(GLcontext *ctx, GLuint dims,
|
||||
baseInternalFormat == srcFormat &&
|
||||
srcType == CHAN_TYPE) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -862,7 +863,7 @@ _mesa_texstore_depth_component_float32(STORE_PARAMS)
|
||||
srcFormat == GL_DEPTH_COMPONENT &&
|
||||
srcType == GL_FLOAT) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -907,7 +908,7 @@ _mesa_texstore_depth_component16(STORE_PARAMS)
|
||||
srcFormat == GL_DEPTH_COMPONENT &&
|
||||
srcType == GL_UNSIGNED_SHORT) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -958,7 +959,7 @@ _mesa_texstore_rgb565(STORE_PARAMS)
|
||||
srcFormat == GL_RGB &&
|
||||
srcType == GL_UNSIGNED_SHORT_5_6_5) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1067,7 +1068,7 @@ _mesa_texstore_rgba8888(STORE_PARAMS)
|
||||
((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
|
||||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV))) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1173,7 +1174,7 @@ _mesa_texstore_argb8888(STORE_PARAMS)
|
||||
((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
|
||||
srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
|
||||
/* simple memcpy path (little endian) */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1187,7 +1188,7 @@ _mesa_texstore_argb8888(STORE_PARAMS)
|
||||
((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
|
||||
srcType == GL_UNSIGNED_INT_8_8_8_8)) {
|
||||
/* simple memcpy path (big endian) */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1348,7 +1349,7 @@ _mesa_texstore_rgb888(STORE_PARAMS)
|
||||
srcType == GL_UNSIGNED_BYTE &&
|
||||
littleEndian) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1453,7 +1454,7 @@ _mesa_texstore_bgr888(STORE_PARAMS)
|
||||
srcType == GL_UNSIGNED_BYTE &&
|
||||
littleEndian) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1537,7 +1538,7 @@ _mesa_texstore_argb4444(STORE_PARAMS)
|
||||
srcFormat == GL_BGRA &&
|
||||
srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1607,7 +1608,7 @@ _mesa_texstore_argb1555(STORE_PARAMS)
|
||||
srcFormat == GL_BGRA &&
|
||||
srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1680,7 +1681,7 @@ _mesa_texstore_al88(STORE_PARAMS)
|
||||
srcType == GL_UNSIGNED_BYTE &&
|
||||
littleEndian) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1744,7 +1745,7 @@ _mesa_texstore_rgb332(STORE_PARAMS)
|
||||
baseInternalFormat == GL_RGB &&
|
||||
srcFormat == GL_RGB && srcType == GL_UNSIGNED_BYTE_3_3_2) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1802,7 +1803,7 @@ _mesa_texstore_a8(STORE_PARAMS)
|
||||
baseInternalFormat == srcFormat &&
|
||||
srcType == GL_UNSIGNED_BYTE) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1856,7 +1857,7 @@ _mesa_texstore_ci8(STORE_PARAMS)
|
||||
srcFormat == GL_COLOR_INDEX &&
|
||||
srcType == GL_UNSIGNED_BYTE) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1906,7 +1907,7 @@ _mesa_texstore_ycbcr(STORE_PARAMS)
|
||||
ASSERT(baseInternalFormat == GL_YCBCR_MESA);
|
||||
|
||||
/* always just memcpy since no pixel transfer ops apply */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -1971,7 +1972,7 @@ _mesa_texstore_rgba_float32(STORE_PARAMS)
|
||||
baseInternalFormat == srcFormat &&
|
||||
srcType == GL_FLOAT) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
@@ -2039,7 +2040,7 @@ _mesa_texstore_rgba_float16(STORE_PARAMS)
|
||||
baseInternalFormat == srcFormat &&
|
||||
srcType == GL_HALF_FLOAT_ARB) {
|
||||
/* simple memcpy path */
|
||||
memcpy_texture(dims,
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
|
||||
dstRowStride, dstImageStride,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
|
Reference in New Issue
Block a user