panfrost: Size tiled temp buffers correctly
This should lower transient memory usage and slightly improve performance (less memory to malloc/free, better cache locality, etc.).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
@@ -390,8 +390,6 @@ panfrost_transfer_map(struct pipe_context *pctx,
|
|||||||
transfer->base.level = level;
|
transfer->base.level = level;
|
||||||
transfer->base.usage = usage;
|
transfer->base.usage = usage;
|
||||||
transfer->base.box = *box;
|
transfer->base.box = *box;
|
||||||
transfer->base.stride = bo->slices[level].stride;
|
|
||||||
transfer->base.layer_stride = bo->cubemap_stride;
|
|
||||||
|
|
||||||
pipe_resource_reference(&transfer->base.resource, resource);
|
pipe_resource_reference(&transfer->base.resource, resource);
|
||||||
|
|
||||||
@@ -413,12 +411,17 @@ panfrost_transfer_map(struct pipe_context *pctx,
|
|||||||
if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
|
if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
transfer->base.stride = box->width * bytes_per_pixel;
|
||||||
|
transfer->base.layer_stride = transfer->base.stride * box->height;
|
||||||
|
|
||||||
/* TODO: Reads */
|
/* TODO: Reads */
|
||||||
/* TODO: Only allocate "just" enough, shortening the stride */
|
transfer->map = malloc(transfer->base.layer_stride * box->depth);
|
||||||
transfer->map = malloc(transfer->base.stride * box->height);
|
|
||||||
|
|
||||||
return transfer->map;
|
return transfer->map;
|
||||||
} else {
|
} else {
|
||||||
|
transfer->base.stride = bo->slices[level].stride;
|
||||||
|
transfer->base.layer_stride = bo->cubemap_stride;
|
||||||
|
|
||||||
return bo->cpu
|
return bo->cpu
|
||||||
+ bo->slices[level].offset
|
+ bo->slices[level].offset
|
||||||
+ transfer->base.box.z * bo->cubemap_stride
|
+ transfer->base.box.z * bo->cubemap_stride
|
||||||
@@ -440,7 +443,6 @@ panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *
|
|||||||
trans->base.box.width,
|
trans->base.box.width,
|
||||||
trans->base.box.height,
|
trans->base.box.height,
|
||||||
util_format_get_blocksize(rsrc->base.format),
|
util_format_get_blocksize(rsrc->base.format),
|
||||||
bo->slices[level].stride,
|
|
||||||
u_minify(rsrc->base.width0, level),
|
u_minify(rsrc->base.width0, level),
|
||||||
trans->map,
|
trans->map,
|
||||||
bo->cpu
|
bo->cpu
|
||||||
|
@@ -149,15 +149,18 @@ swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch,
|
|||||||
void
|
void
|
||||||
panfrost_texture_swizzle(unsigned off_x,
|
panfrost_texture_swizzle(unsigned off_x,
|
||||||
unsigned off_y,
|
unsigned off_y,
|
||||||
int width, int height, int bytes_per_pixel, int source_stride, int dest_width,
|
int width, int height, int bytes_per_pixel, int dest_width,
|
||||||
const uint8_t *pixels,
|
const uint8_t *pixels,
|
||||||
uint8_t *ldest)
|
uint8_t *ldest)
|
||||||
{
|
{
|
||||||
/* Calculate maximum size, overestimating a bit */
|
/* Calculate maximum size, overestimating a bit */
|
||||||
int block_pitch = ALIGN(dest_width, 16) >> 4;
|
int block_pitch = ALIGN(dest_width, 16) >> 4;
|
||||||
|
|
||||||
|
/* Strides must be tight, since we're only ever called indirectly */
|
||||||
|
int source_stride = width * bytes_per_pixel;
|
||||||
|
|
||||||
/* Use fast path if available */
|
/* Use fast path if available */
|
||||||
if (!(off_x || off_y)) {
|
if (!(off_x || off_y) && (width == dest_width)) {
|
||||||
if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
|
if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
|
||||||
swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
|
swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
|
||||||
return;
|
return;
|
||||||
|
@@ -32,7 +32,7 @@ panfrost_generate_space_filler_indices(void);
|
|||||||
|
|
||||||
void
|
void
|
||||||
panfrost_texture_swizzle(unsigned off_x, unsigned off_y,
|
panfrost_texture_swizzle(unsigned off_x, unsigned off_y,
|
||||||
int width, int height, int bytes_per_pixel, int source_stride, int dest_width,
|
int width, int height, int bytes_per_pixel, int dest_width,
|
||||||
const uint8_t *pixels,
|
const uint8_t *pixels,
|
||||||
uint8_t *ldest);
|
uint8_t *ldest);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user