diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 770ebc333b0..374345b7c6f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -2462,6 +2462,8 @@ st_GetTexSubImage(struct gl_context * ctx, texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); st_flush_bitmap_cache(st); + if (st->force_compute_based_texture_transfer) + goto non_blit_transfer; /* GetTexImage only returns a single face for cubemaps. */ if (gl_target == GL_TEXTURE_CUBE_MAP) { @@ -2571,7 +2573,7 @@ st_GetTexSubImage(struct gl_context * ctx, non_blit_transfer: if (done) return; - if (st->allow_compute_based_texture_transfer) { + if (st->allow_compute_based_texture_transfer || st->force_compute_based_texture_transfer) { if (st_GetTexSubImage_shader(ctx, xoffset, yoffset, zoffset, width, height, depth, format, type, pixels, texImage)) return; } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 76a05f5d18c..b2ca05c189e 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -147,6 +147,8 @@ struct st_context boolean has_bptc; boolean prefer_blit_based_texture_transfer; boolean allow_compute_based_texture_transfer; + boolean force_compute_based_texture_transfer; + boolean force_specialized_compute_transfer; boolean force_persample_in_shader; boolean has_shareable_shaders; boolean has_half_float_packing; diff --git a/src/mesa/state_tracker/st_pbo.c b/src/mesa/state_tracker/st_pbo.c index 27ef9916282..210bba4624e 100644 --- a/src/mesa/state_tracker/st_pbo.c +++ b/src/mesa/state_tracker/st_pbo.c @@ -690,7 +690,13 @@ st_init_pbo_helpers(struct st_context *st) memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state)); st->pbo.raster.half_pixel_center = 1; - if (st->allow_compute_based_texture_transfer) + const char *pbo = debug_get_option("MESA_COMPUTE_PBO", NULL); + if (pbo) { + st->force_compute_based_texture_transfer = true; + st->force_specialized_compute_transfer = !strncmp(pbo, "spec", 4); + } + + if (st->allow_compute_based_texture_transfer || st->force_compute_based_texture_transfer) st->pbo.shaders = _mesa_hash_table_create_u32_keys(NULL); } diff --git a/src/mesa/state_tracker/st_pbo_compute.c b/src/mesa/state_tracker/st_pbo_compute.c index f129bd4942e..15e9948490d 100644 --- a/src/mesa/state_tracker/st_pbo_compute.c +++ b/src/mesa/state_tracker/st_pbo_compute.c @@ -854,6 +854,19 @@ add_spec_data(struct pbo_async_data *async, struct pbo_data *pd) return spec; } +static struct pbo_async_data * +add_async_data(struct st_context *st, enum pipe_texture_target view_target, unsigned num_components, uint32_t hash_key) +{ + struct pbo_async_data *async = calloc(1, sizeof(struct pbo_async_data)); + async->st = st; + async->target = view_target; + async->num_components = num_components; + util_queue_fence_init(&async->fence); + _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, async); + _mesa_set_init(&async->specialized, NULL, hash_pbo_data, equals_pbo_data); + return async; +} + static struct pipe_resource * download_texture_compute(struct st_context *st, const struct gl_pixelstore_attrib *pack, @@ -903,7 +916,22 @@ download_texture_compute(struct st_context *st, struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key); void *cs = NULL; if (he) { - if (screen->driver_thread_add_job) { + /* disable async if MESA_COMPUTE_PBO is set */ + if (st->force_specialized_compute_transfer) { + struct pbo_async_data *async = he->data; + struct pbo_spec_async_data *spec = add_spec_data(async, &pd); + if (spec->cs) { + cs = spec->cs; + } else { + create_spec_shader_async(spec, NULL, 0); + struct pipe_shader_state state = { + .type = PIPE_SHADER_IR_NIR, + .ir.nir = spec->nir, + }; + cs = spec->cs = st_create_nir_shader(st, &state); + } + cb.buffer_size = 2 * sizeof(uint32_t); + } else if (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job) { struct pbo_async_data *async = he->data; struct pbo_spec_async_data *spec = add_spec_data(async, &pd); if (!util_queue_fence_is_signalled(&async->fence)) @@ -947,28 +975,33 @@ download_texture_compute(struct st_context *st, cs = he->data; } } else { - if (screen->driver_thread_add_job) { - struct pbo_async_data *async = calloc(1, sizeof(struct pbo_async_data)); - async->st = st; - async->target = view_target; - async->num_components = num_components; - util_queue_fence_init(&async->fence); + if (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job) { + struct pbo_async_data *async = add_async_data(st, view_target, num_components, hash_key); screen->driver_thread_add_job(screen, async, &async->fence, create_conversion_shader_async, NULL, 0); - _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, async); - - _mesa_set_init(&async->specialized, NULL, hash_pbo_data, equals_pbo_data); add_spec_data(async, &pd); return NULL; + } + + if (st->force_specialized_compute_transfer) { + struct pbo_async_data *async = add_async_data(st, view_target, num_components, hash_key); + create_conversion_shader_async(async, NULL, 0); + struct pbo_spec_async_data *spec = add_spec_data(async, &pd); + create_spec_shader_async(spec, NULL, 0); + struct pipe_shader_state state = { + .type = PIPE_SHADER_IR_NIR, + .ir.nir = spec->nir, + }; + cs = spec->cs = st_create_nir_shader(st, &state); + cb.buffer_size = 2 * sizeof(uint32_t); } else { nir_shader *nir = create_conversion_shader(st, view_target, num_components); struct pipe_shader_state state = { .type = PIPE_SHADER_IR_NIR, .ir.nir = nir, }; - cs = st_create_nir_shader(st, &state); + he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs); } - he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs); } assert(cs); struct cso_context *cso = st->cso_context; @@ -1265,7 +1298,8 @@ st_GetTexSubImage_shader(struct gl_context * ctx, } /* check with the driver to see if memcpy is likely to be faster */ - if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true)) + if (!st->force_compute_based_texture_transfer && + !screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true)) return false; view_target = get_target_from_texture(src); @@ -1305,7 +1339,8 @@ st_pbo_compute_deinit(struct st_context *st) if (!st->pbo.shaders) return; hash_table_foreach(st->pbo.shaders, entry) { - if (screen->driver_thread_add_job) { + if (st->force_specialized_compute_transfer || + (!st->force_compute_based_texture_transfer && screen->driver_thread_add_job)) { struct pbo_async_data *async = entry->data; util_queue_fence_wait(&async->fence); if (async->cs)