llvmpipe/cs: rework thread pool to avoid mtx locking

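This helps reduce the mtx lock/unlock overhead for the thread pool
when the work distributes evenly across the number of threads: each
worker now claims a batch of num_iters / num_threads iterations per
lock acquisition instead of a single iteration.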

The CL CTS conversions tests really hit this path, and the change takes
maybe 10-20s off a 5min test run.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12432>
This commit is contained in:
Dave Airlie
2021-08-16 11:18:15 +10:00
parent 53aade0ef0
commit 69109e0b19
2 changed files with 21 additions and 3 deletions

View File

@@ -43,6 +43,7 @@ lp_cs_tpool_worker(void *data)
while (!pool->shutdown) {
struct lp_cs_tpool_task *task;
unsigned iter_per_thread;
while (list_is_empty(&pool->workqueue) && !pool->shutdown)
cnd_wait(&pool->new_work, &pool->m);
@@ -52,15 +53,26 @@ lp_cs_tpool_worker(void *data)
task = list_first_entry(&pool->workqueue, struct lp_cs_tpool_task,
list);
unsigned this_iter = task->iter_start++;
unsigned this_iter = task->iter_start;
iter_per_thread = task->iter_per_thread;
if (task->iter_remainder &&
task->iter_start + task->iter_remainder == task->iter_total)
iter_per_thread = task->iter_remainder;
task->iter_start += iter_per_thread;
if (task->iter_start == task->iter_total)
list_del(&task->list);
mtx_unlock(&pool->m);
task->work(task->data, this_iter, &lmem);
for (unsigned i = 0; i < iter_per_thread; i++)
task->work(task->data, this_iter + i, &lmem);
mtx_lock(&pool->m);
task->iter_finished++;
task->iter_finished += iter_per_thread;
if (task->iter_finished == task->iter_total)
cnd_broadcast(&task->finish);
}
@@ -132,6 +144,10 @@ lp_cs_tpool_queue_task(struct lp_cs_tpool *pool,
task->work = work;
task->data = data;
task->iter_total = num_iters;
task->iter_per_thread = num_iters / pool->num_threads;
task->iter_remainder = num_iters % pool->num_threads;
cnd_init(&task->finish);
mtx_lock(&pool->m);

View File

@@ -66,6 +66,8 @@ struct lp_cs_tpool_task {
unsigned iter_total;
unsigned iter_start;
unsigned iter_finished;
unsigned iter_per_thread;
unsigned iter_remainder;
};
struct lp_cs_tpool *lp_cs_tpool_create(unsigned num_threads);