panfrost: Make pan_props.c panfrost_device agnostic

Move the functions that are dealing with panfrost_device initialization
to a new panfrost_device.c file, and make the remaining ones
panfrost_device agnostic.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26698>
This commit is contained in:
Boris Brezillon
2023-12-08 12:28:04 +01:00
committed by Marge Bot
parent 7b4d1bb9be
commit 2d07926df1
6 changed files with 292 additions and 201 deletions

View File

@@ -76,6 +76,7 @@ libpanfrost_lib_files = files(
'pan_bo.c',
'pan_blend.c',
'pan_clear.c',
'pan_device.c',
'pan_earlyzs.c',
'pan_samples.c',
'pan_tiler.c',

View File

@@ -0,0 +1,157 @@
/*
* Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <xf86drm.h>
#include "drm-uapi/panfrost_drm.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_thread.h"
#include "pan_bo.h"
#include "pan_device.h"
#include "pan_encoder.h"
#include "pan_samples.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "wrap.h"
/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported
* compressed formats, so we offer a helper to test if a format is supported */
bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
{
if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
return true;
unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
assert(idx < 32);
return panfrost_query_compressed_formats(&dev->kmod.props) & (1 << idx);
}
/* Always reserve the lower 32MB of the GPU VA space. */
#define PANFROST_VA_RESERVE_BOTTOM 0x2000000ull
/* Open a panfrost device on the DRM fd @fd: create the kmod device (which
 * takes ownership of the fd), query GPU properties, set up the VM/VA space,
 * the BO cache, the tiler heap and the sample-positions buffer.
 *
 * On failure, dev->kmod.dev is left NULL so callers can detect the error. */
void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
{
dev->memctx = memctx;
dev->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL);
if (!dev->kmod.dev) {
/* pan_kmod_dev_create() did not take ownership; close the fd ourselves */
close(fd);
return;
}
pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
dev->arch = pan_arch(dev->kmod.props.gpu_prod_id);
dev->model = panfrost_get_model(dev->kmod.props.gpu_prod_id);
/* If we don't recognize the model, bail early */
if (!dev->model)
goto err_free_kmod_dev;
/* 32bit address space, with the lower 32MB reserved. We clamp
 * things so it matches kmod VA range limitations.
 */
uint64_t user_va_start = panfrost_clamp_to_usable_va_range(
dev->kmod.dev, PANFROST_VA_RESERVE_BOTTOM);
uint64_t user_va_end =
panfrost_clamp_to_usable_va_range(dev->kmod.dev, 1ull << 32);
dev->kmod.vm =
pan_kmod_vm_create(dev->kmod.dev, PAN_KMOD_VM_FLAG_AUTO_VA, user_va_start,
user_va_end - user_va_start);
if (!dev->kmod.vm)
goto err_free_kmod_dev;
/* Cache the GPU properties we need, so the rest of the driver doesn't
 * have to go through the kmod props */
dev->core_count =
panfrost_query_core_count(&dev->kmod.props, &dev->core_id_range);
dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(&dev->kmod.props);
dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev->model);
dev->compressed_formats =
panfrost_query_compressed_formats(&dev->kmod.props);
dev->tiler_features = panfrost_query_tiler_features(&dev->kmod.props);
dev->has_afbc = panfrost_query_afbc(&dev->kmod.props);
dev->formats = panfrost_format_table(dev->arch);
dev->blendable_formats = panfrost_blendable_format_table(dev->arch);
/* BO map and cache must exist before the first panfrost_bo_create() below */
util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);
pthread_mutex_init(&dev->bo_cache.lock, NULL);
list_inithead(&dev->bo_cache.lru);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
list_inithead(&dev->bo_cache.buckets[i]);
/* Initialize pandecode before we start allocating */
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
dev->decode_ctx = pandecode_create_context(!(dev->debug & PAN_DBG_TRACE));
/* Tiler heap is internally required by the tiler, which can only be
 * active for a single job chain at once, so a single heap can be
 * shared across batches/contexts */
dev->tiler_heap = panfrost_bo_create(
dev, 128 * 1024 * 1024, PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");
pthread_mutex_init(&dev->submit_lock, NULL);
/* Done once on init */
dev->sample_positions = panfrost_bo_create(
dev, panfrost_sample_positions_buffer_size(), 0, "Sample positions");
panfrost_upload_sample_positions(dev->sample_positions->ptr.cpu);
return;
err_free_kmod_dev:
pan_kmod_dev_destroy(dev->kmod.dev);
dev->kmod.dev = NULL;
}
/* Tear down a device opened with panfrost_open_device(). Safe to call on a
 * partially-initialized device: each teardown stage is guarded by the state
 * (dev->model, dev->kmod.vm, dev->kmod.dev) proving it was set up. */
void
panfrost_close_device(struct panfrost_device *dev)
{
/* If we don't recognize the model, the rest of the device won't exist,
 * we will have early-exited the device open.
 */
if (dev->model) {
pthread_mutex_destroy(&dev->submit_lock);
/* Drop the BOs we own, then flush everything left in the cache */
panfrost_bo_unreference(dev->tiler_heap);
panfrost_bo_unreference(dev->sample_positions);
panfrost_bo_cache_evict_all(dev);
pthread_mutex_destroy(&dev->bo_cache.lock);
util_sparse_array_finish(&dev->bo_map);
}
/* VM must go before the kmod device that owns it */
if (dev->kmod.vm)
pan_kmod_vm_destroy(dev->kmod.vm);
if (dev->kmod.dev)
pan_kmod_dev_destroy(dev->kmod.dev);
}

View File

@@ -42,6 +42,7 @@
#include "pan_blitter.h"
#include "pan_indirect_dispatch.h"
#include "pan_pool.h"
#include "pan_props.h"
#include "pan_util.h"
#include "kmod/pan_kmod.h"
@@ -65,45 +66,6 @@ extern "C" {
/* Fencepost problem, hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
/** Implementation-defined tiler features */
struct panfrost_tiler_features {
/** Number of bytes per tiler bin */
unsigned bin_size;
/** Maximum number of levels that may be simultaneously enabled.
* Invariant: bitcount(hierarchy_mask) <= max_levels */
unsigned max_levels;
};
struct panfrost_model {
/* GPU ID */
uint32_t gpu_id;
/* Marketing name for the GPU, used as the GL_RENDERER */
const char *name;
/* Set of associated performance counters */
const char *performance_counters;
/* Minimum GPU revision required for anisotropic filtering. ~0 and 0
* means "no revisions support anisotropy" and "all revisions support
* anisotropy" respectively -- so checking for anisotropy is simply
* comparing the revision.
*/
uint32_t min_rev_anisotropic;
/* Default tilebuffer size in bytes for the model. */
unsigned tilebuffer_size;
struct {
/* The GPU lacks the capability for hierarchical tiling, without
* an "Advanced Tiling Unit", instead requiring a single bin
* size for the entire framebuffer be selected by the driver
*/
bool no_hierarchical_tiling;
} quirks;
};
struct panfrost_device {
/* For ralloc */
void *memctx;
@@ -238,8 +200,6 @@ void panfrost_close_device(struct panfrost_device *dev);
bool panfrost_supports_compressed_format(struct panfrost_device *dev,
unsigned fmt);
unsigned panfrost_query_l2_slices(const struct panfrost_device *dev);
static inline struct panfrost_bo *
pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
{
@@ -252,8 +212,6 @@ pan_is_bifrost(const struct panfrost_device *dev)
return dev->arch >= 6 && dev->arch <= 7;
}
const struct panfrost_model *panfrost_get_model(uint32_t gpu_id);
#if defined(__cplusplus)
} // extern "C"
#endif

View File

@@ -24,20 +24,11 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <xf86drm.h>
#include "kmod/pan_kmod.h"
#include "panfrost/util/pan_ir.h"
#include "pan_props.h"
#include "drm-uapi/panfrost_drm.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_thread.h"
#include "pan_bo.h"
#include "pan_device.h"
#include "pan_encoder.h"
#include "pan_samples.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "wrap.h"
#include <genxml/gen_macros.h>
/* Fixed "minimum revisions" */
#define NO_ANISO (~0)
@@ -95,17 +86,17 @@ panfrost_get_model(uint32_t gpu_id)
}
unsigned
panfrost_query_l2_slices(const struct panfrost_device *dev)
panfrost_query_l2_slices(const struct pan_kmod_dev_props *props)
{
/* L2_SLICES is MEM_FEATURES[11:8] minus(1) */
return ((dev->kmod.props.mem_features >> 8) & 0xF) + 1;
return ((props->mem_features >> 8) & 0xF) + 1;
}
static struct panfrost_tiler_features
panfrost_query_tiler_features(const struct panfrost_device *dev)
struct panfrost_tiler_features
panfrost_query_tiler_features(const struct pan_kmod_dev_props *props)
{
/* Default value (2^9 bytes and 8 levels) to match old behaviour */
uint32_t raw = dev->kmod.props.tiler_features;
uint32_t raw = props->tiler_features;
/* Bin size is log2 in the first byte, max levels in the second byte */
return (struct panfrost_tiler_features){
@@ -114,13 +105,13 @@ panfrost_query_tiler_features(const struct panfrost_device *dev)
};
}
static unsigned
panfrost_query_core_count(const struct panfrost_device *dev,
unsigned
panfrost_query_core_count(const struct pan_kmod_dev_props *props,
unsigned *core_id_range)
{
/* On older kernels, worst-case to 16 cores */
unsigned mask = dev->kmod.props.shader_present;
unsigned mask = props->shader_present;
/* Some cores might be absent. In some cases, we care
* about the range of core IDs (that is, the greatest core ID + 1). If
@@ -132,45 +123,31 @@ panfrost_query_core_count(const struct panfrost_device *dev,
return util_bitcount(mask);
}
static unsigned
panfrost_query_thread_tls_alloc(const struct panfrost_device *dev,
unsigned major)
unsigned
panfrost_query_thread_tls_alloc(const struct pan_kmod_dev_props *props)
{
unsigned tls = dev->kmod.props.thread_tls_alloc;
unsigned tls = props->thread_tls_alloc;
return (tls > 0) ? tls : panfrost_max_thread_count(major, 0);
return (tls > 0)
? tls
: panfrost_max_thread_count(pan_arch(props->gpu_prod_id), 0);
}
static uint32_t
panfrost_query_compressed_formats(const struct panfrost_device *dev)
uint32_t
panfrost_query_compressed_formats(const struct pan_kmod_dev_props *props)
{
return dev->kmod.props.texture_features[0];
}
/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported
* compressed formats, so we offer a helper to test if a format is supported */
bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
{
if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
return true;
unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
assert(idx < 32);
return dev->compressed_formats & (1 << idx);
return props->texture_features[0];
}
/* Check for AFBC hardware support. AFBC is introduced in v5. Implementations
* may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */
static bool
panfrost_query_afbc(struct panfrost_device *dev, unsigned arch)
bool
panfrost_query_afbc(const struct pan_kmod_dev_props *props)
{
unsigned reg = dev->kmod.props.afbc_features;
unsigned reg = props->afbc_features;
return (arch >= 5) && (reg == 0);
return (pan_arch(props->gpu_prod_id) >= 5) && (reg == 0);
}
/*
@@ -180,24 +157,23 @@ panfrost_query_afbc(struct panfrost_device *dev, unsigned arch)
* For Mali-G510 and Mali-G310, we will need extra logic to query the tilebuffer
* size for the particular variant. The CORE_FEATURES register might help.
*/
static unsigned
panfrost_query_optimal_tib_size(const struct panfrost_device *dev)
unsigned
panfrost_query_optimal_tib_size(const struct panfrost_model *model)
{
/* Preconditions ensure the returned value is a multiple of 1 KiB, the
* granularity of the colour buffer allocation field.
*/
assert(dev->model->tilebuffer_size >= 2048);
assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size));
assert(model->tilebuffer_size >= 2048);
assert(util_is_power_of_two_nonzero(model->tilebuffer_size));
return dev->model->tilebuffer_size / 2;
return model->tilebuffer_size / 2;
}
static uint64_t
panfrost_clamp_to_usable_va_range(const struct panfrost_device *dev,
uint64_t va)
uint64_t
panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va)
{
struct pan_kmod_va_range user_va_range =
pan_kmod_dev_query_user_va_range(dev->kmod.dev);
pan_kmod_dev_query_user_va_range(dev);
if (va < user_va_range.start)
return user_va_range.start;
@@ -206,103 +182,3 @@ panfrost_clamp_to_usable_va_range(const struct panfrost_device *dev,
return va;
}
/* Always reserve the lower 32MB. */
#define PANFROST_VA_RESERVE_BOTTOM 0x2000000ull
void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
{
dev->memctx = memctx;
dev->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL);
if (!dev->kmod.dev) {
close(fd);
return;
}
pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
dev->arch = pan_arch(dev->kmod.props.gpu_prod_id);
dev->model = panfrost_get_model(dev->kmod.props.gpu_prod_id);
/* If we don't recognize the model, bail early */
if (!dev->model)
goto err_free_kmod_dev;
/* 32bit address space, with the lower 32MB reserved. We clamp
* things so it matches kmod VA range limitations.
*/
uint64_t user_va_start =
panfrost_clamp_to_usable_va_range(dev, PANFROST_VA_RESERVE_BOTTOM);
uint64_t user_va_end =
panfrost_clamp_to_usable_va_range(dev, 1ull << 32);
dev->kmod.vm =
pan_kmod_vm_create(dev->kmod.dev, PAN_KMOD_VM_FLAG_AUTO_VA, user_va_start,
user_va_end - user_va_start);
if (!dev->kmod.vm)
goto err_free_kmod_dev;
dev->core_count = panfrost_query_core_count(dev, &dev->core_id_range);
dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(dev, dev->arch);
dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev);
dev->compressed_formats = panfrost_query_compressed_formats(dev);
dev->tiler_features = panfrost_query_tiler_features(dev);
dev->has_afbc = panfrost_query_afbc(dev, dev->arch);
dev->formats = panfrost_format_table(dev->arch);
dev->blendable_formats = panfrost_blendable_format_table(dev->arch);
util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);
pthread_mutex_init(&dev->bo_cache.lock, NULL);
list_inithead(&dev->bo_cache.lru);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
list_inithead(&dev->bo_cache.buckets[i]);
/* Initialize pandecode before we start allocating */
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
dev->decode_ctx = pandecode_create_context(!(dev->debug & PAN_DBG_TRACE));
/* Tiler heap is internally required by the tiler, which can only be
* active for a single job chain at once, so a single heap can be
* shared across batches/contexts */
dev->tiler_heap = panfrost_bo_create(
dev, 128 * 1024 * 1024, PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");
pthread_mutex_init(&dev->submit_lock, NULL);
/* Done once on init */
dev->sample_positions = panfrost_bo_create(
dev, panfrost_sample_positions_buffer_size(), 0, "Sample positions");
panfrost_upload_sample_positions(dev->sample_positions->ptr.cpu);
return;
err_free_kmod_dev:
pan_kmod_dev_destroy(dev->kmod.dev);
dev->kmod.dev = NULL;
}
void
panfrost_close_device(struct panfrost_device *dev)
{
/* If we don't recognize the model, the rest of the device won't exist,
* we will have early-exited the device open.
*/
if (dev->model) {
pthread_mutex_destroy(&dev->submit_lock);
panfrost_bo_unreference(dev->tiler_heap);
panfrost_bo_unreference(dev->sample_positions);
panfrost_bo_cache_evict_all(dev);
pthread_mutex_destroy(&dev->bo_cache.lock);
util_sparse_array_finish(&dev->bo_map);
}
if (dev->kmod.vm)
pan_kmod_vm_destroy(dev->kmod.vm);
if (dev->kmod.dev)
pan_kmod_dev_destroy(dev->kmod.dev);
}

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#ifndef PAN_PROPS_H
#define PAN_PROPS_H
#include <stdbool.h>
#include <stdint.h>
struct pan_kmod_dev;
struct pan_kmod_dev_props;
/** Implementation-defined tiler features, decoded from the kmod
 * tiler_features property */
struct panfrost_tiler_features {
/** Number of bytes per tiler bin */
unsigned bin_size;
/** Maximum number of levels that may be simultaneously enabled.
* Invariant: bitcount(hierarchy_mask) <= max_levels */
unsigned max_levels;
};
/* Static, per-GPU-model information looked up by product ID */
struct panfrost_model {
/* GPU ID */
uint32_t gpu_id;
/* Marketing name for the GPU, used as the GL_RENDERER */
const char *name;
/* Set of associated performance counters */
const char *performance_counters;
/* Minimum GPU revision required for anisotropic filtering. ~0 and 0
* means "no revisions support anisotropy" and "all revisions support
* anisotropy" respectively -- so checking for anisotropy is simply
* comparing the revision.
*/
uint32_t min_rev_anisotropic;
/* Default tilebuffer size in bytes for the model. */
unsigned tilebuffer_size;
struct {
/* The GPU lacks the capability for hierarchical tiling, without
* an "Advanced Tiling Unit", instead requiring a single bin
* size for the entire framebuffer be selected by the driver
*/
bool no_hierarchical_tiling;
} quirks;
};
const struct panfrost_model *panfrost_get_model(uint32_t gpu_id);
unsigned panfrost_query_l2_slices(const struct pan_kmod_dev_props *props);
struct panfrost_tiler_features
panfrost_query_tiler_features(const struct pan_kmod_dev_props *props);
unsigned
panfrost_query_thread_tls_alloc(const struct pan_kmod_dev_props *props);
uint32_t
panfrost_query_compressed_formats(const struct pan_kmod_dev_props *props);
unsigned panfrost_query_core_count(const struct pan_kmod_dev_props *props,
unsigned *core_id_range);
bool panfrost_query_afbc(const struct pan_kmod_dev_props *props);
unsigned panfrost_query_optimal_tib_size(const struct panfrost_model *model);
uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev,
uint64_t va);
#endif

View File

@@ -25,6 +25,7 @@
#include <drm-uapi/panfrost_drm.h>
#include <lib/pan_device.h>
#include <lib/pan_props.h>
#include <pan_perf_metrics.h>
#define PAN_COUNTERS_PER_CATEGORY 64
@@ -76,7 +77,7 @@ panfrost_perf_init(struct panfrost_perf *perf, struct panfrost_device *dev)
// Generally counter blocks are laid out in the following order:
// Job manager, tiler, one or more L2 caches, and one or more shader cores.
unsigned l2_slices = panfrost_query_l2_slices(dev);
unsigned l2_slices = panfrost_query_l2_slices(&dev->kmod.props);
uint32_t n_blocks = 2 + l2_slices + dev->core_id_range;
perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks;
perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values);