diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build index e729981cdc9..e15cfed41f3 100644 --- a/src/panfrost/lib/meson.build +++ b/src/panfrost/lib/meson.build @@ -76,6 +76,7 @@ libpanfrost_lib_files = files( 'pan_bo.c', 'pan_blend.c', 'pan_clear.c', + 'pan_device.c', 'pan_earlyzs.c', 'pan_samples.c', 'pan_tiler.c', diff --git a/src/panfrost/lib/pan_device.c b/src/panfrost/lib/pan_device.c new file mode 100644 index 00000000000..bb30949126a --- /dev/null +++ b/src/panfrost/lib/pan_device.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Alyssa Rosenzweig + */ + +#include + +#include "drm-uapi/panfrost_drm.h" +#include "util/hash_table.h" +#include "util/macros.h" +#include "util/u_math.h" +#include "util/u_thread.h" +#include "pan_bo.h" +#include "pan_device.h" +#include "pan_encoder.h" +#include "pan_samples.h" +#include "pan_texture.h" +#include "pan_util.h" +#include "wrap.h" + +/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported + * compressed formats, so we offer a helper to test if a format is supported */ + +bool +panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt) +{ + if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED) + return true; + + unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED; + assert(idx < 32); /* idx may be 31: shift an unsigned 1 below */ + + return panfrost_query_compressed_formats(&dev->kmod.props) & (1u << idx); +} + +/* Always reserve the lower 32MB. */ +#define PANFROST_VA_RESERVE_BOTTOM 0x2000000ull + +void +panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) +{ + dev->memctx = memctx; + + dev->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL); + if (!dev->kmod.dev) { + close(fd); + return; + } + + pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props); + + dev->arch = pan_arch(dev->kmod.props.gpu_prod_id); + dev->model = panfrost_get_model(dev->kmod.props.gpu_prod_id); + + /* If we don't recognize the model, bail early */ + if (!dev->model) + goto err_free_kmod_dev; + + /* 32bit address space, with the lower 32MB reserved. We clamp + * things so it matches kmod VA range limitations. 
+ */ + uint64_t user_va_start = panfrost_clamp_to_usable_va_range( + dev->kmod.dev, PANFROST_VA_RESERVE_BOTTOM); + uint64_t user_va_end = + panfrost_clamp_to_usable_va_range(dev->kmod.dev, 1ull << 32); + + dev->kmod.vm = + pan_kmod_vm_create(dev->kmod.dev, PAN_KMOD_VM_FLAG_AUTO_VA, user_va_start, + user_va_end - user_va_start); + if (!dev->kmod.vm) + goto err_free_kmod_dev; + + dev->core_count = + panfrost_query_core_count(&dev->kmod.props, &dev->core_id_range); + dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(&dev->kmod.props); + dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev->model); + dev->compressed_formats = + panfrost_query_compressed_formats(&dev->kmod.props); + dev->tiler_features = panfrost_query_tiler_features(&dev->kmod.props); + dev->has_afbc = panfrost_query_afbc(&dev->kmod.props); + dev->formats = panfrost_format_table(dev->arch); + dev->blendable_formats = panfrost_blendable_format_table(dev->arch); + + util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512); + + pthread_mutex_init(&dev->bo_cache.lock, NULL); + list_inithead(&dev->bo_cache.lru); + + for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) + list_inithead(&dev->bo_cache.buckets[i]); + + /* Initialize pandecode before we start allocating */ + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + dev->decode_ctx = pandecode_create_context(!(dev->debug & PAN_DBG_TRACE)); + + /* Tiler heap is internally required by the tiler, which can only be + * active for a single job chain at once, so a single heap can be + * shared across batches/contexts */ + + dev->tiler_heap = panfrost_bo_create( + dev, 128 * 1024 * 1024, PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap"); + + pthread_mutex_init(&dev->submit_lock, NULL); + + /* Done once on init */ + dev->sample_positions = panfrost_bo_create( + dev, panfrost_sample_positions_buffer_size(), 0, "Sample positions"); + panfrost_upload_sample_positions(dev->sample_positions->ptr.cpu); + return; + 
+err_free_kmod_dev: + pan_kmod_dev_destroy(dev->kmod.dev); + dev->kmod.dev = NULL; +} + +void +panfrost_close_device(struct panfrost_device *dev) +{ + /* If we don't recognize the model, the rest of the device won't exist, + * we will have early-exited the device open. + */ + if (dev->model) { + pthread_mutex_destroy(&dev->submit_lock); + panfrost_bo_unreference(dev->tiler_heap); + panfrost_bo_unreference(dev->sample_positions); + panfrost_bo_cache_evict_all(dev); + pthread_mutex_destroy(&dev->bo_cache.lock); + util_sparse_array_finish(&dev->bo_map); + } + + if (dev->kmod.vm) + pan_kmod_vm_destroy(dev->kmod.vm); + + if (dev->kmod.dev) + pan_kmod_dev_destroy(dev->kmod.dev); +} diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index aec372f59df..df393d2257c 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -42,6 +42,7 @@ #include "pan_blitter.h" #include "pan_indirect_dispatch.h" #include "pan_pool.h" +#include "pan_props.h" #include "pan_util.h" #include "kmod/pan_kmod.h" @@ -65,45 +66,6 @@ extern "C" { /* Fencepost problem, hence the off-by-one */ #define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1) -/** Implementation-defined tiler features */ -struct panfrost_tiler_features { - /** Number of bytes per tiler bin */ - unsigned bin_size; - - /** Maximum number of levels that may be simultaneously enabled. - * Invariant: bitcount(hierarchy_mask) <= max_levels */ - unsigned max_levels; -}; - -struct panfrost_model { - /* GPU ID */ - uint32_t gpu_id; - - /* Marketing name for the GPU, used as the GL_RENDERER */ - const char *name; - - /* Set of associated performance counters */ - const char *performance_counters; - - /* Minimum GPU revision required for anisotropic filtering. ~0 and 0 - * means "no revisions support anisotropy" and "all revisions support - * anistropy" respectively -- so checking for anisotropy is simply - * comparing the reivsion. 
- */ - uint32_t min_rev_anisotropic; - - /* Default tilebuffer size in bytes for the model. */ - unsigned tilebuffer_size; - - struct { - /* The GPU lacks the capability for hierarchical tiling, without - * an "Advanced Tiling Unit", instead requiring a single bin - * size for the entire framebuffer be selected by the driver - */ - bool no_hierarchical_tiling; - } quirks; -}; - struct panfrost_device { /* For ralloc */ void *memctx; @@ -238,8 +200,6 @@ void panfrost_close_device(struct panfrost_device *dev); bool panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt); -unsigned panfrost_query_l2_slices(const struct panfrost_device *dev); - static inline struct panfrost_bo * pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle) { @@ -252,8 +212,6 @@ pan_is_bifrost(const struct panfrost_device *dev) return dev->arch >= 6 && dev->arch <= 7; } -const struct panfrost_model *panfrost_get_model(uint32_t gpu_id); - #if defined(__cplusplus) } // extern "C" #endif diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index 656174b5630..55871169638 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -24,20 +24,11 @@ * Alyssa Rosenzweig */ -#include +#include "kmod/pan_kmod.h" +#include "panfrost/util/pan_ir.h" +#include "pan_props.h" -#include "drm-uapi/panfrost_drm.h" -#include "util/hash_table.h" -#include "util/macros.h" -#include "util/u_math.h" -#include "util/u_thread.h" -#include "pan_bo.h" -#include "pan_device.h" -#include "pan_encoder.h" -#include "pan_samples.h" -#include "pan_texture.h" -#include "pan_util.h" -#include "wrap.h" +#include /* Fixed "minimum revisions" */ #define NO_ANISO (~0) @@ -95,17 +86,17 @@ panfrost_get_model(uint32_t gpu_id) } unsigned -panfrost_query_l2_slices(const struct panfrost_device *dev) +panfrost_query_l2_slices(const struct pan_kmod_dev_props *props) { /* L2_SLICES is MEM_FEATURES[11:8] minus(1) */ - return ((dev->kmod.props.mem_features >> 8) & 0xF) + 
1; + return ((props->mem_features >> 8) & 0xF) + 1; } -static struct panfrost_tiler_features -panfrost_query_tiler_features(const struct panfrost_device *dev) +struct panfrost_tiler_features +panfrost_query_tiler_features(const struct pan_kmod_dev_props *props) { /* Default value (2^9 bytes and 8 levels) to match old behaviour */ - uint32_t raw = dev->kmod.props.tiler_features; + uint32_t raw = props->tiler_features; /* Bin size is log2 in the first byte, max levels in the second byte */ return (struct panfrost_tiler_features){ @@ -114,13 +105,13 @@ panfrost_query_tiler_features(const struct panfrost_device *dev) }; } -static unsigned -panfrost_query_core_count(const struct panfrost_device *dev, +unsigned +panfrost_query_core_count(const struct pan_kmod_dev_props *props, unsigned *core_id_range) { /* On older kernels, worst-case to 16 cores */ - unsigned mask = dev->kmod.props.shader_present; + unsigned mask = props->shader_present; /* Some cores might be absent. In some cases, we care * about the range of core IDs (that is, the greatest core ID + 1). If @@ -132,45 +123,31 @@ panfrost_query_core_count(const struct panfrost_device *dev, return util_bitcount(mask); } -static unsigned -panfrost_query_thread_tls_alloc(const struct panfrost_device *dev, - unsigned major) +unsigned +panfrost_query_thread_tls_alloc(const struct pan_kmod_dev_props *props) { - unsigned tls = dev->kmod.props.thread_tls_alloc; + unsigned tls = props->thread_tls_alloc; - return (tls > 0) ? tls : panfrost_max_thread_count(major, 0); + return (tls > 0) + ? 
tls + : panfrost_max_thread_count(pan_arch(props->gpu_prod_id), 0); } -static uint32_t -panfrost_query_compressed_formats(const struct panfrost_device *dev) +uint32_t +panfrost_query_compressed_formats(const struct pan_kmod_dev_props *props) { - return dev->kmod.props.texture_features[0]; -} - -/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported - * compressed formats, so we offer a helper to test if a format is supported */ - -bool -panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt) -{ - if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED) - return true; - - unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED; - assert(idx < 32); - - return dev->compressed_formats & (1 << idx); + return props->texture_features[0]; } /* Check for AFBC hardware support. AFBC is introduced in v5. Implementations * may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */ -static bool -panfrost_query_afbc(struct panfrost_device *dev, unsigned arch) +bool +panfrost_query_afbc(const struct pan_kmod_dev_props *props) { - unsigned reg = dev->kmod.props.afbc_features; + unsigned reg = props->afbc_features; - return (arch >= 5) && (reg == 0); + return (pan_arch(props->gpu_prod_id) >= 5) && (reg == 0); } /* @@ -180,24 +157,23 @@ panfrost_query_afbc(struct panfrost_device *dev, unsigned arch) * For Mali-G510 and Mali-G310, we will need extra logic to query the tilebuffer * size for the particular variant. The CORE_FEATURES register might help. */ -static unsigned -panfrost_query_optimal_tib_size(const struct panfrost_device *dev) +unsigned +panfrost_query_optimal_tib_size(const struct panfrost_model *model) { /* Preconditions ensure the returned value is a multiple of 1 KiB, the * granularity of the colour buffer allocation field. 
*/ - assert(dev->model->tilebuffer_size >= 2048); - assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size)); + assert(model->tilebuffer_size >= 2048); + assert(util_is_power_of_two_nonzero(model->tilebuffer_size)); - return dev->model->tilebuffer_size / 2; + return model->tilebuffer_size / 2; } -static uint64_t -panfrost_clamp_to_usable_va_range(const struct panfrost_device *dev, - uint64_t va) +uint64_t +panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va) { struct pan_kmod_va_range user_va_range = - pan_kmod_dev_query_user_va_range(dev->kmod.dev); + pan_kmod_dev_query_user_va_range(dev); if (va < user_va_range.start) return user_va_range.start; @@ -206,103 +182,3 @@ panfrost_clamp_to_usable_va_range(const struct panfrost_device *dev, return va; } - -/* Always reserve the lower 32MB. */ -#define PANFROST_VA_RESERVE_BOTTOM 0x2000000ull - -void -panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) -{ - dev->memctx = memctx; - - dev->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL); - if (!dev->kmod.dev) { - close(fd); - return; - } - - pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props); - - dev->arch = pan_arch(dev->kmod.props.gpu_prod_id); - dev->model = panfrost_get_model(dev->kmod.props.gpu_prod_id); - - /* If we don't recognize the model, bail early */ - if (!dev->model) - goto err_free_kmod_dev; - - /* 32bit address space, with the lower 32MB reserved. We clamp - * things so it matches kmod VA range limitations. 
- */ - uint64_t user_va_start = - panfrost_clamp_to_usable_va_range(dev, PANFROST_VA_RESERVE_BOTTOM); - uint64_t user_va_end = - panfrost_clamp_to_usable_va_range(dev, 1ull << 32); - - dev->kmod.vm = - pan_kmod_vm_create(dev->kmod.dev, PAN_KMOD_VM_FLAG_AUTO_VA, user_va_start, - user_va_end - user_va_start); - if (!dev->kmod.vm) - goto err_free_kmod_dev; - - dev->core_count = panfrost_query_core_count(dev, &dev->core_id_range); - dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(dev, dev->arch); - dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev); - dev->compressed_formats = panfrost_query_compressed_formats(dev); - dev->tiler_features = panfrost_query_tiler_features(dev); - dev->has_afbc = panfrost_query_afbc(dev, dev->arch); - dev->formats = panfrost_format_table(dev->arch); - dev->blendable_formats = panfrost_blendable_format_table(dev->arch); - - util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512); - - pthread_mutex_init(&dev->bo_cache.lock, NULL); - list_inithead(&dev->bo_cache.lru); - - for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) - list_inithead(&dev->bo_cache.buckets[i]); - - /* Initialize pandecode before we start allocating */ - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - dev->decode_ctx = pandecode_create_context(!(dev->debug & PAN_DBG_TRACE)); - - /* Tiler heap is internally required by the tiler, which can only be - * active for a single job chain at once, so a single heap can be - * shared across batches/contextes */ - - dev->tiler_heap = panfrost_bo_create( - dev, 128 * 1024 * 1024, PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap"); - - pthread_mutex_init(&dev->submit_lock, NULL); - - /* Done once on init */ - dev->sample_positions = panfrost_bo_create( - dev, panfrost_sample_positions_buffer_size(), 0, "Sample positions"); - panfrost_upload_sample_positions(dev->sample_positions->ptr.cpu); - return; - -err_free_kmod_dev: - pan_kmod_dev_destroy(dev->kmod.dev); - dev->kmod.dev = NULL; -} - 
-void -panfrost_close_device(struct panfrost_device *dev) -{ - /* If we don't recognize the model, the rest of the device won't exist, - * we will have early-exited the device open. - */ - if (dev->model) { - pthread_mutex_destroy(&dev->submit_lock); - panfrost_bo_unreference(dev->tiler_heap); - panfrost_bo_unreference(dev->sample_positions); - panfrost_bo_cache_evict_all(dev); - pthread_mutex_destroy(&dev->bo_cache.lock); - util_sparse_array_finish(&dev->bo_map); - } - - if (dev->kmod.vm) - pan_kmod_vm_destroy(dev->kmod.vm); - - if (dev->kmod.dev) - pan_kmod_dev_destroy(dev->kmod.dev); -} diff --git a/src/panfrost/lib/pan_props.h b/src/panfrost/lib/pan_props.h new file mode 100644 index 00000000000..3d15492b57d --- /dev/null +++ b/src/panfrost/lib/pan_props.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Alyssa Rosenzweig + */ + +#ifndef PAN_PROPS_H +#define PAN_PROPS_H + +#include +#include + +struct pan_kmod_dev; +struct pan_kmod_dev_props; + +/** Implementation-defined tiler features */ +struct panfrost_tiler_features { + /** Number of bytes per tiler bin */ + unsigned bin_size; + + /** Maximum number of levels that may be simultaneously enabled. + * Invariant: bitcount(hierarchy_mask) <= max_levels */ + unsigned max_levels; +}; + +struct panfrost_model { + /* GPU ID */ + uint32_t gpu_id; + + /* Marketing name for the GPU, used as the GL_RENDERER */ + const char *name; + + /* Set of associated performance counters */ + const char *performance_counters; + + /* Minimum GPU revision required for anisotropic filtering. ~0 and 0 + * means "no revisions support anisotropy" and "all revisions support + * anisotropy" respectively -- so checking for anisotropy is simply + * comparing the revision. + */ + uint32_t min_rev_anisotropic; + + /* Default tilebuffer size in bytes for the model. 
*/ + unsigned tilebuffer_size; + + struct { + /* The GPU lacks the capability for hierarchical tiling, without + * an "Advanced Tiling Unit", instead requiring a single bin + * size for the entire framebuffer be selected by the driver + */ + bool no_hierarchical_tiling; + } quirks; +}; + +const struct panfrost_model *panfrost_get_model(uint32_t gpu_id); + +unsigned panfrost_query_l2_slices(const struct pan_kmod_dev_props *props); + +struct panfrost_tiler_features +panfrost_query_tiler_features(const struct pan_kmod_dev_props *props); + +unsigned +panfrost_query_thread_tls_alloc(const struct pan_kmod_dev_props *props); + +uint32_t +panfrost_query_compressed_formats(const struct pan_kmod_dev_props *props); + +unsigned panfrost_query_core_count(const struct pan_kmod_dev_props *props, + unsigned *core_id_range); + +bool panfrost_query_afbc(const struct pan_kmod_dev_props *props); + +unsigned panfrost_query_optimal_tib_size(const struct panfrost_model *model); + +uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, + uint64_t va); + +#endif diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 86b121ac3fd..55885a638c4 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -25,6 +25,7 @@ #include #include +#include #include #define PAN_COUNTERS_PER_CATEGORY 64 @@ -76,7 +77,7 @@ panfrost_perf_init(struct panfrost_perf *perf, struct panfrost_device *dev) // Generally counter blocks are laid out in the following order: // Job manager, tiler, one or more L2 caches, and one or more shader cores. - unsigned l2_slices = panfrost_query_l2_slices(dev); + unsigned l2_slices = panfrost_query_l2_slices(&dev->kmod.props); uint32_t n_blocks = 2 + l2_slices + dev->core_id_range; perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks; perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values);