rusticl: translate spirv to nir and first steps to kernel arg handling
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
@@ -966,6 +966,7 @@ if with_gallium_rusticl
   add_languages('rust', required: true)
 
   with_clc = true
+  with_libclc = true
 endif
 
 dep_clc = null_dep
@@ -1079,18 +1079,27 @@ extern "C" fn cl_enqueue_unmap_mem_object(
 }
 
 extern "C" fn cl_enqueue_ndrange_kernel(
-    _command_queue: cl_command_queue,
-    _kernel: cl_kernel,
-    _work_dim: cl_uint,
-    _global_work_offset: *const usize,
-    _global_work_size: *const usize,
-    _local_work_size: *const usize,
-    _num_events_in_wait_list: cl_uint,
-    _event_wait_list: *const cl_event,
-    _event: *mut cl_event,
+    command_queue: cl_command_queue,
+    kernel: cl_kernel,
+    work_dim: cl_uint,
+    global_work_offset: *const usize,
+    global_work_size: *const usize,
+    local_work_size: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
 ) -> cl_int {
-    println!("cl_enqueue_ndrange_kernel not implemented");
-    CL_OUT_OF_HOST_MEMORY
+    match_err!(enqueue_ndrange_kernel(
+        command_queue,
+        kernel,
+        work_dim,
+        global_work_offset,
+        global_work_size,
+        local_work_size,
+        num_events_in_wait_list,
+        event_wait_list,
+        event
+    ))
 }
 
 extern "C" fn cl_get_extension_function_address(
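The entry point now forwards into the safe Rust implementation and folds its `CLResult` into an OpenCL status code. A minimal, self-contained sketch of what a macro like `match_err!` plausibly expands to (the constants and function names below are stand-ins, not rusticl's):

type CLResult<T> = Result<T, i32>;
const CL_SUCCESS: i32 = 0;
const CL_INVALID_WORK_DIMENSION: i32 = -53;

// A checked implementation returning CLResult<()>, like enqueue_ndrange_kernel.
fn enqueue_checked(work_dim: u32) -> CLResult<()> {
    if work_dim == 0 || work_dim > 3 {
        return Err(CL_INVALID_WORK_DIMENSION);
    }
    Ok(())
}

// Conceptually what the extern "C" wrapper does with the result.
fn cl_entry_point(work_dim: u32) -> i32 {
    match enqueue_checked(work_dim) {
        Ok(_) => CL_SUCCESS,
        Err(e) => e,
    }
}

fn main() {
    assert_eq!(cl_entry_point(2), CL_SUCCESS);
    assert_eq!(cl_entry_point(0), CL_INVALID_WORK_DIMENSION);
}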
@@ -1,14 +1,17 @@
 extern crate mesa_rust_util;
 extern crate rusticl_opencl_gen;
 
+use crate::api::event::create_and_queue;
 use crate::api::icd::*;
 use crate::api::util::*;
+use crate::core::event::*;
 use crate::core::kernel::*;
 
 use self::mesa_rust_util::string::*;
 use self::rusticl_opencl_gen::*;
 
 use std::collections::HashSet;
+use std::slice;
 use std::sync::Arc;
 
 impl CLInfo<cl_kernel_info> for cl_kernel {
@@ -73,6 +76,19 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
     }
 }
 
+const ZERO_ARR: [usize; 3] = [0; 3];
+
+/// # Safety
+///
+/// This function is only safe when called on an array of `work_dim` length
+unsafe fn kernel_work_arr_or_default<'a>(arr: *const usize, work_dim: cl_uint) -> &'a [usize] {
+    if !arr.is_null() {
+        slice::from_raw_parts(arr, work_dim as usize)
+    } else {
+        &ZERO_ARR
+    }
+}
+
 pub fn create_kernel(
     program: cl_program,
     kernel_name: *const ::std::os::raw::c_char,
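The `kernel_work_arr_or_default` helper added above normalizes the optional work-size arrays that the CL API passes in as raw pointers. A stand-alone illustration of the same pattern (plain Rust with hypothetical values, not rusticl code):

use std::slice;

const ZERO_ARR: [usize; 3] = [0; 3];

// Same contract as the helper above: `arr` must either be null or point
// to at least `work_dim` readable elements.
unsafe fn work_arr_or_default<'a>(arr: *const usize, work_dim: u32) -> &'a [usize] {
    if !arr.is_null() {
        slice::from_raw_parts(arr, work_dim as usize)
    } else {
        &ZERO_ARR
    }
}

fn main() {
    let sizes = [64usize, 8, 1];
    // Non-null pointer: view the caller's array.
    assert_eq!(unsafe { work_arr_or_default(sizes.as_ptr(), 3) }, &[64, 8, 1][..]);
    // Null pointer (CL allows omitting global_work_offset): all zeros.
    assert_eq!(unsafe { work_arr_or_default(std::ptr::null(), 3) }, &[0, 0, 0][..]);
}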
@@ -108,6 +124,7 @@ pub fn create_kernel(
     // CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel function given by
     // kernel_name such as the number of arguments, the argument types are not the same for all
     // devices for which the program executable has been built.
+    let nirs = p.nirs(&name);
     let kernel_args: HashSet<_> = devs.iter().map(|d| p.args(d, &name)).collect();
     if kernel_args.len() != 1 {
         return Err(CL_INVALID_KERNEL_DEFINITION);
@@ -116,6 +133,7 @@ pub fn create_kernel(
     Ok(cl_kernel::from_arc(Kernel::new(
         name,
         p,
+        nirs,
         kernel_args.into_iter().next().unwrap(),
     )))
 }
@@ -123,24 +141,177 @@ pub fn create_kernel(
 pub fn set_kernel_arg(
     kernel: cl_kernel,
     arg_index: cl_uint,
-    _arg_size: usize,
-    _arg_value: *const ::std::os::raw::c_void,
+    arg_size: usize,
+    arg_value: *const ::std::os::raw::c_void,
 ) -> CLResult<()> {
     let k = kernel.get_arc()?;
 
     // CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
-    if arg_index as usize >= k.args.len() {
-        return Err(CL_INVALID_ARG_INDEX);
+    if let Some(arg) = k.args.get(arg_index as usize) {
+        // CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument
+        // that is not a memory object or if the argument is a memory object and
+        // arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the
+        // local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
+        match arg.kind {
+            KernelArgType::MemLocal => {
+                if arg_size == 0 {
+                    return Err(CL_INVALID_ARG_SIZE);
+                }
+            }
+            _ => {
+                if arg.size != arg_size {
+                    return Err(CL_INVALID_ARG_SIZE);
+                }
+            }
+        }
+
+        // CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
+        match arg.kind {
+            // If the argument is declared with the local qualifier, the arg_value entry must be
+            // NULL.
+            KernelArgType::MemLocal => {
+                if !arg_value.is_null() {
+                    return Err(CL_INVALID_ARG_VALUE);
+                }
+            }
+            // If the argument is of type sampler_t, the arg_value entry must be a pointer to the
+            // sampler object.
+            KernelArgType::Constant | KernelArgType::Sampler => {
+                if arg_value.is_null() {
+                    return Err(CL_INVALID_ARG_VALUE);
+                }
+            }
+            _ => {}
+        };
+
+        // let's create the arg now
+        let arg = unsafe {
+            if arg.dead {
+                KernelArgValue::None
+            } else {
+                match arg.kind {
+                    KernelArgType::Constant => KernelArgValue::Constant(
+                        slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
+                    ),
+                    KernelArgType::MemConstant | KernelArgType::MemGlobal => {
+                        let ptr: *const cl_mem = arg_value.cast();
+                        if ptr.is_null() || (*ptr).is_null() {
+                            KernelArgValue::None
+                        } else {
+                            KernelArgValue::MemObject((*ptr).get_ref()?)
+                        }
+                    }
+                    KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
+                    KernelArgType::Sampler => {
+                        let ptr: *const cl_sampler = arg_value.cast();
+                        KernelArgValue::Sampler((*ptr).get_ref()?)
+                    }
+                }
+            }
+        };
+        k.values.get(arg_index as usize).unwrap().replace(Some(arg));
+        Ok(())
+    } else {
+        Err(CL_INVALID_ARG_INDEX)
     }
 
-    //• CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
     //• CL_INVALID_MEM_OBJECT for an argument declared to be a memory object when the specified arg_value is not a valid memory object.
     //• CL_INVALID_SAMPLER for an argument declared to be of type sampler_t when the specified arg_value is not a valid sampler object.
     //• CL_INVALID_DEVICE_QUEUE for an argument declared to be of type queue_t when the specified arg_value is not a valid device queue object. This error code is missing before version 2.0.
-    //• CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument that is not a memory object or if the argument is a memory object and arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
     //• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language-specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2.
     //• CL_INVALID_ARG_VALUE if the argument is an image declared with the read_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_WRITE_ONLY or if the image argument is declared with the write_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_READ_ONLY.
-
-    println!("set_kernel_arg not implemented");
-    Err(CL_OUT_OF_HOST_MEMORY)
 }
 
+pub fn enqueue_ndrange_kernel(
+    command_queue: cl_command_queue,
+    kernel: cl_kernel,
+    work_dim: cl_uint,
+    global_work_offset: *const usize,
+    global_work_size: *const usize,
+    local_work_size: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
+) -> CLResult<()> {
+    let q = command_queue.get_arc()?;
+    let k = kernel.get_arc()?;
+    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+
+    // CL_INVALID_CONTEXT if context associated with command_queue and kernel are not the same
+    if q.context != k.prog.context {
+        return Err(CL_INVALID_CONTEXT);
+    }
+
+    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
+    // for device associated with command_queue.
+    if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
+        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
+    }
+
+    // CL_INVALID_KERNEL_ARGS if the kernel argument values have not been specified.
+    if k.values.iter().any(|v| v.borrow().is_none()) {
+        return Err(CL_INVALID_KERNEL_ARGS);
+    }
+
+    // CL_INVALID_WORK_DIMENSION if work_dim is not a valid value (i.e. a value between 1 and
+    // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
+    if work_dim == 0 || work_dim > q.device.max_grid_dimensions() {
+        return Err(CL_INVALID_WORK_DIMENSION);
+    }
+
+    // we assume the application gets it right and doesn't pass shorter arrays than actually needed.
+    let global_work_size = unsafe { kernel_work_arr_or_default(global_work_size, work_dim) };
+    let local_work_size = unsafe { kernel_work_arr_or_default(local_work_size, work_dim) };
+    let global_work_offset = unsafe { kernel_work_arr_or_default(global_work_offset, work_dim) };
+
+    if q.device.address_bits() == 32 {
+        for (s, o) in global_work_size.iter().zip(global_work_offset) {
+            // CL_INVALID_GLOBAL_WORK_SIZE if any of the values specified in global_work_size[0], …
+            // global_work_size[work_dim - 1] exceed the maximum value representable by size_t on
+            // the device on which the kernel-instance will be enqueued.
+            if *s > u32::MAX as usize {
+                return Err(CL_INVALID_GLOBAL_WORK_SIZE);
+            }
+            // CL_INVALID_GLOBAL_OFFSET if the value specified in global_work_size + the
+            // corresponding values in global_work_offset for any dimensions is greater than the
+            // maximum value representable by size_t on the device on which the kernel-instance
+            // will be enqueued
+            if s + o > u32::MAX as usize {
+                return Err(CL_INVALID_GLOBAL_OFFSET);
+            }
+        }
+    }
+
+    // CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of
+    // local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding values
+    // specified by CL_DEVICE_MAX_WORK_ITEM_SIZES[0], …, CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim - 1].
+    if local_work_size.iter().gt(q.device.max_block_sizes().iter()) {
+        return Err(CL_INVALID_WORK_ITEM_SIZE);
+    }
+
+    // If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
+    // command will trivially succeed after its event dependencies are satisfied and subsequently
+    // update its completion event.
+    let cb: EventSig = if global_work_size.contains(&0) {
+        Box::new(|_, _| Ok(()))
+    } else {
+        k.launch(
+            &q,
+            work_dim,
+            local_work_size,
+            global_work_size,
+            global_work_offset,
+        )
+    };
+
+    create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)
+
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required work-group size for kernel in the program source.
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source.
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
+    //• CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the local_work_size is not NULL, is not equal to the required work-group size specified in the kernel source, or the global_work_size is not evenly divisible by the local_work_size.
+    //• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code
+    //• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue.
+    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the image format (image channel order and data type) is not supported by device associated with queue.
+    //• CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance of kernel on the command-queue because of insufficient resources needed to execute the kernel. For example, the explicitly specified local_work_size causes a failure to execute the kernel because of insufficient resources such as registers or local memory. Another example would be the number of read-only image args used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of write-only and read-write image args used in kernel exceed the CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS value for device or the number of samplers used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
+    //• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with image or buffer objects specified as arguments to kernel.
+    //• CL_INVALID_OPERATION if SVM pointers are passed as arguments to a kernel and the device does not support SVM or if system pointers are passed as arguments to a kernel and/or stored inside SVM allocations passed as kernel arguments and the device does not support fine grain system SVM allocations.
+}
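The 32-bit device checks above rely on the host's 64-bit `usize`, so `s + o` cannot itself overflow while being compared against `u32::MAX`. A self-contained version of that bounds check (assuming a 64-bit host, as the code above does):

fn fits_in_32bit(size: usize, offset: usize) -> bool {
    size <= u32::MAX as usize && size + offset <= u32::MAX as usize
}

fn main() {
    assert!(fits_in_32bit(1 << 20, 0));
    assert!(!fits_in_32bit(u32::MAX as usize, 1)); // offset pushes past 2^32 - 1
}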
@@ -30,7 +30,7 @@ impl Context {
         })
     }
 
-    pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
+    pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
         for dev in &self.devs {
@@ -38,7 +38,7 @@ impl Context {
                 .screen()
                 .resource_create_buffer(adj_size)
                 .ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), resource?);
+            res.insert(Arc::clone(dev), Arc::new(resource?));
         }
         Ok(res)
     }
@@ -47,7 +47,7 @@ impl Context {
         &self,
         size: usize,
         user_ptr: *mut c_void,
-    ) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
+    ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
         for dev in &self.devs {
@@ -55,7 +55,7 @@ impl Context {
                 .screen()
                 .resource_create_buffer_from_user(adj_size, user_ptr)
                 .ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), resource?);
+            res.insert(Arc::clone(dev), Arc::new(resource?));
         }
         Ok(res)
     }
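The per-device `PipeResource` values are now wrapped in `Arc` because kernel launches capture them in closures that can outlive the map they were fetched from (see `Kernel::launch` further down). The ownership pattern in isolation, with a stand-in resource type:

use std::sync::Arc;

// Stand-in for a driver resource handle; in rusticl this is PipeResource.
struct Resource(u32);

fn main() {
    let res = Arc::new(Resource(42));
    // A queued kernel launch can keep the resource alive inside a
    // closure that outlives the map it came from.
    let launch = {
        let res = Arc::clone(&res);
        move || res.0
    };
    drop(res);
    assert_eq!(launch(), 42);
}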
@@ -10,6 +10,8 @@ use crate::core::util::*;
 use crate::core::version::*;
 use crate::impl_cl_type_trait;
 
+use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
 use self::mesa_rust::pipe::context::*;
 use self::mesa_rust::pipe::device::load_screens;
 use self::mesa_rust::pipe::screen::*;
@@ -27,7 +29,7 @@ use std::sync::MutexGuard;
 
 pub struct Device {
     pub base: CLObjectBase<CL_INVALID_DEVICE>,
-    screen: Arc<PipeScreen>,
+    pub screen: Arc<PipeScreen>,
     pub cl_version: CLVersion,
     pub clc_version: CLVersion,
     pub clc_versions: Vec<cl_name_version>,
@@ -36,6 +38,7 @@ pub struct Device {
     pub extension_string: String,
     pub extensions: Vec<cl_name_version>,
     pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
+    pub lib_clc: NirShader,
     helper_ctx: Mutex<Arc<PipeContext>>,
 }
 
@@ -43,6 +46,11 @@ impl_cl_type_trait!(cl_device_id, Device, CL_INVALID_DEVICE);
 
 impl Device {
     fn new(screen: Arc<PipeScreen>) -> Option<Arc<Device>> {
+        if !Self::check_valid(&screen) {
+            return None;
+        }
+
+        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
         let mut d = Self {
             base: CLObjectBase::new(),
             helper_ctx: Mutex::new(screen.create_context().unwrap()),
@@ -55,12 +63,9 @@ impl Device {
             extension_string: String::from(""),
             extensions: Vec::new(),
             formats: HashMap::new(),
+            lib_clc: lib_clc?,
         };
 
-        if !d.check_valid() {
-            return None;
-        }
-
         d.fill_format_tables();
 
         // check if we are embedded or full profile first
@@ -112,17 +117,21 @@ impl Device {
         }
     }
 
-    fn check_valid(&self) -> bool {
-        if self.screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
+    fn check_valid(screen: &PipeScreen) -> bool {
+        if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
            // even though we use PIPE_SHADER_IR_NIR, PIPE_SHADER_IR_NIR_SERIALIZED marks CL support by the driver
-           self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
+           screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
         {
             return false;
         }
 
        // CL_DEVICE_MAX_PARAMETER_SIZE
        // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
-        if self.param_max_size() < 128 {
+        if ComputeParam::<u64>::compute_param(
+            screen,
+            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_INPUT_SIZE,
+        ) < 128
+        {
            return false;
        }
        true
@@ -3,37 +3,430 @@ extern crate mesa_rust_gen;
 extern crate rusticl_opencl_gen;
 
 use crate::api::icd::*;
+use crate::core::device::*;
+use crate::core::event::*;
+use crate::core::memory::*;
 use crate::core::program::*;
+use crate::core::queue::*;
 use crate::impl_cl_type_trait;
 
 use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
+use self::mesa_rust_gen::*;
 use self::rusticl_opencl_gen::*;
 
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::convert::TryInto;
+use std::ptr;
 use std::sync::Arc;
 
+// ugh, we are not allowed to take refs, so...
+pub enum KernelArgValue {
+    None,
+    Constant(Vec<u8>),
+    MemObject(&'static Mem),
+    Sampler(&'static Sampler),
+    LocalMem(usize),
+}
+
+#[derive(PartialEq, Eq)]
+pub enum KernelArgType {
+    Constant, // for anything passed by value
+    Sampler,
+    MemGlobal,
+    MemConstant,
+    MemLocal,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub enum InternalKernelArgType {
+    ConstantBuffer,
+}
+
+pub struct KernelArg {
+    spirv: spirv::SPIRVKernelArg,
+    pub kind: KernelArgType,
+    pub size: usize,
+    pub offset: usize,
+    pub dead: bool,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub struct InternalKernelArg {
+    pub kind: InternalKernelArgType,
+    pub size: usize,
+    pub offset: usize,
+}
+
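The `Hash`/`Eq` derives on these argument types are what make the "all devices must agree" checks in `create_kernel` and `Kernel::new` possible: per-device results get collapsed into a `HashSet` that must end up with exactly one element. A toy version of that check, with a hypothetical layout type:

use std::collections::HashSet;

#[derive(Hash, PartialEq, Eq, Debug)]
struct ArgLayout { size: usize, offset: usize }

fn main() {
    let per_device = vec![
        vec![ArgLayout { size: 8, offset: 0 }],
        vec![ArgLayout { size: 8, offset: 0 }],
    ];
    let unique: HashSet<_> = per_device.into_iter().collect();
    assert_eq!(unique.len(), 1); // all devices produced the same layout
}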
+impl KernelArg {
+    fn from_spirv_nir(spirv: Vec<spirv::SPIRVKernelArg>, nir: &mut NirShader) -> Vec<Self> {
+        let nir_arg_map: HashMap<_, _> = nir
+            .variables_with_mode(
+                nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
+            )
+            .map(|v| (v.data.location, v))
+            .collect();
+        let mut res = Vec::new();
+
+        for (i, s) in spirv.into_iter().enumerate() {
+            let nir = nir_arg_map.get(&(i as i32)).unwrap();
+            let kind = match s.address_qualifier {
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
+                    if unsafe { glsl_type_is_sampler(nir.type_) } {
+                        KernelArgType::Sampler
+                    } else {
+                        KernelArgType::Constant
+                    }
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_CONSTANT => {
+                    KernelArgType::MemConstant
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_LOCAL => {
+                    KernelArgType::MemLocal
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_GLOBAL => {
+                    KernelArgType::MemGlobal
+                }
+            };
+
+            res.push(Self {
+                spirv: s,
+                size: unsafe { glsl_get_cl_size(nir.type_) } as usize,
+                // we'll update it later in the 2nd pass
+                kind: kind,
+                offset: 0,
+                dead: true,
+            });
+        }
+        res
+    }
+
+    fn assign_locations(
+        args: &mut [Self],
+        internal_args: &mut [InternalKernelArg],
+        nir: &mut NirShader,
+    ) {
+        for var in nir.variables_with_mode(
+            nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
+        ) {
+            if let Some(arg) = args.get_mut(var.data.location as usize) {
+                arg.offset = var.data.driver_location as usize;
+                arg.dead = false;
+            } else {
+                internal_args
+                    .get_mut(var.data.location as usize - args.len())
+                    .unwrap()
+                    .offset = var.data.driver_location as usize;
+            }
+        }
+    }
+}
+
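`from_spirv_nir` joins SPIR-V argument metadata to NIR variables by their `location`, and `assign_locations` later copies back the driver-assigned offsets in a second pass. A toy version of that join, with hypothetical types standing in for the SPIR-V and NIR structures:

use std::collections::HashMap;

struct SpirvArg { name: &'static str }
struct NirVar { location: i32, driver_location: usize }

fn main() {
    let vars = vec![
        NirVar { location: 1, driver_location: 8 },
        NirVar { location: 0, driver_location: 0 },
    ];
    // Index NIR variables by location, like nir_arg_map above.
    let by_loc: HashMap<i32, &NirVar> = vars.iter().map(|v| (v.location, v)).collect();
    let args = vec![SpirvArg { name: "a" }, SpirvArg { name: "b" }];
    for (i, arg) in args.iter().enumerate() {
        let v = by_loc[&(i as i32)];
        println!("{} -> driver offset {}", arg.name, v.driver_location);
    }
}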
 #[repr(C)]
 pub struct Kernel {
     pub base: CLObjectBase<CL_INVALID_KERNEL>,
     pub prog: Arc<Program>,
     pub name: String,
-    pub args: Vec<spirv::SPIRVKernelArg>,
+    pub args: Vec<KernelArg>,
+    pub values: Vec<RefCell<Option<KernelArgValue>>>,
+    internal_args: Vec<InternalKernelArg>,
+    nirs: HashMap<Arc<Device>, NirShader>,
 }
 
 impl_cl_type_trait!(cl_kernel, Kernel, CL_INVALID_KERNEL);
 
+fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
+    let mut res = [val; 3];
+    for (i, v) in vals.iter().enumerate() {
+        res[i] = (*v).try_into().expect("64 bit work groups not supported");
+    }
+    res
+}
+
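`create_kernel_arr` pads the 1- to 3-entry slices from the API out to the fixed `[u32; 3]` that `pipe_grid_info` expects, with a per-use default for unspecified dimensions. A quick demonstration of its behavior (the function is repeated here so the snippet runs on its own):

use std::convert::TryInto;

fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
    let mut res = [val; 3];
    for (i, v) in vals.iter().enumerate() {
        res[i] = (*v).try_into().expect("64 bit work groups not supported");
    }
    res
}

fn main() {
    assert_eq!(create_kernel_arr(&[256, 4], 1), [256, 4, 1]); // block default is 1
    assert_eq!(create_kernel_arr(&[10], 0), [10, 0, 0]);      // offset default is 0
}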
+// mostly like clc_spirv_to_dxil
+// does not DCE uniforms or images!
+fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
+    nir.set_workgroup_size(&[0; 3]);
+    nir.structurize();
+    while {
+        let mut progress = false;
+        progress |= nir.pass0(nir_copy_prop);
+        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_deref);
+        progress |= nir.pass0(nir_opt_dce);
+        progress |= nir.pass0(nir_opt_undef);
+        progress |= nir.pass0(nir_opt_constant_folding);
+        progress |= nir.pass0(nir_opt_cse);
+        progress |= nir.pass0(nir_lower_vars_to_ssa);
+        progress |= nir.pass0(nir_opt_algebraic);
+        progress
+    } {}
+    nir.inline(lib_clc);
+    nir.remove_non_entrypoints();
+    while {
+        let mut progress = false;
+        progress |= nir.pass0(nir_copy_prop);
+        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_deref);
+        progress |= nir.pass0(nir_opt_dce);
+        progress |= nir.pass0(nir_opt_undef);
+        progress |= nir.pass0(nir_opt_constant_folding);
+        progress |= nir.pass0(nir_opt_cse);
+        progress |= nir.pass0(nir_split_var_copies);
+        progress |= nir.pass0(nir_lower_var_copies);
+        progress |= nir.pass0(nir_lower_vars_to_ssa);
+        progress |= nir.pass0(nir_opt_algebraic);
+        progress |= nir.pass1(
+            nir_opt_if,
+            nir_opt_if_options::nir_opt_if_aggressive_last_continue
+                | nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
+        );
+        progress |= nir.pass0(nir_opt_dead_cf);
+        progress |= nir.pass0(nir_opt_remove_phis);
+        // we don't want to be too aggressive here, but it kills a bit of CFG
+        progress |= nir.pass3(nir_opt_peephole_select, 1, true, true);
+        progress |= nir.pass1(
+            nir_lower_vec3_to_vec4,
+            nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
+        );
+        progress
+    } {}
+    // TODO variable initializers
+    // TODO lower memcpy
+    nir.pass0(nir_dedup_inline_samplers);
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_function_temp,
+        Some(glsl_get_cl_type_size_align),
+    );
+    // TODO printf
+    nir.pass0(nir_split_var_copies);
+    nir.pass0(nir_opt_copy_prop_vars);
+    nir.pass0(nir_lower_var_copies);
+    nir.pass0(nir_lower_vars_to_ssa);
+    nir.pass0(nir_lower_alu);
+    nir.pass0(nir_opt_dce);
+    nir.pass0(nir_opt_deref);
+}
+
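The `while { ... progress } {}` construction above is the usual Rust idiom for a do-while loop: run all passes at least once and repeat until none of them reports progress. A stand-alone illustration with a toy "pass":

fn main() {
    let mut v = vec![5, 3, 8, 1];
    let mut iterations = 0;
    while {
        // one "pass": bubble adjacent out-of-order pairs
        let mut progress = false;
        for i in 0..v.len() - 1 {
            if v[i] > v[i + 1] {
                v.swap(i, i + 1);
                progress = true;
            }
        }
        iterations += 1;
        progress // loop again only if something changed
    } {}
    assert_eq!(v, [1, 3, 5, 8]);
    assert!(iterations >= 2); // the final iteration confirms the fixpoint
}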
+fn lower_and_optimize_nir_late(
+    dev: &Device,
+    nir: &mut NirShader,
+    args: usize,
+) -> Vec<InternalKernelArg> {
+    let mut res = Vec::new();
+    let mut lower_state = rusticl_lower_state::default();
+
+    nir.pass2(
+        nir_remove_dead_variables,
+        nir_variable_mode::nir_var_uniform
+            | nir_variable_mode::nir_var_mem_constant
+            | nir_variable_mode::nir_var_function_temp,
+        ptr::null(),
+    );
+    nir.pass1(nir_lower_readonly_images_to_tex, false);
+    nir.pass2(
+        nir_remove_dead_variables,
+        nir_variable_mode::nir_var_mem_shared | nir_variable_mode::nir_var_function_temp,
+        ptr::null(),
+    );
+    nir.reset_scratch_size();
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_mem_constant,
+        Some(glsl_get_cl_type_size_align),
+    );
+    nir.extract_constant_initializers();
+    // TODO printf
+    // TODO 32 bit devices
+    if nir.has_constant() {
+        res.push(InternalKernelArg {
+            kind: InternalKernelArgType::ConstantBuffer,
+            offset: 0,
+            size: 8,
+        });
+        lower_state.const_buf = nir.add_var(
+            nir_variable_mode::nir_var_uniform,
+            unsafe { glsl_uint64_t_type() },
+            args + res.len() - 1,
+            "constant_buffer_addr",
+        );
+    }
+
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_mem_shared
+            | nir_variable_mode::nir_var_function_temp
+            | nir_variable_mode::nir_var_uniform
+            | nir_variable_mode::nir_var_mem_global,
+        Some(glsl_get_cl_type_size_align),
+    );
+    nir.pass2(
+        nir_lower_explicit_io,
+        nir_variable_mode::nir_var_mem_global | nir_variable_mode::nir_var_mem_constant,
+        nir_address_format::nir_address_format_64bit_global,
+    );
+    nir.pass1(rusticl_lower_intrinsics, &mut lower_state);
+    nir.pass2(
+        nir_lower_explicit_io,
+        nir_variable_mode::nir_var_mem_shared
+            | nir_variable_mode::nir_var_function_temp
+            | nir_variable_mode::nir_var_uniform,
+        nir_address_format::nir_address_format_32bit_offset_as_64bit,
+    );
+    nir.pass0(nir_lower_system_values);
+    let compute_options = nir_lower_compute_system_values_options::default();
+    nir.pass1(nir_lower_compute_system_values, &compute_options);
+    nir.pass0(nir_opt_deref);
+    nir.pass0(nir_lower_vars_to_ssa);
+
+    // TODO whatever clc is doing here
+
+    nir.pass1(nir_lower_convert_alu_types, None);
+    nir.pass0(nir_opt_dce);
+    dev.screen.finalize_nir(nir);
+    res
+}
+
 impl Kernel {
-    pub fn new(name: String, prog: Arc<Program>, args: Vec<spirv::SPIRVKernelArg>) -> Arc<Kernel> {
+    pub fn new(
+        name: String,
+        prog: Arc<Program>,
+        mut nirs: HashMap<Arc<Device>, NirShader>,
+        args: Vec<spirv::SPIRVKernelArg>,
+    ) -> Arc<Kernel> {
+        nirs.iter_mut()
+            .for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
+        let nir = nirs.values_mut().next().unwrap();
+        let mut args = KernelArg::from_spirv_nir(args, nir);
+        // can't use vec!...
+        let values = args.iter().map(|_| RefCell::new(None)).collect();
+        let internal_args: HashSet<_> = nirs
+            .iter_mut()
+            .map(|(d, n)| lower_and_optimize_nir_late(d, n, args.len()))
+            .collect();
+        // we want the same internal args for every compiled kernel, for now
+        assert!(internal_args.len() == 1);
+        let mut internal_args = internal_args.into_iter().next().unwrap();
+
+        nirs.values_mut()
+            .for_each(|n| KernelArg::assign_locations(&mut args, &mut internal_args, n));
+
         Arc::new(Self {
             base: CLObjectBase::new(),
             prog: prog,
             name: name,
             args: args,
+            values: values,
+            internal_args: internal_args,
+            // caller has to verify all kernels have the same sig
+            nirs: nirs,
         })
     }
 
+    // the painful part is that host threads are allowed to modify the kernel object once it was
+    // enqueued, so return a closure with all required data included.
+    pub fn launch(
+        &self,
+        q: &Arc<Queue>,
+        work_dim: u32,
+        block: &[usize],
+        grid: &[usize],
+        offsets: &[usize],
+    ) -> EventSig {
+        let nir = self.nirs.get(&q.device).unwrap();
+        let mut block = create_kernel_arr(block, 1);
+        let mut grid = create_kernel_arr(grid, 1);
+        let offsets = create_kernel_arr(offsets, 0);
+        let mut input: Vec<u8> = Vec::new();
+        let mut resource_info = Vec::new();
+        let mut local_size: u32 = nir.shared_size();
+
+        for i in 0..3 {
+            if block[i] == 0 {
+                block[i] = 1;
+            } else {
+                grid[i] /= block[i];
+            }
+        }
+
+        for (arg, val) in self.args.iter().zip(&self.values) {
+            if arg.dead {
+                continue;
+            }
+            match val.borrow().as_ref().unwrap() {
+                KernelArgValue::Constant(c) => input.extend_from_slice(c),
+                KernelArgValue::MemObject(mem) => {
+                    input.extend_from_slice(&mem.offset.to_ne_bytes());
+                    resource_info.push((Some(mem.get_res_of_dev(&q.device).clone()), arg.offset));
+                }
+                KernelArgValue::LocalMem(size) => {
+                    // TODO 32 bit
+                    input.extend_from_slice(&[0; 8]);
+                    local_size += *size as u32;
+                }
+                KernelArgValue::None => {
+                    assert!(
+                        arg.kind == KernelArgType::MemGlobal
+                            || arg.kind == KernelArgType::MemConstant
+                    );
+                    input.extend_from_slice(&[0; 8]);
+                }
+                _ => panic!("unhandled arg type"),
+            }
+        }
+
+        for arg in &self.internal_args {
+            match arg.kind {
+                InternalKernelArgType::ConstantBuffer => {
+                    input.extend_from_slice(&[0; 8]);
+                    let buf = nir.get_constant_buffer();
+                    let res = Arc::new(
+                        q.device
+                            .screen()
+                            .resource_create_buffer(buf.len() as u32)
+                            .unwrap(),
+                    );
+                    q.device.helper_ctx().buffer_subdata(
+                        &res,
+                        0,
+                        buf.as_ptr().cast(),
+                        buf.len() as u32,
+                    );
+                    resource_info.push((Some(res), arg.offset));
+                }
+            }
+        }
+
+        let cso = q
+            .device
+            .helper_ctx()
+            .create_compute_state(nir, input.len() as u32, local_size);
+
+        Box::new(move |_, ctx| {
+            let mut input = input.clone();
+            let mut resources = Vec::with_capacity(resource_info.len());
+            let mut globals: Vec<*mut u32> = Vec::new();
+
+            for (res, offset) in resource_info.clone() {
+                resources.push(res);
+                globals.push(unsafe { input.as_mut_ptr().add(offset) }.cast());
+            }
+
+            ctx.bind_compute_state(cso);
+            ctx.set_global_binding(resources.as_slice(), &mut globals);
+            ctx.launch_grid(work_dim, block, grid, offsets, &input);
+            ctx.clear_global_binding(globals.len() as u32);
+            ctx.delete_compute_state(cso);
+            ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
+            Ok(())
+        })
+    }
+
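`launch` packs by-value arguments and placeholder pointers into one byte buffer that the compute state reads as its `input`; the 8-byte global-buffer slots are patched with real GPU addresses at bind time through `set_global_binding`. A minimal packing sketch (the values are made up):

fn main() {
    let mut input: Vec<u8> = Vec::new();

    // A by-value argument is copied in directly.
    let scalar: u32 = 7;
    input.extend_from_slice(&scalar.to_ne_bytes());

    // A global memory argument reserves 8 bytes that the driver overwrites
    // with the buffer's address once the binding is set.
    let placeholder: u64 = 0;
    input.extend_from_slice(&placeholder.to_ne_bytes());

    assert_eq!(input.len(), 4 + 8);
}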
     pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier {
-        let aq = self.args[idx as usize].access_qualifier;
+        let aq = self.args[idx as usize].spirv.access_qualifier;
 
         if aq
             == clc_kernel_arg_access_qualifier::CLC_KERNEL_ARG_ACCESS_READ
@@ -50,7 +443,7 @@ impl Kernel {
     }
 
     pub fn address_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_address_qualifier {
-        match self.args[idx as usize].address_qualifier {
+        match self.args[idx as usize].spirv.address_qualifier {
             clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
                 CL_KERNEL_ARG_ADDRESS_PRIVATE
             }
@@ -67,7 +460,7 @@ impl Kernel {
     }
 
     pub fn type_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_type_qualifier {
-        let tq = self.args[idx as usize].type_qualifier;
+        let tq = self.args[idx as usize].spirv.type_qualifier;
         let zero = clc_kernel_arg_type_qualifier(0);
         let mut res = CL_KERNEL_ARG_TYPE_NONE;
 
@@ -87,10 +480,10 @@ impl Kernel {
     }
 
     pub fn arg_name(&self, idx: cl_uint) -> &String {
-        &self.args[idx as usize].name
+        &self.args[idx as usize].spirv.name
     }
 
     pub fn arg_type_name(&self, idx: cl_uint) -> &String {
-        &self.args[idx as usize].type_name
+        &self.args[idx as usize].spirv.type_name
     }
 }
@@ -36,7 +36,7 @@ pub struct Mem {
     pub image_desc: cl_image_desc,
     pub image_elem_size: u8,
     pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
-    res: Option<HashMap<Arc<Device>, PipeResource>>,
+    res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
     maps: Mutex<HashMap<*mut c_void, (u32, PipeTransfer)>>,
 }
 
@@ -187,7 +187,7 @@ impl Mem {
         ptr::eq(a, b)
     }
 
-    fn get_res(&self) -> &HashMap<Arc<Device>, PipeResource> {
+    fn get_res(&self) -> &HashMap<Arc<Device>, Arc<PipeResource>> {
         self.parent
             .as_ref()
             .map_or(self, |p| p.as_ref())
@@ -196,6 +196,10 @@ impl Mem {
             .unwrap()
     }
 
+    pub fn get_res_of_dev(&self, dev: &Arc<Device>) -> &Arc<PipeResource> {
+        self.get_res().get(dev).unwrap()
+    }
+
     fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self {
         if let Some(parent) = &self.parent {
             offset.add_assign(self.offset);
@@ -1,4 +1,5 @@
 extern crate mesa_rust;
+extern crate mesa_rust_gen;
 extern crate rusticl_opencl_gen;
 
 use crate::api::icd::*;
@@ -7,6 +8,8 @@ use crate::core::device::*;
 use crate::impl_cl_type_trait;
 
 use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
+use self::mesa_rust_gen::*;
 use self::rusticl_opencl_gen::*;
 
 use std::collections::HashMap;
@@ -227,4 +230,28 @@ impl Program {
             }),
         })
     }
+
+    pub fn nirs(&self, kernel: &str) -> HashMap<Arc<Device>, NirShader> {
+        let mut lock = self.build_info();
+        let mut res = HashMap::new();
+        for d in &self.devs {
+            let info = Self::dev_build_info(&mut lock, d);
+            if info.status != CL_BUILD_SUCCESS as cl_build_status {
+                continue;
+            }
+            let nir = info
+                .spirv
+                .as_ref()
+                .unwrap()
+                .to_nir(
+                    kernel,
+                    d.screen
+                        .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE),
+                    &d.lib_clc,
+                )
+                .unwrap();
+            res.insert(d.clone(), nir);
+        }
+        res
+    }
 }
@@ -1,6 +1,9 @@
 extern crate mesa_rust_gen;
 extern crate mesa_rust_util;
 
+use crate::compiler::nir::*;
+use crate::pipe::screen::*;
+
 use self::mesa_rust_gen::*;
 use self::mesa_rust_util::string::*;
 
@@ -167,6 +170,70 @@ impl SPIRVBin {
             .collect(),
         }
     }
 
+    fn get_spirv_options(library: bool, clc_shader: *const nir_shader) -> spirv_to_nir_options {
+        spirv_to_nir_options {
+            create_library: library,
+            environment: nir_spirv_execution_environment::NIR_SPIRV_OPENCL,
+            clc_shader: clc_shader,
+            float_controls_execution_mode: float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32
+                as u16,
+
+            caps: spirv_supported_capabilities {
+                address: true,
+                float64: true,
+                int8: true,
+                int16: true,
+                int64: true,
+                kernel: true,
+                kernel_image: true,
+                linkage: true,
+                ..Default::default()
+            },
+
+            constant_addr_format: nir_address_format::nir_address_format_64bit_global,
+            global_addr_format: nir_address_format::nir_address_format_64bit_global, // TODO 32 bit devices
+            shared_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
+            temp_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
+
+            // default
+            debug: spirv_to_nir_options__bindgen_ty_1::default(),
+            ..Default::default()
+        }
+    }
+
+    pub fn to_nir(
+        &self,
+        entry_point: &str,
+        nir_options: *const nir_shader_compiler_options,
+        libclc: &NirShader,
+    ) -> Option<NirShader> {
+        let c_entry = CString::new(entry_point.as_bytes()).unwrap();
+        let spirv_options = Self::get_spirv_options(false, libclc.get_nir());
+        let nir = unsafe {
+            spirv_to_nir(
+                self.spirv.data.cast(),
+                self.spirv.size / 4,
+                ptr::null_mut(), // spec
+                0,               // spec count
+                gl_shader_stage::MESA_SHADER_KERNEL,
+                c_entry.as_ptr(),
+                &spirv_options,
+                nir_options,
+            )
+        };
+
+        NirShader::new(nir)
+    }
+
+    pub fn get_lib_clc(screen: &PipeScreen) -> Option<NirShader> {
+        let nir_options = screen.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE);
+        let spirv_options = Self::get_spirv_options(true, ptr::null());
+        let shader_cache = screen.shader_cache();
+        NirShader::new(unsafe {
+            nir_load_libclc_shader(64, shader_cache, &spirv_options, nir_options)
+        })
+    }
 }
 
 impl Drop for SPIRVBin {
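`to_nir` divides the blob's byte size by 4 because `spirv_to_nir` takes a count of 32-bit SPIR-V words, not bytes. The conversion, spelled out on a hypothetical module blob:

fn main() {
    let spirv_bytes = vec![0u8; 64]; // hypothetical module blob
    assert_eq!(spirv_bytes.len() % 4, 0); // valid modules are word aligned
    let word_count = spirv_bytes.len() / 4;
    assert_eq!(word_count, 16);
}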
@@ -2,12 +2,215 @@ extern crate mesa_rust_gen;
 
 use self::mesa_rust_gen::*;
 
+use std::convert::TryInto;
+use std::ffi::c_void;
+use std::ffi::CString;
+use std::marker::PhantomData;
+use std::ptr;
 use std::ptr::NonNull;
+use std::slice;
 
+// from https://internals.rust-lang.org/t/discussion-on-offset-of/7440/2
+macro_rules! offset_of {
+    ($Struct:path, $field:ident) => {{
+        // Using a separate function to minimize unhygienic hazards
+        // (e.g. unsafety of #[repr(packed)] field borrows).
+        // Uncomment `const` when `const fn`s can juggle pointers.
+        /*const*/
+        fn offset() -> usize {
+            let u = std::mem::MaybeUninit::<$Struct>::uninit();
+            // Use pattern-matching to avoid accidentally going through Deref.
+            let &$Struct { $field: ref f, .. } = unsafe { &*u.as_ptr() };
+            let o = (f as *const _ as usize).wrapping_sub(&u as *const _ as usize);
+            // Triple check that we are within `u` still.
+            assert!((0..=std::mem::size_of_val(&u)).contains(&o));
+            o
+        }
+        offset()
+    }};
+}
+
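A usage example for the `offset_of!` macro above (compile it together with the macro definition; the struct layout here is made up):

struct Node {
    next: usize,
    prev: usize,
}

struct Variable {
    ty: u32,
    node: Node,
    name: *const u8,
}

fn main() {
    // ExecListIter needs exactly this: the byte offset of the embedded list
    // node inside the containing struct, to step from node pointers back to
    // the element.
    let off = offset_of!(Variable, node);
    println!("node lives {} bytes into Variable", off);
}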
+pub struct ExecListIter<'a, T> {
+    n: &'a mut exec_node,
+    offset: usize,
+    _marker: PhantomData<T>,
+}
+
+impl<'a, T> ExecListIter<'a, T> {
+    fn new(l: &'a mut exec_list, offset: usize) -> Self {
+        Self {
+            n: &mut l.head_sentinel,
+            offset: offset,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<'a, T: 'a> Iterator for ExecListIter<'a, T> {
+    type Item = &'a mut T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.n = unsafe { &mut *self.n.next };
+        if self.n.next.is_null() {
+            None
+        } else {
+            let t: *mut c_void = (self.n as *mut exec_node).cast();
+            Some(unsafe { &mut *(t.sub(self.offset).cast()) })
+        }
+    }
+}
+
 pub struct NirShader {
     nir: NonNull<nir_shader>,
 }
 
 impl NirShader {
     pub fn new(nir: *mut nir_shader) -> Option<Self> {
         NonNull::new(nir).map(|nir| Self { nir: nir })
     }
 
+    pub fn print(&self) {
+        unsafe { nir_print_shader(self.nir.as_ptr(), stderr) };
+    }
+
+    pub fn get_nir(&self) -> *mut nir_shader {
+        self.nir.as_ptr()
+    }
+
+    pub fn dup_for_driver(&self) -> *mut nir_shader {
+        unsafe { nir_shader_clone(ptr::null_mut(), self.nir.as_ptr()) }
+    }
+
+    pub fn pass0<R>(&mut self, pass: unsafe extern "C" fn(*mut nir_shader) -> R) -> R {
+        unsafe { pass(self.nir.as_ptr()) }
+    }
+
+    pub fn pass1<R, A>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A) -> R,
+        a: A,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a) }
+    }
+
+    pub fn pass2<R, A, B>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B) -> R,
+        a: A,
+        b: B,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a, b) }
+    }
+
+    pub fn pass3<R, A, B, C>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B, c: C) -> R,
+        a: A,
+        b: B,
+        c: C,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a, b, c) }
+    }
+
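The `pass0` through `pass3` wrappers adapt C pass entry points of different arities to one uniform call on the shader. The same shape in miniature, using a plain function pointer instead of an extern "C" NIR pass:

struct Shader(u32);

impl Shader {
    fn pass1<R, A>(&mut self, pass: fn(&mut u32, A) -> R, a: A) -> R {
        pass(&mut self.0, a)
    }
}

fn add(v: &mut u32, n: u32) -> bool {
    *v += n;
    n != 0 // "progress" flag, like NIR passes return
}

fn main() {
    let mut s = Shader(1);
    assert!(s.pass1(add, 41));
    assert_eq!(s.0, 42);
}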
+    pub fn structurize(&mut self) {
+        self.pass0(nir_lower_goto_ifs);
+        self.pass0(nir_opt_dead_cf);
+    }
+
+    pub fn inline(&mut self, libclc: &NirShader) {
+        self.pass1(
+            nir_lower_variable_initializers,
+            nir_variable_mode::nir_var_function_temp,
+        );
+        self.pass0(nir_lower_returns);
+        self.pass1(nir_lower_libclc, libclc.nir.as_ptr());
+        self.pass0(nir_inline_functions);
+    }
+
+    pub fn remove_non_entrypoints(&mut self) {
+        unsafe { nir_remove_non_entrypoints(self.nir.as_ptr()) };
+    }
+
+    pub fn variables(&mut self) -> ExecListIter<nir_variable> {
+        ExecListIter::new(
+            &mut unsafe { self.nir.as_mut() }.variables,
+            offset_of!(nir_variable, node),
+        )
+    }
+
+    pub fn reset_scratch_size(&self) {
+        unsafe {
+            (*self.nir.as_ptr()).scratch_size = 0;
+        }
+    }
+
+    pub fn shared_size(&self) -> u32 {
+        unsafe { (*self.nir.as_ptr()).info.shared_size }
+    }
+
+    pub fn set_workgroup_size(&self, workgroup: &[u16; 3]) {
+        let mut nir = self.nir.as_ptr();
+        unsafe {
+            (*nir).info.set_workgroup_size_variable(workgroup[0] == 0);
+            (*nir).info.workgroup_size[0] = workgroup[0];
+            (*nir).info.workgroup_size[1] = workgroup[1];
+            (*nir).info.workgroup_size[2] = workgroup[2];
+        }
+    }
+
+    pub fn variables_with_mode(
+        &mut self,
+        mode: nir_variable_mode,
+    ) -> impl Iterator<Item = &mut nir_variable> {
+        self.variables()
+            .filter(move |v| v.data.mode() & mode.0 != 0)
+    }
+
+    pub fn extract_constant_initializers(&self) {
+        let nir = self.nir.as_ptr();
+        unsafe {
+            if (*nir).constant_data_size > 0 {
+                assert!((*nir).constant_data.is_null());
+                (*nir).constant_data = rzalloc_size(nir.cast(), (*nir).constant_data_size as usize);
+                nir_gather_explicit_io_initializers(
+                    nir,
+                    (*nir).constant_data,
+                    (*nir).constant_data_size as usize,
+                    nir_variable_mode::nir_var_mem_constant,
+                );
+            }
+        }
+    }
+
+    pub fn has_constant(&self) -> bool {
+        unsafe {
+            !self.nir.as_ref().constant_data.is_null() && self.nir.as_ref().constant_data_size > 0
+        }
+    }
+
+    pub fn get_constant_buffer(&self) -> &[u8] {
+        unsafe {
+            let nir = self.nir.as_ref();
+            slice::from_raw_parts(nir.constant_data.cast(), nir.constant_data_size as usize)
+        }
+    }
+
+    pub fn add_var(
+        &self,
+        mode: nir_variable_mode,
+        glsl_type: *const glsl_type,
+        loc: usize,
+        name: &str,
+    ) -> *mut nir_variable {
+        let name = CString::new(name).unwrap();
+        unsafe {
+            let var = nir_variable_create(self.nir.as_ptr(), mode, glsl_type, name.as_ptr());
+            (*var).data.location = loc.try_into().unwrap();
+            var
+        }
+    }
 }
 
 impl Drop for NirShader {
     fn drop(&mut self) {
         unsafe { ralloc_free(self.nir.as_ptr().cast()) };
@@ -1,5 +1,6 @@
 extern crate mesa_rust_gen;
 
+use crate::compiler::nir::*;
 use crate::pipe::resource::*;
 use crate::pipe::transfer::*;
 
@@ -97,6 +98,84 @@ impl PipeContext {
 
         unsafe { self.pipe.as_ref().blit.unwrap()(self.pipe.as_ptr(), &blit_info) }
     }
 
+    pub fn create_compute_state(
+        &self,
+        nir: &NirShader,
+        input_mem: u32,
+        local_mem: u32,
+    ) -> *mut c_void {
+        let state = pipe_compute_state {
+            ir_type: pipe_shader_ir::PIPE_SHADER_IR_NIR,
+            prog: nir.dup_for_driver().cast(),
+            req_input_mem: input_mem,
+            req_local_mem: local_mem,
+            req_private_mem: 0,
+        };
+        unsafe { self.pipe.as_ref().create_compute_state.unwrap()(self.pipe.as_ptr(), &state) }
+    }
+
+    pub fn bind_compute_state(&self, state: *mut c_void) {
+        unsafe { self.pipe.as_ref().bind_compute_state.unwrap()(self.pipe.as_ptr(), state) }
+    }
+
+    pub fn delete_compute_state(&self, state: *mut c_void) {
+        unsafe { self.pipe.as_ref().delete_compute_state.unwrap()(self.pipe.as_ptr(), state) }
+    }
+
+    pub fn launch_grid(
+        &self,
+        work_dim: u32,
+        block: [u32; 3],
+        grid: [u32; 3],
+        grid_base: [u32; 3],
+        input: &[u8],
+    ) {
+        let info = pipe_grid_info {
+            pc: 0,
+            input: input.as_ptr().cast(),
+            work_dim: work_dim,
+            block: block,
+            last_block: [0; 3],
+            grid: grid,
+            grid_base: grid_base,
+            indirect: ptr::null_mut(),
+            indirect_offset: 0,
+        };
+        unsafe { self.pipe.as_ref().launch_grid.unwrap()(self.pipe.as_ptr(), &info) }
+    }
+
+    pub fn set_global_binding(&self, res: &[Option<Arc<PipeResource>>], out: &mut [*mut u32]) {
+        let mut res: Vec<_> = res
+            .iter()
+            .map(|o| o.as_ref().map_or(ptr::null_mut(), |r| r.pipe()))
+            .collect();
+        unsafe {
+            self.pipe.as_ref().set_global_binding.unwrap()(
+                self.pipe.as_ptr(),
+                0,
+                res.len() as u32,
+                res.as_mut_ptr(),
+                out.as_mut_ptr(),
+            )
+        }
+    }
+
+    pub fn clear_global_binding(&self, count: u32) {
+        unsafe {
+            self.pipe.as_ref().set_global_binding.unwrap()(
+                self.pipe.as_ptr(),
+                0,
+                count,
+                ptr::null_mut(),
+                ptr::null_mut(),
+            )
+        }
+    }
+
+    pub fn memory_barrier(&self, barriers: u32) {
+        unsafe { self.pipe.as_ref().memory_barrier.unwrap()(self.pipe.as_ptr(), barriers) }
+    }
 }
 
 impl Drop for PipeContext {
@@ -109,8 +188,14 @@ impl Drop for PipeContext {
 
 fn has_required_cbs(c: &pipe_context) -> bool {
     c.destroy.is_some()
+        && c.bind_compute_state.is_some()
         && c.blit.is_some()
         && c.buffer_map.is_some()
         && c.buffer_subdata.is_some()
         && c.buffer_unmap.is_some()
+        && c.create_compute_state.is_some()
+        && c.delete_compute_state.is_some()
+        && c.launch_grid.is_some()
+        && c.memory_barrier.is_some()
+        && c.set_global_binding.is_some()
 }
@@ -1,6 +1,7 @@
 extern crate mesa_rust_gen;
 extern crate mesa_rust_util;
 
+use crate::compiler::nir::NirShader;
 use crate::pipe::context::*;
 use crate::pipe::device::*;
 use crate::pipe::resource::*;
@@ -151,6 +152,39 @@ impl PipeScreen {
         let s = &mut unsafe { *self.screen };
         unsafe { s.is_format_supported.unwrap()(self.screen, format, target, 0, 0, bindings) }
     }
 
+    pub fn nir_shader_compiler_options(
+        &self,
+        shader: pipe_shader_type,
+    ) -> *const nir_shader_compiler_options {
+        unsafe {
+            (*self.screen).get_compiler_options.unwrap()(
+                self.screen,
+                pipe_shader_ir::PIPE_SHADER_IR_NIR,
+                shader,
+            )
+            .cast()
+        }
+    }
+
+    pub fn shader_cache(&self) -> *mut disk_cache {
+        let s = &mut unsafe { *self.screen };
+
+        if let Some(func) = s.get_disk_shader_cache {
+            unsafe { func(self.screen) }
+        } else {
+            ptr::null_mut()
+        }
+    }
+
+    pub fn finalize_nir(&self, nir: &NirShader) {
+        let s = &mut unsafe { *self.screen };
+        if let Some(func) = s.finalize_nir {
+            unsafe {
+                func(s, nir.get_nir().cast());
+            }
+        }
+    }
 }
 
 impl Drop for PipeScreen {
@@ -165,6 +199,7 @@ fn has_required_cbs(screen: *mut pipe_screen) -> bool {
     let s = unsafe { *screen };
     s.context_create.is_some()
         && s.destroy.is_some()
+        && s.get_compiler_options.is_some()
         && s.get_compute_param.is_some()
         && s.get_name.is_some()
         && s.get_param.is_some()
@@ -149,13 +149,16 @@ rusticl_mesa_bindings_inline_wrapper = static_library(
   'mesa_bindings_inline_wrapper',
   [
     'rusticl_mesa_inline_bindings_wrapper.c',
-    'rusticl_mesa_inline_bindings_wrapper.h'
+    'rusticl_mesa_inline_bindings_wrapper.h',
+    'rusticl_nir.c',
+    'rusticl_nir.h',
   ],
   gnu_symbol_visibility : 'hidden',
   include_directories : [
     inc_gallium,
     inc_gallium_aux,
     inc_include,
+    inc_nir,
     inc_src,
   ],
   c_args : pre_args,
@@ -179,16 +182,23 @@ rusticl_mesa_bindings_rs = rust.bindgen(
   args : [
     rusticl_bindgen_args,
     '--whitelist-function', 'clc_.*',
+    '--whitelist-function', 'glsl_.*',
     '--whitelist-function', 'nir_.*',
     '--whitelist-function', 'pipe_.*',
-    '--whitelist-function', 'ralloc_.*',
+    '--whitelist-function', 'rusticl_.*',
+    '--whitelist-function', 'rz?alloc_.*',
+    '--whitelist-function', 'spirv_.*',
     '--whitelist-type', 'pipe_endian',
     '--whitelist-type', 'clc_kernel_arg_access_qualifier',
     '--bitfield-enum', 'clc_kernel_arg_access_qualifier',
     '--whitelist-type', 'clc_kernel_arg_type_qualifier',
     '--bitfield-enum', 'clc_kernel_arg_type_qualifier',
+    '--bitfield-enum', 'nir_opt_if_options',
+    '--bitfield-enum', 'nir_variable_mode',
+    '--whitelist-type', 'float_controls',
     '--whitelist-var', 'PIPE_.*',
     '--bitfield-enum', 'pipe_map_flags',
+    '--allowlist-var', 'stderr',
   ],
 )
 
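The `--bitfield-enum` flags make bindgen emit newtype wrappers with bitwise operators, which is what allows expressions like `nir_var_uniform | nir_var_image` in the Rust code above. A hand-written equivalent of the generated shape (the names here are illustrative, not what bindgen emits):

#[derive(Clone, Copy, PartialEq, Eq)]
struct VariableMode(u32);

impl std::ops::BitOr for VariableMode {
    type Output = Self;
    fn bitor(self, rhs: Self) -> Self {
        Self(self.0 | rhs.0)
    }
}

const VAR_UNIFORM: VariableMode = VariableMode(1 << 0);
const VAR_IMAGE: VariableMode = VariableMode(1 << 1);

fn main() {
    let mode = VAR_UNIFORM | VAR_IMAGE;
    assert!(mode.0 & VAR_IMAGE.0 != 0);
}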
@@ -1,11 +1,13 @@
 #include "rusticl_mesa_inline_bindings_wrapper.h"
 
 #include "compiler/clc/clc.h"
 
+#include "nir.h"
 #include "nir_types.h"
 #include "spirv/nir_spirv.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
 #include "pipe-loader/pipe_loader.h"
 
+#include "rusticl_nir.h"
src/gallium/frontends/rusticl/rusticl_nir.c (new file, 38 lines)
@@ -0,0 +1,38 @@
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "rusticl_nir.h"
|
||||
|
||||
static bool
|
||||
rusticl_lower_intrinsics_filter(const nir_instr* instr, const void* state)
|
||||
{
|
||||
return instr->type == nir_instr_type_intrinsic;
|
||||
}
|
||||
|
||||
static nir_ssa_def*
|
||||
rusticl_lower_intrinsics_instr(
|
||||
nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void* _state
|
||||
) {
|
||||
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
|
||||
struct rusticl_lower_state *state = _state;
|
||||
|
||||
switch (intrinsic->intrinsic) {
|
||||
case nir_intrinsic_load_constant_base_ptr:
|
||||
return nir_load_var(b, state->const_buf);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state* state)
|
||||
{
|
||||
return nir_shader_lower_instructions(
|
||||
nir,
|
||||
rusticl_lower_intrinsics_filter,
|
||||
rusticl_lower_intrinsics_instr,
|
||||
state
|
||||
);
|
||||
}
|
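`nir_shader_lower_instructions` drives the pass above with a filter predicate and a rewrite callback. The same callback-driven rewrite shape, sketched in Rust over a toy instruction stream (types and values are made up):

#[derive(Debug, PartialEq)]
enum Instr { LoadConstantBasePtr, LoadVar }

fn lower(
    instrs: &mut Vec<Instr>,
    filter: fn(&Instr) -> bool,
    rewrite: fn(&Instr) -> Option<Instr>,
) -> bool {
    let mut progress = false;
    for i in instrs.iter_mut() {
        if filter(i) {
            if let Some(new) = rewrite(i) {
                *i = new; // replace the matched instruction
                progress = true;
            }
        }
    }
    progress
}

fn main() {
    let mut instrs = vec![Instr::LoadConstantBasePtr, Instr::LoadVar];
    let progress = lower(
        &mut instrs,
        |i| matches!(i, Instr::LoadConstantBasePtr), // filter
        |_| Some(Instr::LoadVar),                    // rewrite to a variable load
    );
    assert!(progress);
    assert_eq!(instrs, vec![Instr::LoadVar, Instr::LoadVar]);
}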
src/gallium/frontends/rusticl/rusticl_nir.h (new file, 5 lines)
@@ -0,0 +1,5 @@
+struct rusticl_lower_state {
+   nir_variable *const_buf;
+};
+
+bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);