rusticl/mem: implement copies between buffers and images
v2: Use the pitches from the pipe_transfer_map (Jason)

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
@@ -1073,33 +1073,51 @@ extern "C" fn cl_enqueue_copy_image(
 }
 
 extern "C" fn cl_enqueue_copy_image_to_buffer(
-    _command_queue: cl_command_queue,
-    _src_image: cl_mem,
-    _dst_buffer: cl_mem,
-    _src_origin: *const usize,
-    _region: *const usize,
-    _dst_offset: usize,
-    _num_events_in_wait_list: cl_uint,
-    _event_wait_list: *const cl_event,
-    _event: *mut cl_event,
+    command_queue: cl_command_queue,
+    src_image: cl_mem,
+    dst_buffer: cl_mem,
+    src_origin: *const usize,
+    region: *const usize,
+    dst_offset: usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
 ) -> cl_int {
-    println!("cl_enqueue_copy_image_to_buffer not implemented");
-    CL_OUT_OF_HOST_MEMORY
+    match_err!(enqueue_copy_image_to_buffer(
+        command_queue,
+        src_image,
+        dst_buffer,
+        src_origin,
+        region,
+        dst_offset,
+        num_events_in_wait_list,
+        event_wait_list,
+        event,
+    ))
 }
 
 extern "C" fn cl_enqueue_copy_buffer_to_image(
-    _command_queue: cl_command_queue,
-    _src_buffer: cl_mem,
-    _dst_image: cl_mem,
-    _src_offset: usize,
-    _dst_origin: *const usize,
-    _region: *const usize,
-    _num_events_in_wait_list: cl_uint,
-    _event_wait_list: *const cl_event,
-    _event: *mut cl_event,
+    command_queue: cl_command_queue,
+    src_buffer: cl_mem,
+    dst_image: cl_mem,
+    src_offset: usize,
+    dst_origin: *const usize,
+    region: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
 ) -> cl_int {
-    println!("cl_enqueue_copy_buffer_to_image not implemented");
-    CL_OUT_OF_HOST_MEMORY
+    match_err!(enqueue_copy_buffer_to_image(
+        command_queue,
+        src_buffer,
+        dst_image,
+        src_offset,
+        dst_origin,
+        region,
+        num_events_in_wait_list,
+        event_wait_list,
+        event,
+    ))
 }
 
 extern "C" fn cl_enqueue_map_buffer(
@@ -16,6 +16,7 @@ use self::mesa_rust_util::properties::Properties;
 use self::mesa_rust_util::ptr::*;
 use self::rusticl_opencl_gen::*;
 
+use std::cell::Cell;
 use std::cmp::Ordering;
 use std::os::raw::c_void;
 use std::ptr;
@@ -1502,11 +1503,11 @@ pub fn enqueue_map_buffer(
     event: *mut cl_event,
 ) -> CLResult<*mut c_void> {
     let q = command_queue.get_arc()?;
-    let b = buffer.get_ref()?;
+    let b = buffer.get_arc()?;
     let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
     let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
 
-    validate_map_flags(b, map_flags)?;
+    validate_map_flags(&b, map_flags)?;
 
     // CL_INVALID_VALUE if region being mapped given by (offset, size) is out of bounds or if size
     // is 0
@@ -1525,17 +1526,37 @@ pub fn enqueue_map_buffer(
         return Err(CL_INVALID_CONTEXT);
     }
 
-    create_and_queue(
-        q.clone(),
-        CL_COMMAND_MAP_BUFFER,
-        evs,
-        event,
-        block,
-        // we don't really have anything to do here?
-        Box::new(|_, _| Ok(())),
-    )?;
-
-    b.map_buffer(&q, offset, size, block)
+    if block {
+        let ptr = Arc::new(Cell::new(Ok(ptr::null_mut())));
+        let cloned = ptr.clone();
+        create_and_queue(
+            q,
+            CL_COMMAND_MAP_BUFFER,
+            evs,
+            event,
+            block,
+            // we don't really have anything to do here?
+            Box::new(move |q, ctx| {
+                cloned.set(b.map_buffer(q, Some(ctx), offset, size));
+                Ok(())
+            }),
+        )?;
+
+        ptr.get()
+    } else {
+        create_and_queue(
+            q.clone(),
+            CL_COMMAND_MAP_BUFFER,
+            evs,
+            event,
+            block,
+            // we don't really have anything to do here?
+            Box::new(|_, _| Ok(())),
+        )?;
+
+        b.map_buffer(&q, None, offset, size)
+    }
 
     // TODO
     // CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1.
     // CL_MAP_FAILURE if there is a failure to map the requested region into the host address space. This error cannot occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.
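The blocking path above queues a closure that deposits its result into shared state and relies on the blocking submission to flush the queue before the result is read back. A minimal standalone sketch of that hand-off, with plain std threading standing in for rusticl's event queue (blocking_enqueue and every name below are illustrative, not rusticl API):

    use std::sync::{Arc, Mutex};
    use std::thread;

    // Queue a job that produces a result, then block until it is visible.
    fn blocking_enqueue<F>(job: F) -> Result<usize, i32>
    where
        F: FnOnce() -> Result<usize, i32> + Send + 'static,
    {
        // Shared slot the queued closure fills in, like the Arc<Cell> above.
        let slot = Arc::new(Mutex::new(Err(-5)));
        let cloned = slot.clone();

        // The "queue": a thread we immediately join, mirroring
        // create_and_queue(..., block = true, ...).
        let worker = thread::spawn(move || {
            *cloned.lock().unwrap() = job();
        });
        worker.join().map_err(|_| -5)?;

        // Safe to read only because the join acted as the blocking flush.
        let res = slot.lock().unwrap().clone();
        res
    }

    fn main() {
        let res = blocking_enqueue(|| Ok(0xdead_usize));
        assert_eq!(res, Ok(0xdead));
    }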
@@ -1815,6 +1836,106 @@ pub fn enqueue_fill_image(
     //image are not supported by device associated with queue.
 }
 
+pub fn enqueue_copy_buffer_to_image(
+    command_queue: cl_command_queue,
+    src_buffer: cl_mem,
+    dst_image: cl_mem,
+    src_offset: usize,
+    dst_origin: *const usize,
+    region: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
+) -> CLResult<()> {
+    let q = command_queue.get_arc()?;
+    let src = src_buffer.get_arc()?;
+    let dst = dst_image.get_arc()?;
+    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+
+    // CL_INVALID_CONTEXT if the context associated with command_queue, src_buffer and dst_image
+    // are not the same
+    if q.context != src.context || q.context != dst.context {
+        return Err(CL_INVALID_CONTEXT);
+    }
+
+    // CL_INVALID_VALUE if dst_origin or region is NULL.
+    if dst_origin.is_null() || region.is_null() {
+        return Err(CL_INVALID_VALUE);
+    }
+
+    let region = unsafe { CLVec::from_raw(region) };
+    let src_origin = CLVec::new([src_offset, 0, 0]);
+    let dst_origin = unsafe { CLVec::from_raw(dst_origin) };
+
+    create_and_queue(
+        q,
+        CL_COMMAND_COPY_BUFFER_TO_IMAGE,
+        evs,
+        event,
+        false,
+        Box::new(move |q, ctx| src.copy_to(q, ctx, &dst, src_origin, dst_origin, &region)),
+    )
+
+    //• CL_INVALID_MEM_OBJECT if src_buffer is not a valid buffer object or dst_image is not a valid image object or if dst_image is a 1D image buffer object created from src_buffer.
+    //• CL_INVALID_VALUE if the 1D, 2D or 3D rectangular region specified by dst_origin and dst_origin + region refer to a region outside dst_image, or if the region specified by src_offset and src_offset + src_cb refer to a region outside src_buffer.
+    //• CL_INVALID_VALUE if values in dst_origin and region do not follow rules described in the argument description for dst_origin and region.
+    //• CL_MISALIGNED_SUB_BUFFER_OFFSET if src_buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
+    //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for dst_image are not supported by device associated with queue.
+    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for dst_image are not supported by device associated with queue.
+    //• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with src_buffer or dst_image.
+    //• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
+}
+
+pub fn enqueue_copy_image_to_buffer(
+    command_queue: cl_command_queue,
+    src_image: cl_mem,
+    dst_buffer: cl_mem,
+    src_origin: *const usize,
+    region: *const usize,
+    dst_offset: usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
+) -> CLResult<()> {
+    let q = command_queue.get_arc()?;
+    let src = src_image.get_arc()?;
+    let dst = dst_buffer.get_arc()?;
+    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+
+    // CL_INVALID_CONTEXT if the context associated with command_queue, src_image and dst_buffer
+    // are not the same
+    if q.context != src.context || q.context != dst.context {
+        return Err(CL_INVALID_CONTEXT);
+    }
+
+    // CL_INVALID_VALUE if src_origin or region is NULL.
+    if src_origin.is_null() || region.is_null() {
+        return Err(CL_INVALID_VALUE);
+    }
+
+    let region = unsafe { CLVec::from_raw(region) };
+    let src_origin = unsafe { CLVec::from_raw(src_origin) };
+    let dst_origin = CLVec::new([dst_offset, 0, 0]);
+
+    create_and_queue(
+        q,
+        CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+        evs,
+        event,
+        false,
+        Box::new(move |q, ctx| src.copy_to(q, ctx, &dst, src_origin, dst_origin, &region)),
+    )
+
+    //• CL_INVALID_MEM_OBJECT if src_image is not a valid image object or dst_buffer is not a valid buffer object or if src_image is a 1D image buffer object created from dst_buffer.
+    //• CL_INVALID_VALUE if the 1D, 2D or 3D rectangular region specified by src_origin and src_origin + region refers to a region outside src_image, or if the region specified by dst_offset and dst_offset + dst_cb refer to a region outside dst_buffer.
+    //• CL_INVALID_VALUE if values in src_origin and region do not follow rules described in the argument description for src_origin and region.
+    //• CL_MISALIGNED_SUB_BUFFER_OFFSET if dst_buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code is missing before version 1.1.
+    //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for src_image are not supported by device associated with queue.
+    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for src_image are not supported by device associated with queue.
+    //• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with src_image or dst_buffer.
+    //• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
+}
+
 pub fn enqueue_map_image(
     command_queue: cl_command_queue,
     image: cl_mem,
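Both new entry points funnel into the shared Mem::copy_to path by promoting a buffer's linear byte offset to a degenerate 3D origin, so one routine serves buffer-to-image and image-to-buffer. A tiny sketch of that convention (plain arrays stand in for CLVec):

    // A buffer's byte offset enters the 3D copy math as [offset, 0, 0]:
    // the y/z components stay zero, so the row and slice terms drop out
    // for the buffer side of the copy.
    fn buffer_origin(byte_offset: usize) -> [usize; 3] {
        [byte_offset, 0, 0]
    }

    fn main() {
        assert_eq!(buffer_origin(128), [128, 0, 0]);
    }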
@@ -1829,11 +1950,11 @@ pub fn enqueue_map_image(
     event: *mut cl_event,
 ) -> CLResult<*mut ::std::os::raw::c_void> {
     let q = command_queue.get_arc()?;
-    let i = image.get_ref()?;
+    let i = image.get_arc()?;
     let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
     let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
 
-    validate_map_flags(i, map_flags)?;
+    validate_map_flags(&i, map_flags)?;
 
     // CL_INVALID_CONTEXT if context associated with command_queue and image are not the same
     if i.context != q.context {
@@ -1849,16 +1970,6 @@ pub fn enqueue_map_image(
     let region = unsafe { CLVec::from_raw(region) };
     let origin = unsafe { CLVec::from_raw(origin) };
 
-    create_and_queue(
-        q.clone(),
-        CL_COMMAND_MAP_IMAGE,
-        evs,
-        event,
-        block,
-        // we don't really have anything to do here?
-        Box::new(|_, _| Ok(())),
-    )?;
-
     let mut dummy_slice_pitch: usize = 0;
     let image_slice_pitch = if image_slice_pitch.is_null() {
         // CL_INVALID_VALUE if image is a 3D image, 1D or 2D image array object and
@@ -1871,14 +1982,60 @@ pub fn enqueue_map_image(
         unsafe { image_slice_pitch.as_mut().unwrap() }
     };
 
-    i.map_image(
-        &q,
-        &origin,
-        &region,
-        unsafe { image_row_pitch.as_mut().unwrap() },
-        image_slice_pitch,
-        block,
-    )
+    if block {
+        let res = Arc::new(Cell::new((Ok(ptr::null_mut()), 0, 0)));
+        let cloned = res.clone();
+
+        create_and_queue(
+            q.clone(),
+            CL_COMMAND_MAP_IMAGE,
+            evs,
+            event,
+            block,
+            // we don't really have anything to do here?
+            Box::new(move |q, ctx| {
+                let mut image_row_pitch = 0;
+                let mut image_slice_pitch = 0;
+
+                let ptr = i.map_image(
+                    q,
+                    Some(ctx),
+                    &origin,
+                    &region,
+                    &mut image_row_pitch,
+                    &mut image_slice_pitch,
+                );
+                cloned.set((ptr, image_row_pitch, image_slice_pitch));
+
+                Ok(())
+            }),
+        )?;
+
+        let res = res.get();
+        unsafe { *image_row_pitch = res.1 };
+        *image_slice_pitch = res.2;
+        res.0
+    } else {
+        create_and_queue(
+            q.clone(),
+            CL_COMMAND_MAP_IMAGE,
+            evs,
+            event,
+            block,
+            // we don't really have anything to do here?
+            Box::new(|_, _| Ok(())),
+        )?;
+
+        i.map_image(
+            &q,
+            None,
+            &origin,
+            &region,
+            unsafe { image_row_pitch.as_mut().unwrap() },
+            image_slice_pitch,
+        )
+    }
 
     //• CL_INVALID_VALUE if region being mapped given by (origin, origin + region) is out of bounds or if values specified in map_flags are not valid.
     //• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
     //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
@@ -2,6 +2,8 @@ extern crate rusticl_opencl_gen;
 
 use self::rusticl_opencl_gen::*;
 
+use std::iter::Product;
+
 #[macro_export]
 macro_rules! cl_closure {
     (|$obj:ident| $cb:ident($($arg:ident$(,)?)*)) => {
@@ -91,6 +93,13 @@ impl<T: Copy> CLVec<T> {
     pub unsafe fn from_raw(v: *const T) -> Self {
         Self { vals: *v.cast() }
     }
+
+    pub fn pixels<'a>(&'a self) -> T
+    where
+        T: Product<&'a T>,
+    {
+        self.vals.iter().product()
+    }
 }
 
 impl CLVec<usize> {
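CLVec::pixels() folds a region down to its element count with an iterator product; the copy path below multiplies that by the per-pixel byte size to size its staging maps. The same reduction as a standalone sketch (plain arrays instead of CLVec):

    // Element count of a 3D region: width * height * depth.
    fn pixels(region: [usize; 3]) -> usize {
        region.iter().product()
    }

    fn main() {
        let region = [64, 32, 4];
        let bpp = 4; // e.g. an RGBA8 format
        // Size of a tightly packed staging buffer for the region.
        assert_eq!(pixels(region) * bpp, 64 * 32 * 4 * 4);
    }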
@@ -4,6 +4,7 @@ extern crate rusticl_opencl_gen;
 
 use crate::api::icd::*;
 use crate::core::device::*;
+use crate::core::format::*;
 use crate::core::memory::*;
 use crate::core::util::*;
 use crate::impl_cl_type_trait;
@@ -35,7 +36,11 @@ impl Context {
         })
     }
 
-    pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
+    pub fn create_buffer(
+        &self,
+        size: usize,
+        user_ptr: *mut c_void,
+    ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
         for dev in &self.devs {
@@ -45,6 +50,16 @@ impl Context {
                 .ok_or(CL_OUT_OF_RESOURCES);
             res.insert(Arc::clone(dev), Arc::new(resource?));
         }
 
+        if !user_ptr.is_null() {
+            res.iter()
+                .map(|(d, r)| {
+                    d.helper_ctx()
+                        .exec(|ctx| ctx.buffer_subdata(r, 0, user_ptr, size.try_into().unwrap()))
+                })
+                .for_each(|f| f.wait());
+        }
+
         Ok(res)
     }
@@ -69,6 +84,7 @@ impl Context {
         &self,
         desc: &cl_image_desc,
         format: &cl_image_format,
+        user_ptr: *mut c_void,
     ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let width = desc
             .image_width
@@ -97,6 +113,20 @@ impl Context {
                 .ok_or(CL_OUT_OF_RESOURCES);
             res.insert(Arc::clone(dev), Arc::new(resource?));
         }
 
+        if !user_ptr.is_null() {
+            let bx = desc.bx()?;
+            let stride = desc.row_pitch()?;
+            let layer_stride = desc.slice_pitch()?;
+
+            res.iter()
+                .map(|(d, r)| {
+                    d.helper_ctx()
+                        .exec(|ctx| ctx.texture_subdata(r, &bx, user_ptr, stride, layer_stride))
+                })
+                .for_each(|f| f.wait());
+        }
+
         Ok(res)
     }
@@ -14,7 +14,10 @@ use self::mesa_rust::compiler::clc::*;
 use self::mesa_rust::compiler::nir::*;
 use self::mesa_rust::pipe::context::*;
 use self::mesa_rust::pipe::device::load_screens;
+use self::mesa_rust::pipe::fence::*;
+use self::mesa_rust::pipe::resource::*;
 use self::mesa_rust::pipe::screen::*;
+use self::mesa_rust::pipe::transfer::*;
 use self::mesa_rust_gen::*;
 use self::rusticl_opencl_gen::*;
 
@@ -23,6 +26,7 @@ use std::cmp::min;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::env;
+use std::os::raw::*;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::MutexGuard;
@@ -42,6 +46,67 @@ pub struct Device {
     helper_ctx: Mutex<Arc<PipeContext>>,
 }
 
+pub trait HelperContextWrapper {
+    #[must_use]
+    fn exec<F>(&self, func: F) -> PipeFence
+    where
+        F: Fn(&HelperContext);
+
+    fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer;
+    fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer;
+    fn unmap(&self, tx: PipeTransfer);
+}
+
+pub struct HelperContext<'a> {
+    lock: MutexGuard<'a, Arc<PipeContext>>,
+}
+
+impl<'a> HelperContext<'a> {
+    pub fn buffer_subdata(
+        &self,
+        res: &PipeResource,
+        offset: c_uint,
+        data: *const c_void,
+        size: c_uint,
+    ) {
+        self.lock.buffer_subdata(res, offset, data, size)
+    }
+
+    pub fn texture_subdata(
+        &self,
+        res: &PipeResource,
+        bx: &pipe_box,
+        data: *const c_void,
+        stride: u32,
+        layer_stride: u32,
+    ) {
+        self.lock
+            .texture_subdata(res, bx, data, stride, layer_stride)
+    }
+}
+
+impl<'a> HelperContextWrapper for HelperContext<'a> {
+    fn exec<F>(&self, func: F) -> PipeFence
+    where
+        F: Fn(&HelperContext),
+    {
+        func(self);
+        self.lock.flush()
+    }
+
+    fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer {
+        self.lock.buffer_map(res, offset, size, false)
+    }
+
+    fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer {
+        self.lock.texture_map(res, bx, false)
+    }
+
+    fn unmap(&self, tx: PipeTransfer) {
+        tx.with_ctx(&self.lock);
+    }
+}
+
 impl_cl_type_trait!(cl_device_id, Device, CL_INVALID_DEVICE);
 
 impl Device {
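HelperContext::exec runs the closure against the locked helper context and returns the fence from the flush, so callers pick between fire-and-forget and an explicit .wait(). A minimal mock of that shape (Fence, Ctx and Helper are stand-ins, not mesa types):

    struct Fence;
    impl Fence {
        // Stand-in for waiting on a pipe fence after a flush.
        fn wait(self) {}
    }

    struct Ctx;
    impl Ctx {
        fn flush(&self) -> Fence {
            Fence
        }
    }

    struct Helper {
        ctx: Ctx,
    }

    impl Helper {
        // Run work on the helper context, flush, and hand the caller the
        // fence so it decides whether to block.
        #[must_use]
        fn exec<F: Fn(&Ctx)>(&self, func: F) -> Fence {
            func(&self.ctx);
            self.ctx.flush()
        }
    }

    fn main() {
        let h = Helper { ctx: Ctx };
        // Synchronous upload, like q.device.helper_ctx().exec(...).wait():
        h.exec(|_ctx| { /* buffer_subdata(...) */ }).wait();
    }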
@@ -534,8 +599,10 @@ impl Device {
         id as u32
     }
 
-    pub fn helper_ctx(&self) -> MutexGuard<Arc<PipeContext>> {
-        self.helper_ctx.lock().unwrap()
+    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
+        HelperContext {
+            lock: self.helper_ctx.lock().unwrap(),
+        }
     }
 
     pub fn cl_features(&self) -> clc_optional_features {
@@ -467,12 +467,12 @@ impl Kernel {
                     .resource_create_buffer(buf.len() as u32)
                     .unwrap(),
             );
-            q.device.helper_ctx().buffer_subdata(
-                &res,
-                0,
-                buf.as_ptr().cast(),
-                buf.len() as u32,
-            );
+            q.device
+                .helper_ctx()
+                .exec(|ctx| {
+                    ctx.buffer_subdata(&res, 0, buf.as_ptr().cast(), buf.len() as u32)
+                })
+                .wait();
             resource_info.push((Some(res), arg.offset));
         }
         InternalKernelArgType::GlobalWorkOffsets => {
@@ -216,16 +216,10 @@ impl Mem {
         let buffer = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
             context.create_buffer_from_user(size, host_ptr)
         } else {
-            context.create_buffer(size)
+            assert_eq!(bit_check(flags, CL_MEM_COPY_HOST_PTR), !host_ptr.is_null());
+            context.create_buffer(size, host_ptr)
         }?;
 
-        if bit_check(flags, CL_MEM_COPY_HOST_PTR) {
-            for (d, r) in &buffer {
-                d.helper_ctx()
-                    .buffer_subdata(r, 0, host_ptr, size.try_into().unwrap());
-            }
-        }
-
         let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
             host_ptr
         } else {
@@ -313,20 +307,10 @@ impl Mem {
         let texture = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
             context.create_texture_from_user(&image_desc, image_format, host_ptr)
         } else {
-            context.create_texture(&image_desc, image_format)
+            assert_eq!(bit_check(flags, CL_MEM_COPY_HOST_PTR), !host_ptr.is_null());
+            context.create_texture(&image_desc, image_format, host_ptr)
         }?;
 
-        if bit_check(flags, CL_MEM_COPY_HOST_PTR) {
-            let bx = image_desc.bx()?;
-            let stride = image_desc.row_pitch()?;
-            let layer_stride = image_desc.slice_pitch()?;
-
-            for (d, r) in &texture {
-                d.helper_ctx()
-                    .texture_subdata(r, &bx, host_ptr, stride, layer_stride);
-            }
-        }
-
         let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
             host_ptr
         } else {
@@ -356,38 +340,67 @@ impl Mem {
         self.mem_type == CL_MEM_OBJECT_BUFFER
     }
 
-    fn tx(
+    fn tx_raw(
         &self,
         q: &Arc<Queue>,
-        ctx: &PipeContext,
+        ctx: Option<&PipeContext>,
         mut offset: usize,
         size: usize,
-        blocking: bool,
     ) -> CLResult<PipeTransfer> {
         let b = self.to_parent(&mut offset);
         let r = b.get_res()?.get(&q.device).unwrap();
 
         assert!(self.is_buffer());
 
-        Ok(ctx.buffer_map(
-            r,
-            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-            blocking,
-        ))
+        Ok(if let Some(ctx) = ctx {
+            ctx.buffer_map(
+                r,
+                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+                true,
+            )
+        } else {
+            q.device.helper_ctx().buffer_map_async(
+                r,
+                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+            )
+        })
     }
 
-    fn tx_image(
-        &self,
-        q: &Arc<Queue>,
-        ctx: &PipeContext,
-        bx: &pipe_box,
-        blocking: bool,
-    ) -> CLResult<PipeTransfer> {
+    fn tx<'a>(
+        &self,
+        q: &Arc<Queue>,
+        ctx: &'a PipeContext,
+        offset: usize,
+        size: usize,
+    ) -> CLResult<GuardedPipeTransfer<'a>> {
+        Ok(self.tx_raw(q, Some(ctx), offset, size)?.with_ctx(ctx))
+    }
+
+    fn tx_image_raw(
+        &self,
+        q: &Arc<Queue>,
+        ctx: Option<&PipeContext>,
+        bx: &pipe_box,
+    ) -> CLResult<PipeTransfer> {
         assert!(!self.is_buffer());
 
         let r = self.get_res()?.get(&q.device).unwrap();
-        Ok(ctx.texture_map(r, bx, blocking))
+        Ok(if let Some(ctx) = ctx {
+            ctx.texture_map(r, bx, true)
+        } else {
+            q.device.helper_ctx().texture_map_async(r, bx)
+        })
+    }
+
+    fn tx_image<'a>(
+        &self,
+        q: &Arc<Queue>,
+        ctx: &'a PipeContext,
+        bx: &pipe_box,
+    ) -> CLResult<GuardedPipeTransfer<'a>> {
+        Ok(self.tx_image_raw(q, Some(ctx), bx)?.with_ctx(ctx))
     }
 
     pub fn has_same_parent(&self, other: &Self) -> bool {
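tx and tx_image hand out a GuardedPipeTransfer tied to the context's lifetime, so the mapping is torn down when the guard goes out of scope, while the *_raw variants return a plain transfer that the mappings table keeps until Drop unmaps it through a helper context. A hedged sketch of the guard idea (hypothetical types, not the rusticl ones):

    struct Ctx;
    impl Ctx {
        fn unmap(&self, _tx: &Transfer) { /* pipe transfer teardown */ }
    }

    struct Transfer {
        ptr: *mut u8,
    }

    impl Transfer {
        // Tie the transfer to a context; unmapping happens on drop.
        fn with_ctx(self, ctx: &Ctx) -> Guarded<'_> {
            Guarded { tx: self, ctx }
        }
    }

    struct Guarded<'a> {
        tx: Transfer,
        ctx: &'a Ctx,
    }

    impl<'a> Drop for Guarded<'a> {
        fn drop(&mut self) {
            self.ctx.unmap(&self.tx);
        }
    }

    fn main() {
        let ctx = Ctx;
        let tx = Transfer { ptr: std::ptr::null_mut() };
        let guard = tx.with_ctx(&ctx);
        let _ = guard.tx.ptr; // use the mapping while the guard lives
    } // guard drops here and unmaps through the context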
@@ -428,7 +441,7 @@ impl Mem {
     ) -> CLResult<()> {
         assert!(self.is_buffer());
 
-        let tx = self.tx(q, ctx, offset, size, true)?;
+        let tx = self.tx(q, ctx, offset, size)?;
 
         unsafe {
             ptr::copy_nonoverlapping(tx.ptr(), ptr, size);
@@ -469,17 +482,59 @@ impl Mem {
     ) -> CLResult<()> {
         let src = self.to_parent(&mut src_origin[0]);
         let dst = dst.to_parent(&mut dst_origin[0]);
-        let bx = create_box(&src_origin, region, self.mem_type)?;
-        let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
 
         let src_res = src.get_res()?.get(&q.device).unwrap();
         let dst_res = dst.get_res()?.get(&q.device).unwrap();
 
-        if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
-            (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
-        }
+        if self.is_buffer() && !dst.is_buffer() || !self.is_buffer() && dst.is_buffer() {
+            let tx_src;
+            let tx_dst;
 
-        ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
+            if self.is_buffer() {
+                let bpp = dst.image_format.pixel_size().unwrap() as usize;
+                tx_src = self.tx(q, ctx, src_origin[0], region.pixels() * bpp)?;
+                tx_dst = dst.tx_image(q, ctx, &create_box(&dst_origin, region, dst.mem_type)?)?;
+
+                sw_copy(
+                    tx_src.ptr(),
+                    tx_dst.ptr(),
+                    region,
+                    &CLVec::default(),
+                    region[0] * bpp,
+                    region[0] * region[1] * bpp,
+                    &CLVec::default(),
+                    tx_dst.row_pitch() as usize,
+                    tx_dst.slice_pitch() as usize,
+                    bpp as u8,
+                )
+            } else {
+                let bpp = self.image_format.pixel_size().unwrap() as usize;
+                tx_src = self.tx_image(q, ctx, &create_box(&src_origin, region, self.mem_type)?)?;
+                tx_dst = dst.tx(q, ctx, dst_origin[0], region.pixels() * bpp)?;
+
+                sw_copy(
+                    tx_src.ptr(),
+                    tx_dst.ptr(),
+                    region,
+                    &CLVec::default(),
+                    tx_src.row_pitch() as usize,
+                    tx_src.slice_pitch() as usize,
+                    &CLVec::default(),
+                    region[0] * bpp,
+                    region[0] * region[1] * bpp,
+                    bpp as u8,
+                )
+            }
+        } else {
+            let bx = create_box(&src_origin, region, self.mem_type)?;
+            let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
+
+            if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
+                (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
+            }
+
+            ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
+        }
         Ok(())
     }
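On the software path, the buffer side is addressed with tightly packed pitches derived from the region (region[0] * bpp per row, region[0] * region[1] * bpp per slice), while the image side uses the row and slice pitches reported by the transfer map, which is the v2 change called out in the commit message. A simplified stand-in for sw_copy's addressing (not the rusticl implementation):

    // Copy a 3D region between two linear allocations with independent pitches.
    // Contract: both slices must cover the region under their pitches.
    fn pitched_copy(
        src: &[u8], src_row_pitch: usize, src_slice_pitch: usize,
        dst: &mut [u8], dst_row_pitch: usize, dst_slice_pitch: usize,
        region: [usize; 3], bpp: usize,
    ) {
        for z in 0..region[2] {
            for y in 0..region[1] {
                let s = z * src_slice_pitch + y * src_row_pitch;
                let d = z * dst_slice_pitch + y * dst_row_pitch;
                let row = region[0] * bpp;
                dst[d..d + row].copy_from_slice(&src[s..s + row]);
            }
        }
    }

    fn main() {
        let region = [2, 2, 1];
        let bpp = 4;
        // Tightly packed source (a buffer), padded destination (an image map).
        let src = vec![0xabu8; region[0] * region[1] * bpp];
        let mut dst = vec![0u8; 32 * region[1]];
        pitched_copy(&src, region[0] * bpp, region[0] * region[1] * bpp,
                     &mut dst, 32, 32 * region[1], region, bpp);
        assert_eq!(&dst[0..8], &src[0..8]);
    }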
@@ -546,7 +601,7 @@ impl Mem {
         dst_slice_pitch: usize,
     ) -> CLResult<()> {
         if self.is_buffer() {
-            let tx = self.tx(q, ctx, 0, self.size, true)?;
+            let tx = self.tx(q, ctx, 0, self.size)?;
             sw_copy(
                 src,
                 tx.ptr(),
@@ -603,13 +658,13 @@ impl Mem {
         let pixel_size;
 
         if self.is_buffer() {
-            tx = self.tx(q, ctx, 0, self.size, true)?;
+            tx = self.tx(q, ctx, 0, self.size)?;
             pixel_size = 1;
         } else {
             assert!(dst_origin == &CLVec::default());
 
             let bx = create_box(src_origin, region, self.mem_type)?;
-            tx = self.tx_image(q, ctx, &bx, true)?;
+            tx = self.tx_image(q, ctx, &bx)?;
             src_row_pitch = tx.row_pitch() as usize;
             src_slice_pitch = tx.slice_pitch() as usize;
 
@@ -646,9 +701,10 @@ impl Mem {
         dst_slice_pitch: usize,
     ) -> CLResult<()> {
         assert!(self.is_buffer());
+        assert!(dst.is_buffer());
 
-        let tx_src = self.tx(q, ctx, 0, self.size, true)?;
-        let tx_dst = dst.tx(q, ctx, 0, self.size, true)?;
+        let tx_src = self.tx(q, ctx, 0, self.size)?;
+        let tx_dst = dst.tx(q, ctx, 0, self.size)?;
 
         // TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
         sw_copy(
@@ -670,16 +726,15 @@ impl Mem {
     fn map<'a>(
         &self,
         q: &Arc<Queue>,
-        ctx: &PipeContext,
+        ctx: Option<&PipeContext>,
         lock: &'a mut MutexGuard<Mappings>,
-        block: bool,
     ) -> CLResult<&'a PipeTransfer> {
         if !lock.tx.contains_key(&q.device) {
             let tx = if self.is_buffer() {
-                self.tx(q, ctx, 0, self.size, block)?
+                self.tx_raw(q, ctx, 0, self.size)?
             } else {
                 let bx = self.image_desc.bx()?;
-                self.tx_image(q, ctx, &bx, block)?
+                self.tx_image_raw(q, ctx, &bx)?
             };
 
             lock.tx.insert(q.device.clone(), (tx, 0));
@@ -694,14 +749,14 @@ impl Mem {
     pub fn map_buffer(
         &self,
         q: &Arc<Queue>,
+        ctx: Option<&PipeContext>,
         offset: usize,
         _size: usize,
-        block: bool,
     ) -> CLResult<*mut c_void> {
         assert!(self.is_buffer());
 
         let mut lock = self.maps.lock().unwrap();
-        let tx = self.map(q, &q.device.helper_ctx(), &mut lock, block)?;
+        let tx = self.map(q, ctx, &mut lock)?;
         let ptr = unsafe { tx.ptr().add(offset) };
 
         if let Some(e) = lock.maps.get_mut(&ptr) {
@@ -716,16 +771,16 @@ impl Mem {
     pub fn map_image(
         &self,
         q: &Arc<Queue>,
+        ctx: Option<&PipeContext>,
         origin: &CLVec<usize>,
         _region: &CLVec<usize>,
         row_pitch: &mut usize,
         slice_pitch: &mut usize,
-        block: bool,
     ) -> CLResult<*mut c_void> {
         assert!(!self.is_buffer());
 
         let mut lock = self.maps.lock().unwrap();
-        let tx = self.map(q, &q.device.helper_ctx(), &mut lock, block)?;
+        let tx = self.map(q, ctx, &mut lock)?;
 
         *row_pitch = tx.row_pitch() as usize;
         *slice_pitch = tx.slice_pitch() as usize;
@@ -786,7 +841,7 @@ impl Drop for Mem {
             .for_each(|cb| cb(cl));
 
         for (d, tx) in self.maps.lock().unwrap().tx.drain() {
-            tx.0.with_ctx(&d.helper_ctx());
+            d.helper_ctx().unmap(tx.0);
         }
     }
 }