rusticl/mem: implement copies between buffers and images

v2: Use the pitches from the pipe_transfer_map (Jason)

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
Author:    Karol Herbst <kherbst@redhat.com>
Date:      2022-03-25 14:40:26 +01:00
Committed: Marge Bot
Parent:    431a23b212
Commit:    29026053db
7 changed files with 453 additions and 117 deletions


@@ -1073,33 +1073,51 @@ extern "C" fn cl_enqueue_copy_image(
}
extern "C" fn cl_enqueue_copy_image_to_buffer(
-_command_queue: cl_command_queue,
-_src_image: cl_mem,
-_dst_buffer: cl_mem,
-_src_origin: *const usize,
-_region: *const usize,
-_dst_offset: usize,
-_num_events_in_wait_list: cl_uint,
-_event_wait_list: *const cl_event,
-_event: *mut cl_event,
+command_queue: cl_command_queue,
+src_image: cl_mem,
+dst_buffer: cl_mem,
+src_origin: *const usize,
+region: *const usize,
+dst_offset: usize,
+num_events_in_wait_list: cl_uint,
+event_wait_list: *const cl_event,
+event: *mut cl_event,
) -> cl_int {
-println!("cl_enqueue_copy_image_to_buffer not implemented");
-CL_OUT_OF_HOST_MEMORY
+match_err!(enqueue_copy_image_to_buffer(
+command_queue,
+src_image,
+dst_buffer,
+src_origin,
+region,
+dst_offset,
+num_events_in_wait_list,
+event_wait_list,
+event,
+))
}
extern "C" fn cl_enqueue_copy_buffer_to_image(
-_command_queue: cl_command_queue,
-_src_buffer: cl_mem,
-_dst_image: cl_mem,
-_src_offset: usize,
-_dst_origin: *const usize,
-_region: *const usize,
-_num_events_in_wait_list: cl_uint,
-_event_wait_list: *const cl_event,
-_event: *mut cl_event,
+command_queue: cl_command_queue,
+src_buffer: cl_mem,
+dst_image: cl_mem,
+src_offset: usize,
+dst_origin: *const usize,
+region: *const usize,
+num_events_in_wait_list: cl_uint,
+event_wait_list: *const cl_event,
+event: *mut cl_event,
) -> cl_int {
-println!("cl_enqueue_copy_buffer_to_image not implemented");
-CL_OUT_OF_HOST_MEMORY
+match_err!(enqueue_copy_buffer_to_image(
+command_queue,
+src_buffer,
+dst_image,
+src_offset,
+dst_origin,
+region,
+num_events_in_wait_list,
+event_wait_list,
+event,
+))
}
}
extern "C" fn cl_enqueue_map_buffer(


@@ -16,6 +16,7 @@ use self::mesa_rust_util::properties::Properties;
use self::mesa_rust_util::ptr::*;
use self::rusticl_opencl_gen::*;
+use std::cell::Cell;
use std::cmp::Ordering;
use std::os::raw::c_void;
use std::ptr;
@@ -1502,11 +1503,11 @@ pub fn enqueue_map_buffer(
event: *mut cl_event,
) -> CLResult<*mut c_void> {
let q = command_queue.get_arc()?;
-let b = buffer.get_ref()?;
+let b = buffer.get_arc()?;
let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
-validate_map_flags(b, map_flags)?;
+validate_map_flags(&b, map_flags)?;
// CL_INVALID_VALUE if region being mapped given by (offset, size) is out of bounds or if size
// is 0
@@ -1525,17 +1526,37 @@ pub fn enqueue_map_buffer(
return Err(CL_INVALID_CONTEXT);
}
-create_and_queue(
-q.clone(),
-CL_COMMAND_MAP_BUFFER,
-evs,
-event,
-block,
-// we don't really have anything to do here?
-Box::new(|_, _| Ok(())),
-)?;
+if block {
+let ptr = Arc::new(Cell::new(Ok(ptr::null_mut())));
+let cloned = ptr.clone();
+create_and_queue(
+q,
+CL_COMMAND_MAP_BUFFER,
+evs,
+event,
+block,
+// we don't really have anything to do here?
+Box::new(move |q, ctx| {
+cloned.set(b.map_buffer(q, Some(ctx), offset, size));
+Ok(())
+}),
+)?;
+ptr.get()
+} else {
+create_and_queue(
+q.clone(),
+CL_COMMAND_MAP_BUFFER,
+evs,
+event,
+block,
+// we don't really have anything to do here?
+Box::new(|_, _| Ok(())),
+)?;
+b.map_buffer(&q, None, offset, size)
+}
-b.map_buffer(&q, offset, size, block)
// TODO
// CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1.
// CL_MAP_FAILURE if there is a failure to map the requested region into the host address space. This error cannot occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.
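
The blocking path above passes the map result out of the queued closure through a shared Cell. A self-contained sketch of the pattern (names hypothetical; in the real code the closure runs when the queue flushes and the blocking enqueue waits for it):

    use std::cell::Cell;
    use std::sync::Arc;

    fn main() {
        // Cell::get requires the contents to be Copy; Result<*mut c_void, cl_int>
        // in the real code and this stand-in both qualify.
        let res: Arc<Cell<Result<usize, i32>>> = Arc::new(Cell::new(Ok(0)));
        let cloned = Arc::clone(&res);
        let work = move || cloned.set(Ok(42)); // stands in for b.map_buffer(q, Some(ctx), ...)
        work(); // stands in for create_and_queue(...) plus the blocking wait
        assert_eq!(res.get(), Ok(42));
    }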
@@ -1815,6 +1836,106 @@ pub fn enqueue_fill_image(
//image are not supported by device associated with queue.
}
+pub fn enqueue_copy_buffer_to_image(
+command_queue: cl_command_queue,
+src_buffer: cl_mem,
+dst_image: cl_mem,
+src_offset: usize,
+dst_origin: *const usize,
+region: *const usize,
+num_events_in_wait_list: cl_uint,
+event_wait_list: *const cl_event,
+event: *mut cl_event,
+) -> CLResult<()> {
+let q = command_queue.get_arc()?;
+let src = src_buffer.get_arc()?;
+let dst = dst_image.get_arc()?;
+let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+// CL_INVALID_CONTEXT if the context associated with command_queue, src_buffer and dst_image
+// are not the same
+if q.context != src.context || q.context != dst.context {
+return Err(CL_INVALID_CONTEXT);
+}
+// CL_INVALID_VALUE if dst_origin or region is NULL.
+if dst_origin.is_null() || region.is_null() {
+return Err(CL_INVALID_VALUE);
+}
+let region = unsafe { CLVec::from_raw(region) };
+let src_origin = CLVec::new([src_offset, 0, 0]);
+let dst_origin = unsafe { CLVec::from_raw(dst_origin) };
+create_and_queue(
+q,
+CL_COMMAND_COPY_BUFFER_TO_IMAGE,
+evs,
+event,
+false,
+Box::new(move |q, ctx| src.copy_to(q, ctx, &dst, src_origin, dst_origin, &region)),
+)
+//• CL_INVALID_MEM_OBJECT if src_buffer is not a valid buffer object or dst_image is not a valid image object or if dst_image is a 1D image buffer object created from src_buffer.
+//• CL_INVALID_VALUE if the 1D, 2D or 3D rectangular region specified by dst_origin and dst_origin + region refer to a region outside dst_image, or if the region specified by src_offset and src_offset + src_cb refer to a region outside src_buffer.
+//• CL_INVALID_VALUE if values in dst_origin and region do not follow rules described in the argument description for dst_origin and region.
+//• CL_MISALIGNED_SUB_BUFFER_OFFSET if src_buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
+//• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for dst_image are not supported by device associated with queue.
+//• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for dst_image are not supported by device associated with queue.
+//• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with src_buffer or dst_image.
+//• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
+}
+pub fn enqueue_copy_image_to_buffer(
+command_queue: cl_command_queue,
+src_image: cl_mem,
+dst_buffer: cl_mem,
+src_origin: *const usize,
+region: *const usize,
+dst_offset: usize,
+num_events_in_wait_list: cl_uint,
+event_wait_list: *const cl_event,
+event: *mut cl_event,
+) -> CLResult<()> {
+let q = command_queue.get_arc()?;
+let src = src_image.get_arc()?;
+let dst = dst_buffer.get_arc()?;
+let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+// CL_INVALID_CONTEXT if the context associated with command_queue, src_image and dst_buffer
+// are not the same
+if q.context != src.context || q.context != dst.context {
+return Err(CL_INVALID_CONTEXT);
+}
+// CL_INVALID_VALUE if src_origin or region is NULL.
+if src_origin.is_null() || region.is_null() {
+return Err(CL_INVALID_VALUE);
+}
+let region = unsafe { CLVec::from_raw(region) };
+let src_origin = unsafe { CLVec::from_raw(src_origin) };
+let dst_origin = CLVec::new([dst_offset, 0, 0]);
+create_and_queue(
+q,
+CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+evs,
+event,
+false,
+Box::new(move |q, ctx| src.copy_to(q, ctx, &dst, src_origin, dst_origin, &region)),
+)
+//• CL_INVALID_MEM_OBJECT if src_image is not a valid image object or dst_buffer is not a valid buffer object or if src_image is a 1D image buffer object created from dst_buffer.
+//• CL_INVALID_VALUE if the 1D, 2D or 3D rectangular region specified by src_origin and src_origin + region refers to a region outside src_image, or if the region specified by dst_offset and dst_offset + dst_cb refers to a region outside dst_buffer.
+//• CL_INVALID_VALUE if values in src_origin and region do not follow rules described in the argument description for src_origin and region.
+//• CL_MISALIGNED_SUB_BUFFER_OFFSET if dst_buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code is missing before version 1.1.
+//• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for src_image are not supported by device associated with queue.
+//• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for src_image are not supported by device associated with queue.
+//• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with src_image or dst_buffer.
+//• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
+}
pub fn enqueue_map_image(
command_queue: cl_command_queue,
image: cl_mem,
@@ -1829,11 +1950,11 @@ pub fn enqueue_map_image(
event: *mut cl_event,
) -> CLResult<*mut ::std::os::raw::c_void> {
let q = command_queue.get_arc()?;
-let i = image.get_ref()?;
+let i = image.get_arc()?;
let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
-validate_map_flags(i, map_flags)?;
+validate_map_flags(&i, map_flags)?;
// CL_INVALID_CONTEXT if context associated with command_queue and image are not the same
if i.context != q.context {
@@ -1849,16 +1970,6 @@ pub fn enqueue_map_image(
let region = unsafe { CLVec::from_raw(region) };
let origin = unsafe { CLVec::from_raw(origin) };
-create_and_queue(
-q.clone(),
-CL_COMMAND_MAP_IMAGE,
-evs,
-event,
-block,
-// we don't really have anything to do here?
-Box::new(|_, _| Ok(())),
-)?;
let mut dummy_slice_pitch: usize = 0;
let image_slice_pitch = if image_slice_pitch.is_null() {
// CL_INVALID_VALUE if image is a 3D image, 1D or 2D image array object and
@@ -1871,14 +1982,60 @@ pub fn enqueue_map_image(
unsafe { image_slice_pitch.as_mut().unwrap() }
};
-i.map_image(
-&q,
-&origin,
-&region,
-unsafe { image_row_pitch.as_mut().unwrap() },
-image_slice_pitch,
-block,
-)
+if block {
+let res = Arc::new(Cell::new((Ok(ptr::null_mut()), 0, 0)));
+let cloned = res.clone();
+create_and_queue(
+q.clone(),
+CL_COMMAND_MAP_IMAGE,
+evs,
+event,
+block,
+// we don't really have anything to do here?
+Box::new(move |q, ctx| {
+let mut image_row_pitch = 0;
+let mut image_slice_pitch = 0;
+let ptr = i.map_image(
+q,
+Some(ctx),
+&origin,
+&region,
+&mut image_row_pitch,
+&mut image_slice_pitch,
+);
+cloned.set((ptr, image_row_pitch, image_slice_pitch));
+Ok(())
+}),
+)?;
+let res = res.get();
+unsafe { *image_row_pitch = res.1 };
+*image_slice_pitch = res.2;
+res.0
+} else {
+create_and_queue(
+q.clone(),
+CL_COMMAND_MAP_IMAGE,
+evs,
+event,
+block,
+// we don't really have anything to do here?
+Box::new(|_, _| Ok(())),
+)?;
+i.map_image(
+&q,
+None,
+&origin,
+&region,
+unsafe { image_row_pitch.as_mut().unwrap() },
+image_slice_pitch,
+)
+}
//• CL_INVALID_VALUE if region being mapped given by (origin, origin + region) is out of bounds or if values specified in map_flags are not valid.
//• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
//• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
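
Worth noting for both new entry points: the buffer side of a buffer<->image copy has no intrinsic 2D/3D layout, so it is folded into the same CLVec origin scheme as images, with the byte offset in the x component and zeroed y/z. A sketch of the bookkeeping, mirroring the code above:

    // Buffer side: src_offset becomes the x component of a 3D origin.
    let src_origin = CLVec::new([src_offset, 0, 0]);
    // The region stays in pixels; the byte count touched on the buffer side is
    // region[0] * region[1] * region[2] * bytes_per_pixel of the image format.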


@@ -2,6 +2,8 @@ extern crate rusticl_opencl_gen;
use self::rusticl_opencl_gen::*;
+use std::iter::Product;
#[macro_export]
macro_rules! cl_closure {
(|$obj:ident| $cb:ident($($arg:ident$(,)?)*)) => {
@@ -91,6 +93,13 @@ impl<T: Copy> CLVec<T> {
pub unsafe fn from_raw(v: *const T) -> Self {
Self { vals: *v.cast() }
}
+pub fn pixels<'a>(&'a self) -> T
+where
+T: Product<&'a T>,
+{
+self.vals.iter().product()
+}
}
impl CLVec<usize> {
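
The new pixels() helper reduces a region vector to its element count by multiplying its components. A standalone equivalent for reference:

    // Equivalent free function: the product of a region's components is the
    // number of pixels it covers.
    fn pixels(region: [usize; 3]) -> usize {
        region.iter().product()
    }

    fn main() {
        assert_eq!(pixels([4, 2, 3]), 24);
    }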


@@ -4,6 +4,7 @@ extern crate rusticl_opencl_gen;
use crate::api::icd::*;
use crate::core::device::*;
use crate::core::format::*;
+use crate::core::memory::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;
@@ -35,7 +36,11 @@ impl Context {
})
}
-pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
+pub fn create_buffer(
+&self,
+size: usize,
+user_ptr: *mut c_void,
+) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
let mut res = HashMap::new();
for dev in &self.devs {
@@ -45,6 +50,16 @@ impl Context {
.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), Arc::new(resource?));
}
+if !user_ptr.is_null() {
+res.iter()
+.map(|(d, r)| {
+d.helper_ctx()
+.exec(|ctx| ctx.buffer_subdata(r, 0, user_ptr, size.try_into().unwrap()))
+})
+.for_each(|f| f.wait());
+}
Ok(res)
}
@@ -69,6 +84,7 @@ impl Context {
&self,
desc: &cl_image_desc,
format: &cl_image_format,
+user_ptr: *mut c_void,
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
let width = desc
.image_width
@@ -97,6 +113,20 @@ impl Context {
.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), Arc::new(resource?));
}
+if !user_ptr.is_null() {
+let bx = desc.bx()?;
+let stride = desc.row_pitch()?;
+let layer_stride = desc.slice_pitch()?;
+res.iter()
+.map(|(d, r)| {
+d.helper_ctx()
+.exec(|ctx| ctx.texture_subdata(r, &bx, user_ptr, stride, layer_stride))
+})
+.for_each(|f| f.wait());
+}
Ok(res)
}


@@ -14,7 +14,10 @@ use self::mesa_rust::compiler::clc::*;
use self::mesa_rust::compiler::nir::*;
use self::mesa_rust::pipe::context::*;
use self::mesa_rust::pipe::device::load_screens;
+use self::mesa_rust::pipe::fence::*;
+use self::mesa_rust::pipe::resource::*;
use self::mesa_rust::pipe::screen::*;
+use self::mesa_rust::pipe::transfer::*;
use self::mesa_rust_gen::*;
use self::rusticl_opencl_gen::*;
@@ -23,6 +26,7 @@ use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
+use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;
@@ -42,6 +46,67 @@ pub struct Device {
helper_ctx: Mutex<Arc<PipeContext>>,
}
+pub trait HelperContextWrapper {
+#[must_use]
+fn exec<F>(&self, func: F) -> PipeFence
+where
+F: Fn(&HelperContext);
+fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer;
+fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer;
+fn unmap(&self, tx: PipeTransfer);
+}
+pub struct HelperContext<'a> {
+lock: MutexGuard<'a, Arc<PipeContext>>,
+}
+impl<'a> HelperContext<'a> {
+pub fn buffer_subdata(
+&self,
+res: &PipeResource,
+offset: c_uint,
+data: *const c_void,
+size: c_uint,
+) {
+self.lock.buffer_subdata(res, offset, data, size)
+}
+pub fn texture_subdata(
+&self,
+res: &PipeResource,
+bx: &pipe_box,
+data: *const c_void,
+stride: u32,
+layer_stride: u32,
+) {
+self.lock
+.texture_subdata(res, bx, data, stride, layer_stride)
+}
+}
+impl<'a> HelperContextWrapper for HelperContext<'a> {
+fn exec<F>(&self, func: F) -> PipeFence
+where
+F: Fn(&HelperContext),
+{
+func(self);
+self.lock.flush()
+}
+fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer {
+self.lock.buffer_map(res, offset, size, false)
+}
+fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer {
+self.lock.texture_map(res, bx, false)
+}
+fn unmap(&self, tx: PipeTransfer) {
+tx.with_ctx(&self.lock);
+}
+}
impl_cl_type_trait!(cl_device_id, Device, CL_INVALID_DEVICE);
impl Device {
@@ -534,8 +599,10 @@ impl Device {
id as u32
}
-pub fn helper_ctx(&self) -> MutexGuard<Arc<PipeContext>> {
-self.helper_ctx.lock().unwrap()
+pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
+HelperContext {
+lock: self.helper_ctx.lock().unwrap(),
+}
}
pub fn cl_features(&self) -> clc_optional_features {
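
The wrapper changes the helper-context contract: callers no longer poke the locked PipeContext directly but run work through exec(), which flushes and returns a PipeFence to wait on. The new calling convention, as exercised by the kernel.rs hunk below, is roughly:

    // Sketch of the new convention: dev and res as in the surrounding code.
    dev.helper_ctx()
        .exec(|ctx| ctx.buffer_subdata(&res, 0, data, size)) // record the upload
        .wait();                                             // block on the returned fence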


@@ -467,12 +467,12 @@ impl Kernel {
.resource_create_buffer(buf.len() as u32)
.unwrap(),
);
-q.device.helper_ctx().buffer_subdata(
-&res,
-0,
-buf.as_ptr().cast(),
-buf.len() as u32,
-);
+q.device
+.helper_ctx()
+.exec(|ctx| {
+ctx.buffer_subdata(&res, 0, buf.as_ptr().cast(), buf.len() as u32)
+})
+.wait();
resource_info.push((Some(res), arg.offset));
}
InternalKernelArgType::GlobalWorkOffsets => {


@@ -216,16 +216,10 @@ impl Mem {
let buffer = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
context.create_buffer_from_user(size, host_ptr)
} else {
-context.create_buffer(size)
+assert_eq!(bit_check(flags, CL_MEM_COPY_HOST_PTR), !host_ptr.is_null());
+context.create_buffer(size, host_ptr)
}?;
-if bit_check(flags, CL_MEM_COPY_HOST_PTR) {
-for (d, r) in &buffer {
-d.helper_ctx()
-.buffer_subdata(r, 0, host_ptr, size.try_into().unwrap());
-}
-}
let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
host_ptr
} else {
@@ -313,20 +307,10 @@ impl Mem {
let texture = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
context.create_texture_from_user(&image_desc, image_format, host_ptr)
} else {
-context.create_texture(&image_desc, image_format)
+assert_eq!(bit_check(flags, CL_MEM_COPY_HOST_PTR), !host_ptr.is_null());
+context.create_texture(&image_desc, image_format, host_ptr)
}?;
-if bit_check(flags, CL_MEM_COPY_HOST_PTR) {
-let bx = image_desc.bx()?;
-let stride = image_desc.row_pitch()?;
-let layer_stride = image_desc.slice_pitch()?;
-for (d, r) in &texture {
-d.helper_ctx()
-.texture_subdata(r, &bx, host_ptr, stride, layer_stride);
-}
-}
let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
host_ptr
} else {
@@ -356,38 +340,67 @@ impl Mem {
self.mem_type == CL_MEM_OBJECT_BUFFER
}
-fn tx(
+fn tx_raw(
&self,
q: &Arc<Queue>,
-ctx: &PipeContext,
+ctx: Option<&PipeContext>,
mut offset: usize,
size: usize,
-blocking: bool,
) -> CLResult<PipeTransfer> {
let b = self.to_parent(&mut offset);
let r = b.get_res()?.get(&q.device).unwrap();
assert!(self.is_buffer());
-Ok(ctx.buffer_map(
-r,
-offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-blocking,
-))
+Ok(if let Some(ctx) = ctx {
+ctx.buffer_map(
+r,
+offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+true,
+)
+} else {
+q.device.helper_ctx().buffer_map_async(
+r,
+offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+)
+})
}
-fn tx_image(
+fn tx<'a>(
&self,
q: &Arc<Queue>,
-ctx: &PipeContext,
+ctx: &'a PipeContext,
+offset: usize,
+size: usize,
+) -> CLResult<GuardedPipeTransfer<'a>> {
+Ok(self.tx_raw(q, Some(ctx), offset, size)?.with_ctx(ctx))
+}
+fn tx_image_raw(
+&self,
+q: &Arc<Queue>,
+ctx: Option<&PipeContext>,
bx: &pipe_box,
-blocking: bool,
) -> CLResult<PipeTransfer> {
assert!(!self.is_buffer());
let r = self.get_res()?.get(&q.device).unwrap();
-Ok(ctx.texture_map(r, bx, blocking))
+Ok(if let Some(ctx) = ctx {
+ctx.texture_map(r, bx, true)
+} else {
+q.device.helper_ctx().texture_map_async(r, bx)
+})
}
+fn tx_image<'a>(
+&self,
+q: &Arc<Queue>,
+ctx: &'a PipeContext,
+bx: &pipe_box,
+) -> CLResult<GuardedPipeTransfer<'a>> {
+Ok(self.tx_image_raw(q, Some(ctx), bx)?.with_ctx(ctx))
+}
pub fn has_same_parent(&self, other: &Self) -> bool {
@@ -428,7 +441,7 @@ impl Mem {
) -> CLResult<()> {
assert!(self.is_buffer());
-let tx = self.tx(q, ctx, offset, size, true)?;
+let tx = self.tx(q, ctx, offset, size)?;
unsafe {
ptr::copy_nonoverlapping(tx.ptr(), ptr, size);
@@ -469,17 +482,59 @@ impl Mem {
) -> CLResult<()> {
let src = self.to_parent(&mut src_origin[0]);
let dst = dst.to_parent(&mut dst_origin[0]);
-let bx = create_box(&src_origin, region, self.mem_type)?;
-let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
let src_res = src.get_res()?.get(&q.device).unwrap();
let dst_res = dst.get_res()?.get(&q.device).unwrap();
-if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
-(dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
-}
+if self.is_buffer() && !dst.is_buffer() || !self.is_buffer() && dst.is_buffer() {
+let tx_src;
+let tx_dst;
-ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
+if self.is_buffer() {
+let bpp = dst.image_format.pixel_size().unwrap() as usize;
+tx_src = self.tx(q, ctx, src_origin[0], region.pixels() * bpp)?;
+tx_dst = dst.tx_image(q, ctx, &create_box(&dst_origin, region, dst.mem_type)?)?;
+sw_copy(
+tx_src.ptr(),
+tx_dst.ptr(),
+region,
+&CLVec::default(),
+region[0] * bpp,
+region[0] * region[1] * bpp,
+&CLVec::default(),
+tx_dst.row_pitch() as usize,
+tx_dst.slice_pitch() as usize,
+bpp as u8,
+)
+} else {
+let bpp = self.image_format.pixel_size().unwrap() as usize;
+tx_src = self.tx_image(q, ctx, &create_box(&src_origin, region, self.mem_type)?)?;
+tx_dst = dst.tx(q, ctx, dst_origin[0], region.pixels() * bpp)?;
+sw_copy(
+tx_src.ptr(),
+tx_dst.ptr(),
+region,
+&CLVec::default(),
+tx_src.row_pitch() as usize,
+tx_src.slice_pitch() as usize,
+&CLVec::default(),
+region[0] * bpp,
+region[0] * region[1] * bpp,
+bpp as u8,
+)
+}
+} else {
+let bx = create_box(&src_origin, region, self.mem_type)?;
+let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
+if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
+(dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
+}
+ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
+}
Ok(())
}
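
The buffer<->image paths above address the image side with the usual pitched layout, which is why v2 takes the pitches from the pipe_transfer_map instead of recomputing them: the element at (x, y, z) lives at z * slice_pitch + y * row_pitch + x * bpp bytes, and the driver's row pitch may include padding. A self-contained check of that arithmetic with hypothetical values:

    fn main() {
        // Hypothetical 4x2x2 region of 4-byte pixels whose rows are padded to 32 bytes.
        let (bpp, row_pitch, slice_pitch) = (4usize, 32usize, 64usize);
        let offset = |x: usize, y: usize, z: usize| z * slice_pitch + y * row_pitch + x * bpp;
        assert_eq!(offset(0, 0, 0), 0);
        assert_eq!(offset(3, 1, 1), 64 + 32 + 12);
    }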
@@ -546,7 +601,7 @@ impl Mem {
dst_slice_pitch: usize,
) -> CLResult<()> {
if self.is_buffer() {
-let tx = self.tx(q, ctx, 0, self.size, true)?;
+let tx = self.tx(q, ctx, 0, self.size)?;
sw_copy(
src,
tx.ptr(),
@@ -603,13 +658,13 @@ impl Mem {
let pixel_size;
if self.is_buffer() {
-tx = self.tx(q, ctx, 0, self.size, true)?;
+tx = self.tx(q, ctx, 0, self.size)?;
pixel_size = 1;
} else {
assert!(dst_origin == &CLVec::default());
let bx = create_box(src_origin, region, self.mem_type)?;
-tx = self.tx_image(q, ctx, &bx, true)?;
+tx = self.tx_image(q, ctx, &bx)?;
src_row_pitch = tx.row_pitch() as usize;
src_slice_pitch = tx.slice_pitch() as usize;
@@ -646,9 +701,10 @@ impl Mem {
dst_slice_pitch: usize,
) -> CLResult<()> {
assert!(self.is_buffer());
+assert!(dst.is_buffer());
-let tx_src = self.tx(q, ctx, 0, self.size, true)?;
-let tx_dst = dst.tx(q, ctx, 0, self.size, true)?;
+let tx_src = self.tx(q, ctx, 0, self.size)?;
+let tx_dst = dst.tx(q, ctx, 0, self.size)?;
// TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
sw_copy(
@@ -670,16 +726,15 @@ impl Mem {
fn map<'a>(
&self,
q: &Arc<Queue>,
-ctx: &PipeContext,
+ctx: Option<&PipeContext>,
lock: &'a mut MutexGuard<Mappings>,
-block: bool,
) -> CLResult<&'a PipeTransfer> {
if !lock.tx.contains_key(&q.device) {
let tx = if self.is_buffer() {
-self.tx(q, ctx, 0, self.size, block)?
+self.tx_raw(q, ctx, 0, self.size)?
} else {
let bx = self.image_desc.bx()?;
-self.tx_image(q, ctx, &bx, block)?
+self.tx_image_raw(q, ctx, &bx)?
};
lock.tx.insert(q.device.clone(), (tx, 0));
@@ -694,14 +749,14 @@ impl Mem {
pub fn map_buffer(
&self,
q: &Arc<Queue>,
+ctx: Option<&PipeContext>,
offset: usize,
_size: usize,
-block: bool,
) -> CLResult<*mut c_void> {
assert!(self.is_buffer());
let mut lock = self.maps.lock().unwrap();
-let tx = self.map(q, &q.device.helper_ctx(), &mut lock, block)?;
+let tx = self.map(q, ctx, &mut lock)?;
let ptr = unsafe { tx.ptr().add(offset) };
if let Some(e) = lock.maps.get_mut(&ptr) {
@@ -716,16 +771,16 @@ impl Mem {
pub fn map_image(
&self,
q: &Arc<Queue>,
+ctx: Option<&PipeContext>,
origin: &CLVec<usize>,
_region: &CLVec<usize>,
row_pitch: &mut usize,
slice_pitch: &mut usize,
-block: bool,
) -> CLResult<*mut c_void> {
assert!(!self.is_buffer());
let mut lock = self.maps.lock().unwrap();
-let tx = self.map(q, &q.device.helper_ctx(), &mut lock, block)?;
+let tx = self.map(q, ctx, &mut lock)?;
*row_pitch = tx.row_pitch() as usize;
*slice_pitch = tx.slice_pitch() as usize;
@@ -786,7 +841,7 @@ impl Drop for Mem {
.for_each(|cb| cb(cl));
for (d, tx) in self.maps.lock().unwrap().tx.drain() {
-tx.0.with_ctx(&d.helper_ctx());
+d.helper_ctx().unmap(tx.0);
}
}
}