#include TTL_IMPORT_EXPORT_INCLUDE_H

#define TTL_MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
template <typename TENSORTYPE>
template <typename TENSORTYPE>
template <typename TENSORTYPE>
    const TTL_shape import_shape =
        TTL_import_pre_fill(internal_sub_tensor, const_external_tensor, &dst_address, &src_address);
    // ... import_int_tensor is built from:
    //     dst_address, import_shape, internal_sub_tensor.tensor.layout, internal_sub_tensor.tensor.elem_size
    TTL_import(import_int_tensor, import_ext_tensor, event);
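These fragments are the heart of TTL_import_sub_tensor: compute the clamped destination/source addresses and the reduced import shape, describe just that valid region as a tensor, then issue a plain TTL_import. A self-contained toy version of the same idea in 2D, where zero-fill stands in for TTL_clear_void_space (all names and numbers below are invented for illustration, not TTL API):

    #include <algorithm>
    #include <cstdio>
    #include <cstring>

    int main() {
        const int ext_w = 8, ext_h = 8;  // "external" tensor shape
        const int int_w = 4, int_h = 4;  // "internal" tile shape
        int external[ext_h][ext_w];
        int internal[int_h][int_w];

        for (int y = 0; y < ext_h; y++)
            for (int x = 0; x < ext_w; x++)
                external[y][x] = y * ext_w + x;

        // Tile origin chosen so the tile overhangs the right/bottom edges.
        const int org_x = 6, org_y = 6;
        const int valid_w = std::min(int_w, ext_w - org_x);  // import-shape width
        const int valid_h = std::min(int_h, ext_h - org_y);  // import-shape height

        std::memset(internal, 0, sizeof internal);  // pre-fill the void space
        for (int y = 0; y < valid_h; y++)           // import only the valid region
            for (int x = 0; x < valid_w; x++)
                internal[y][x] = external[org_y + y][org_x + x];

        for (int y = 0; y < int_h; y++, std::puts(""))
            for (int x = 0; x < int_w; x++)
                std::printf("%3d", internal[y][x]);
        return 0;
    }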
template <typename TENSORTYPE>
template <typename TENSORTYPE>
    for (int byte = 0; byte < num; byte++)
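Only the fill loop survives in the excerpt; combined with the signature listed in the reference entries below (static void *TTL_local_memset(void *const ptr, char value, int num)), the whole function plausibly reads as a plain byte-wise fill. A sketch on that assumption, with the TTL_local() address-space qualifier dropped so it compiles as ordinary C++:

    // Byte-wise fill of a local buffer, returning the destination pointer.
    void *TTL_local_memset(void *const ptr, char value, int num) {
        char *const dst = static_cast<char *>(ptr);
        for (int byte = 0; byte < num; byte++)
            dst[byte] = value;  // the loop that survives in the excerpt
        return ptr;
    }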
        size_t num_bytes_per_element, size_t num_elements_per_line,
        size_t dst_total_line_length, size_t num_lines, size_t total_lines,

    unsigned int left_trim_bytes = x_offset * num_bytes_per_element;
    unsigned int right_trim_bytes = (dst_total_line_length - num_elements_per_line) * num_bytes_per_element;

    for (size_t plane = 0; plane < num_planes; plane++) {
        for (size_t line = 0; line < total_lines; line++) {
            if ((line < y_offset) || (line >= num_lines)) {
                // Whole line lies outside the imported region: clear it all.
                TTL_local_memset(dst_ptr, 0, dst_total_line_length * num_bytes_per_element);
            } else {
                // Line carries imported data: clear only the left and right trims.
                TTL_local_memset(dst_ptr, 0, left_trim_bytes);
                TTL_local_memset(dst_ptr + (num_elements_per_line * num_bytes_per_element), 0, right_trim_bytes);
            }
            dst_ptr += dst_total_line_length * num_bytes_per_element;
        }
    }
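As a worked example of the trims, take (hypothetical numbers) total_lines = 6, y_offset = 1, num_lines = 5, x_offset = 1, num_elements_per_line = 4, dst_total_line_length = 6 and one-byte elements. The sketch below runs the same loop over a flat host buffer, with std::memset standing in for TTL_local_memset, and prints '0' where bytes were cleared and 'D' where imported data survives:

    #include <cstdio>
    #include <cstring>

    int main() {
        const size_t total_lines = 6, y_offset = 1, num_lines = 5;
        const size_t x_offset = 1, num_elements_per_line = 4, dst_total_line_length = 6;
        const size_t elem = 1;  // one byte per element for readability

        char buf[total_lines * dst_total_line_length];
        std::memset(buf, 'D', sizeof buf);  // 'D' marks imported data

        const size_t left_trim = x_offset * elem;
        const size_t right_trim = (dst_total_line_length - num_elements_per_line) * elem;

        char *dst = buf;
        for (size_t line = 0; line < total_lines; line++) {
            if (line < y_offset || line >= num_lines) {
                std::memset(dst, '0', dst_total_line_length * elem);  // whole line is void
            } else {
                std::memset(dst, '0', left_trim);                     // left trim
                std::memset(dst + num_elements_per_line * elem, '0', right_trim);  // right trim
            }
            dst += dst_total_line_length * elem;
        }

        for (size_t line = 0; line < total_lines; line++)
            std::printf("%.*s\n", (int)dst_total_line_length, buf + line * dst_total_line_length);
        return 0;
    }

It prints "000000" for the two fully-void lines and "0DDD00" for the four interior lines, leaving a 3-element window of data per line untouched.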
template <typename INT_TENSORTYPE, typename EXT_TENSORTYPE>
static TTL_shape TTL_import_pre_fill(const TTL_sub_tensor<INT_TENSORTYPE> internal_sub_tensor,
                                     const TTL_tensor<EXT_TENSORTYPE> const_external_tensor,
                                     TTL_local(INT_TENSORTYPE *) *const dst_address,
                                     TTL_global(EXT_TENSORTYPE *) *const src_address) {
    *dst_address = internal_sub_tensor.tensor.base + x_offset +
                   (y_offset * internal_sub_tensor.tensor.layout.row_spacing) +
                   (z_offset * internal_sub_tensor.tensor.layout.plane_spacing);

    *src_address = const_external_tensor.base + x_offset +
                   (y_offset * const_external_tensor.layout.row_spacing) +
                   (z_offset * const_external_tensor.layout.plane_spacing);
        internal_sub_tensor.tensor.elem_size,
        internal_sub_tensor.tensor.shape.width - x_cut,
        internal_sub_tensor.tensor.layout.row_spacing,
        internal_sub_tensor.tensor.shape.height - y_cut,
        internal_sub_tensor.tensor.shape.height,
        internal_sub_tensor.tensor.shape.depth);
    return TTL_shape(internal_sub_tensor.tensor.shape.width - x_offset - x_cut,
                     internal_sub_tensor.tensor.shape.height - y_offset - y_cut,
                     internal_sub_tensor.tensor.shape.depth - z_offset - z_cut);
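In words: the import shape is the tile shape with the leading offset and trailing cut removed on each axis. As a worked example (numbers invented for illustration), a 16 x 8 x 1 tile with x_offset = 2, x_cut = 3, y_offset = 1 and every other offset and cut zero returns TTL_shape(16 - 2 - 3, 8 - 1 - 0, 1 - 0 - 0) = (11, 7, 1); only that region is transferred, and TTL_clear_void_space zeroes the rest of the tile.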
static void TTL_import_base(const TTL_int_tensor_t internal_tensor, const TTL_const_ext_tensor_t external_tensor, TTL_event_t *event)
Begin the asynchronous import of the external tensor to the internal tensor.
static void TTL_export_base(const TTL_const_int_tensor_t internal_tensor, const TTL_ext_tensor_t external_tensor, TTL_event_t *const event)
Begin the asynchronous export of the internal tensor to the external tensor.
static void TTL_blocking_export_base(const TTL_const_int_tensor_t internal_tensor, const TTL_ext_tensor_t external_tensor)
Export the internal tensor to the external tensor returning when complete.
static void TTL_blocking_import_base(const TTL_int_tensor_t internal_tensor, const TTL_const_ext_tensor_t external_tensor)
Import the external tensor to the internal tensor returning when complete.
void TTL_blocking_import(const TTL_tensor< TENSORTYPE > &internal_tensor, const TTL_tensor< TENSORTYPE > &external_tensor)
Import the external tensor to the internal tensor returning when complete.
static TTL_shape TTL_import_pre_fill(const TTL_sub_tensor< INT_TENSORTYPE > internal_sub_tensor, const TTL_tensor< EXT_TENSORTYPE > const_external_tensor, INT_TENSORTYPE **const dst_address, EXT_TENSORTYPE **const src_address)
void TTL_export(const TTL_tensor< TENSORTYPE > &internal_tensor, const TTL_tensor< TENSORTYPE > &external_tensor, TTL_event *event)
Begin the asynchronous export of the internal tensor to the external tensor.
void TTL_import_sub_tensor(const TTL_sub_tensor< TENSORTYPE > &internal_sub_tensor, const TTL_tensor< TENSORTYPE > const_external_tensor, TTL_event *event)
Implementation of TTL_import_sub_tensor.
static void * TTL_local_memset(void *const ptr, char value, int num)
Fill block of local memory.
void TTL_import(const TTL_tensor< TENSORTYPE > internal_tensor, const TTL_tensor< TENSORTYPE > external_tensor, TTL_event *event)
Begin the asynchronous import of the external tensor to the internal tensor.
static void TTL_clear_void_space(void *const dst, const size_t x_offset, const size_t y_offset, size_t num_bytes_per_element, size_t num_elements_per_line, size_t dst_total_line_length, size_t num_lines, size_t total_lines, size_t num_planes)
Clear any unpopulated space in the target area.
void TTL_blocking_export(const TTL_tensor< TENSORTYPE > &internal_tensor, const TTL_tensor< TENSORTYPE > &external_tensor)
Export the internal tensor to the external tensor returning when complete.
event_t TTL_event
TTL_event is a pseudonym for OpenCL event_t.
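Because TTL_event aliases the OpenCL event_t, an asynchronous import can overlap with compute in the usual OpenCL fashion. A hedged usage fragment (do_other_work and process_tile are invented placeholders; the tensors are assumed already constructed; wait_group_events is the standard OpenCL C builtin for event_t):

    TTL_event import_event;                                       // an OpenCL event_t
    TTL_import(internal_tensor, external_tensor, &import_event);  // start the copy

    do_other_work();                      // compute that does not touch the tile

    wait_group_events(1, &import_event);  // block until the import has landed
    process_tile(internal_tensor);        // the internal tensor is now safe to read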
#define TTL_global(type)
Create a typed reference in the __global address space.
#define TTL_local(type)
Create a typed reference in the __local address space.
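The entries above only give the intent of these macros; a plausible sketch of how they might expand (an assumption, not the library's actual definition) is:

    #ifdef __OPENCL_VERSION__
    #define TTL_global(type) __global type  /* typed reference in __global space */
    #define TTL_local(type) __local type    /* typed reference in __local space */
    #else
    #define TTL_global(type) type           /* host/C++ build: qualifiers vanish */
    #define TTL_local(type) type
    #endif

    TTL_global(int *) src;  /* as used by TTL_import_pre_fill above */
    TTL_local(int *) dst;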
TTL_dim plane_spacing
The distance between the start of consecutive planes in units of elements.
TTL_dim row_spacing
The distance between the start of consecutive rows in units of elements.
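row_spacing and plane_spacing are what turn an (x, y, z) offset into a flat element offset, exactly as in the pre-fill address computation above: address = base + x + y * row_spacing + z * plane_spacing. A self-contained sketch (names and numbers are illustrative):

    #include <cassert>
    #include <cstddef>

    // Flat element offset of (x, y, z), mirroring the pre-fill computation:
    // base + x + y * row_spacing + z * plane_spacing.
    std::size_t element_offset(std::size_t x, std::size_t y, std::size_t z,
                               std::size_t row_spacing, std::size_t plane_spacing) {
        return x + y * row_spacing + z * plane_spacing;
    }

    int main() {
        // An 8x4 plane stored with two elements of row padding: rows start
        // every 10 elements, planes every 40.
        const std::size_t row_spacing = 10, plane_spacing = row_spacing * 4;
        assert(element_offset(0, 0, 0, row_spacing, plane_spacing) == 0);
        assert(element_offset(3, 2, 1, row_spacing, plane_spacing) == 3 + 20 + 40);
        return 0;
    }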
Description of the 3D offset of an object.
TTL_offset_dim y
Offset in dimension y.
TTL_offset_dim x
Offset in dimension x.
TTL_dim height
Number of rows along dimension y.
TTL_dim width
Number of elements along dimension x.
TTL_offset sub_offset
The offset of the sub tensor from the origin tensor.
TTL_shape shape
The shape of the origin tensor in 3 dimensions.
A tensor plus a reference to its parent tensor.
TTL_tensor< TENSORTYPE > tensor
A poor man's base class for a tensor in the passed address space.
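Putting the last few entries together, a sub tensor is the tile's own tensor bundled with where it sits inside, and how big, its origin tensor is. A simplified stand-in sketch (field names follow the entries above; the struct layout itself is an assumption, not the real TTL definition):

    #include <cstddef>

    // Simplified stand-ins for the documented fields; the real TTL types differ.
    struct Shape  { std::size_t width, height, depth; };  // extent per dimension
    struct Offset { std::ptrdiff_t x, y, z; };            // signed offset per dimension

    template <typename TENSORTYPE>
    struct SubTensorSketch {
        // The "poor man's base class": the tile itself in its address space.
        struct { TENSORTYPE *base; Shape shape; } tensor;
        Offset sub_offset;   // where the tile starts inside the origin tensor
        Shape origin_shape;  // the shape of the origin tensor in 3 dimensions
    };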