Tensor Tiling Library
TTL_simplex_scheme.h
/*
 * TTL_simplex_scheme.h
 *
 * Copyright (c) 2023 Mobileye
 *
 * Licensed under the Apache License, Version 2.0 (the License);
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// clang-format off
/**
 * @file
 *
 * TTL_simplex_buffering pipelines a pair of import and export transactions using
 * three internal buffers, in rotation: each buffer interchangeably serves as input
 * buffer and output buffer, such that in each iteration one buffer is used both to
 * export then import, and two buffers are used by compute for reading and writing.
 *
 * With simplex buffering we only wait for previous iterations, so DMA
 * transactions run mostly in parallel to computation, but serially with each
 * other. Using the same buffer for both import and export allows us to overlap
 * exporting from and importing to the same buffer.
 *
 * The following table shows the pipelined actions performed in simplex buffering.
 * It specifies which tile is processed in each iteration:
 *
 * | Action\\Iteration | \#-1 | \#0 | \#1 | \#2 | \#i (2:NumOfTiles-2) | \#NumOfTiles-1 | \#NumOfTiles | \#NumOfTiles+1 |
 * |-------------------|------|-----|-----|-----|----------------------|----------------|--------------|----------------|
 * | **WaitExport**    |      |     |     | 0   | i-2                  | NumOfTiles-3   | NumOfTiles-2 | NumOfTiles-1   |
 * | **Export**        |      |     | 0   | 1   | i-1                  | NumOfTiles-2   | NumOfTiles-1 |                |
 * | **WaitImport**    |      | 0   | 1   | 2   | i                    | NumOfTiles-1   |              |                |
 * | **Import**        | 0    | 1   | 2   | 3   | i+1                  |                |              |                |
 * | **Compute**       |      | 0   | 1   | 2   | i                    | NumOfTiles-1   |              |                |
 *
 * Notice the prolog (at iteration number -1) and the 2 epilogs (at iterations
 * number NumOfTiles and NumOfTiles+1) which add in total 3 extra iterations.
 *
 * @example TTL_simplex_buffering.cl
 */
// clang-format on
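
// clang-format off
/*
 * A worked instance of the schedule above for NumOfTiles = 4 (a sketch, not
 * normative): iteration -1 imports tile 0 (the prolog); iterations 0..3
 * compute tiles 0..3 while tiles 1..3 are imported and tiles 0..2 exported;
 * iteration 4 exports tile 3 and iteration 5 waits for that export (the two
 * epilogs). In total, 4 tiles take 1 + 4 + 2 = 7 iterations.
 */
// clang-format on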

// This file presumes that the following have been pre-included
// (this is not done here for path reasons):
// #include "TTL_core.h"
// #include "TTL_import_export.h"
// #include TTL_IMPORT_EXPORT_INCLUDE_H

/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local void *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_void_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_void_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;                // The tile to be exported on the next step.
    TTL_int_void_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_void_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_void_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_void_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

/**
 * @brief Create a TTL_simplex_buffering_t and begin the buffering process
 *
 * @param int_base1 The address of the first buffer to be used in local memory
 * @param int_base2 The address of the second buffer to be used in local memory
 * @param int_base3 The address of the third buffer to be used in local memory
 * @param ext_tensor_in The external tensor to import the input data from
 * @param ext_tensor_out The external tensor to export the output data to
 * @param event_in A pointer to the event to use for the inward (external to
 * internal) transfer completion
 * @param event_out A pointer to the event to use for the outward (internal to
 * external) transfer completion
 * @param first_tile The first tile to fetch for the scheme
 *
 * The simplex buffering scheme rotates three local buffers between the
 * import/export, compute-read, and compute-write roles; see the file-level
 * description above for the full pipeline.
 *
 * @return The TTL_simplex_buffering_t created from the input parameters
 *
 * Example:
 * @code
 * TTL_event_t tb_e_in = TTL_get_event();
 * TTL_event_t tb_e_out = TTL_get_event();
 * TTL_simplex_const_void_tensor_buffering_t tb_scheme = TTL_start_simplex_buffering(
 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
 *     &tb_e_in, &tb_e_out, first_tile);
 * @endcode
 *
 * This can be optimized and standardized using the TTL_step_buffering
 * call.
 *
 * @startuml
 *
 * start
 *
 *
 * stop
 *
 * @enduml
 *
 */
static inline TTL_simplex_const_void_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local void *int_base1, __local void *int_base2, __local void *int_base3, TTL_ext_void_tensor_t ext_tensor_in,
    TTL_ext_void_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_void_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}
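
/* The single TTL_step_buffering() call in TTL_start_simplex_buffering() is the
 * prolog (iteration -1 in the schedule above): it starts the import of
 * first_tile, paired with an empty export, so that data is already in flight
 * when the first compute iteration begins. */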

static inline TTL_io_void_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_void_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export) {
    // For performance, compute everything possible before waiting for the previous operations to finish. The
    // current index contains the tile that is to be exported, so prepare the structures before beginning the
    // export and the import.
    const TTL_layout_t next_import_layout =
        TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
    const TTL_int_void_sub_tensor_t next_import_int_sub_tensor =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_next_import.shape,
                                  next_import_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_next_import.offset);
    const TTL_const_ext_void_tensor_t next_import_ext_tensor =
        TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
                                    tile_next_import.shape,
                                    simplex_buffer->common.ext_tensor_in.layout,
                                    tile_next_import.offset,
                                    simplex_buffer->common.ext_tensor_in.elem_size);

    const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
                                                             simplex_buffer->next_exported_tile.shape.height);
    const TTL_int_void_tensor_t int_export_tensor =
        TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                              simplex_buffer->next_exported_tile.shape,
                              int_export_layout,
                              simplex_buffer->common.ext_tensor_out.elem_size);
    const TTL_ext_void_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
                                                                  simplex_buffer->next_exported_tile.shape,
                                                                  simplex_buffer->common.ext_tensor_out.layout,
                                                                  simplex_buffer->next_exported_tile.offset,
                                                                  simplex_buffer->common.ext_tensor_out.elem_size);

    // Wait for the previous (import/export)s to complete before starting the next.
    TTL_wait(1, simplex_buffer->event_out);
    TTL_wait(1, simplex_buffer->event_in);

    if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
        TTL_export(*TTL_to_const_tensor(&int_export_tensor),
                   *TTL_to_void_tensor(&export_to),
                   simplex_buffer->event_out);

    if (TTL_tile_empty(tile_next_import) == false)
        TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
                              *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
                              simplex_buffer->event_in);

    // The import/export has been started for the current tile. Move to the next
    // tile.
    simplex_buffer->common.index =
        (simplex_buffer->common.index + 1) %
        (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0]));  // Write to.
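    // The modulo arithmetic wraps the index over the three entries of
    // int_base, so the write-to buffer cycles 0 -> 1 -> 2 -> 0 as the
    // pipeline advances.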

    // Retrieve the buffer imported previously, to read from now.
    const TTL_int_void_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
    simplex_buffer->int_prev_imported = next_import_int_sub_tensor;

    // Can write to the out buffer according to the size of curr_tile, rather than the size
    // recently exported.
    const TTL_layout_t curr_int_layout =
        TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
    const TTL_int_void_sub_tensor_t int_curr_buff_out =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_current_export.shape,
                                  curr_int_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_current_export.offset);

    // Save the last two tiles to prevent commonly repeated get_tile()'s.
    simplex_buffer->next_exported_tile = tile_current_export;

    return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
}
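
/* The returned io tensors pair the sub-tensor imported on the previous step
 * (for compute to read) with a sub-tensor over the next buffer in the rotation
 * (for compute to write; it will be exported on the following step). */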

static inline void __attribute__((overloadable)) TTL_finish_buffering(
    TTL_simplex_const_void_tensor_buffering_t *const simplex_buffering) {
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
}
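
/**
 * A minimal usage sketch of the start/step/finish sequence (for the uchar
 * variant defined later in this file). It assumes the tiler helpers from
 * TTL_core.h (TTL_create_shape, TTL_create_tiler, TTL_get_tile,
 * TTL_number_of_tiles), the io-tensor field names imported_to and
 * to_export_from, and a user-supplied compute() function; LOCAL_TILE_SIZE and
 * the kernel signature are illustrative assumptions only. See
 * TTL_simplex_buffering.cl for the authoritative example.
 *
 * @code
 * __kernel void simplex_example(__global uchar *restrict ext_base_in,
 *                               __global uchar *restrict ext_base_out,
 *                               int width, int height, int tile_width, int tile_height) {
 *     __local uchar l_buff1[LOCAL_TILE_SIZE];  // LOCAL_TILE_SIZE: assumed user-defined macro
 *     __local uchar l_buff2[LOCAL_TILE_SIZE];
 *     __local uchar l_buff3[LOCAL_TILE_SIZE];
 *
 *     const TTL_shape_t tensor_shape = TTL_create_shape(width, height);
 *     const TTL_tiler_t tiler = TTL_create_tiler(tensor_shape, TTL_create_shape(tile_width, tile_height));
 *     const TTL_layout_t ext_layout = TTL_create_layout(width);
 *     const TTL_ext_uchar_tensor_t ext_tensor_in = TTL_create_ext_tensor(ext_base_in, tensor_shape, ext_layout);
 *     const TTL_ext_uchar_tensor_t ext_tensor_out = TTL_create_ext_tensor(ext_base_out, tensor_shape, ext_layout);
 *
 *     TTL_event_t event_in = TTL_get_event();
 *     TTL_event_t event_out = TTL_get_event();
 *     TTL_simplex_const_uchar_tensor_buffering_t scheme =
 *         TTL_start_simplex_buffering(l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
 *                                     &event_in, &event_out, TTL_get_tile(0, tiler));
 *
 *     for (int i = 0; i < TTL_number_of_tiles(tiler); ++i) {
 *         // Import tile i+1, export the result of tile i-1, compute tile i.
 *         TTL_io_uchar_tensor_t io =
 *             TTL_step_buffering(&scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 *         compute(io.imported_to, io.to_export_from);
 *     }
 *
 *     TTL_finish_buffering(&scheme);  // the two epilog steps: drain the remaining exports
 * }
 * @endcode
 */
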
/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local char *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_char_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_char_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;                // The tile to be exported on the next step.
    TTL_int_char_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_char_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_char_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_char_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

static inline TTL_simplex_const_char_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local char *int_base1, __local char *int_base2, __local char *int_base3, TTL_ext_char_tensor_t ext_tensor_in,
    TTL_ext_char_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_char_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}

static inline TTL_io_char_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_char_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export) {
    // For performance, compute everything possible before waiting for the previous operations to finish. The
    // current index contains the tile that is to be exported, so prepare the structures before beginning the
    // export and the import.
    const TTL_layout_t next_import_layout =
        TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
    const TTL_int_char_sub_tensor_t next_import_int_sub_tensor =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_next_import.shape,
                                  next_import_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_next_import.offset);
    const TTL_const_ext_char_tensor_t next_import_ext_tensor =
        TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
                                    tile_next_import.shape,
                                    simplex_buffer->common.ext_tensor_in.layout,
                                    tile_next_import.offset,
                                    simplex_buffer->common.ext_tensor_in.elem_size);

    const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
                                                             simplex_buffer->next_exported_tile.shape.height);
    const TTL_int_char_tensor_t int_export_tensor =
        TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                              simplex_buffer->next_exported_tile.shape,
                              int_export_layout,
                              simplex_buffer->common.ext_tensor_out.elem_size);
    const TTL_ext_char_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
                                                                  simplex_buffer->next_exported_tile.shape,
                                                                  simplex_buffer->common.ext_tensor_out.layout,
                                                                  simplex_buffer->next_exported_tile.offset,
                                                                  simplex_buffer->common.ext_tensor_out.elem_size);

    // Wait for the previous (import/export)s to complete before starting the next.
    TTL_wait(1, simplex_buffer->event_out);
    TTL_wait(1, simplex_buffer->event_in);

    if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
        TTL_export(*TTL_to_const_tensor(&int_export_tensor),
                   *TTL_to_void_tensor(&export_to),
                   simplex_buffer->event_out);

    if (TTL_tile_empty(tile_next_import) == false)
        TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
                              *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
                              simplex_buffer->event_in);

    // The import/export has been started for the current tile. Move to the next
    // tile.
    simplex_buffer->common.index =
        (simplex_buffer->common.index + 1) %
        (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0]));  // Write to.

    // Retrieve the buffer imported previously, to read from now.
    const TTL_int_char_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
    simplex_buffer->int_prev_imported = next_import_int_sub_tensor;

    // Can write to the out buffer according to the size of curr_tile, rather than the size
    // recently exported.
    const TTL_layout_t curr_int_layout =
        TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
    const TTL_int_char_sub_tensor_t int_curr_buff_out =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_current_export.shape,
                                  curr_int_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_current_export.offset);

    // Save the last two tiles to prevent commonly repeated get_tile()'s.
    simplex_buffer->next_exported_tile = tile_current_export;

    return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
}

static inline void __attribute__((overloadable)) TTL_finish_buffering(
    TTL_simplex_const_char_tensor_buffering_t *const simplex_buffering) {
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
}

/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local uchar *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_uchar_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_uchar_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;                 // The tile to be exported on the next step.
    TTL_int_uchar_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_uchar_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_uchar_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_uchar_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

static inline TTL_simplex_const_uchar_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local uchar *int_base1, __local uchar *int_base2, __local uchar *int_base3, TTL_ext_uchar_tensor_t ext_tensor_in,
    TTL_ext_uchar_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_uchar_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}

static inline TTL_io_uchar_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_uchar_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export) {
    // For performance, compute everything possible before waiting for the previous operations to finish. The
    // current index contains the tile that is to be exported, so prepare the structures before beginning the
    // export and the import.
    const TTL_layout_t next_import_layout =
        TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
    const TTL_int_uchar_sub_tensor_t next_import_int_sub_tensor =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_next_import.shape,
                                  next_import_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_next_import.offset);
    const TTL_const_ext_uchar_tensor_t next_import_ext_tensor =
        TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
                                    tile_next_import.shape,
                                    simplex_buffer->common.ext_tensor_in.layout,
                                    tile_next_import.offset,
                                    simplex_buffer->common.ext_tensor_in.elem_size);

    const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
                                                             simplex_buffer->next_exported_tile.shape.height);
    const TTL_int_uchar_tensor_t int_export_tensor =
        TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                              simplex_buffer->next_exported_tile.shape,
                              int_export_layout,
                              simplex_buffer->common.ext_tensor_out.elem_size);
    const TTL_ext_uchar_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
                                                                   simplex_buffer->next_exported_tile.shape,
                                                                   simplex_buffer->common.ext_tensor_out.layout,
                                                                   simplex_buffer->next_exported_tile.offset,
                                                                   simplex_buffer->common.ext_tensor_out.elem_size);

    // Wait for the previous (import/export)s to complete before starting the next.
    TTL_wait(1, simplex_buffer->event_out);
    TTL_wait(1, simplex_buffer->event_in);

    if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
        TTL_export(*TTL_to_const_tensor(&int_export_tensor),
                   *TTL_to_void_tensor(&export_to),
                   simplex_buffer->event_out);

    if (TTL_tile_empty(tile_next_import) == false)
        TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
                              *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
                              simplex_buffer->event_in);

    // The import/export has been started for the current tile. Move to the next
    // tile.
    simplex_buffer->common.index =
        (simplex_buffer->common.index + 1) %
        (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0]));  // Write to.

    // Retrieve the buffer imported previously, to read from now.
    const TTL_int_uchar_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
    simplex_buffer->int_prev_imported = next_import_int_sub_tensor;

    // Can write to the out buffer according to the size of curr_tile, rather than the size
    // recently exported.
    const TTL_layout_t curr_int_layout =
        TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
    const TTL_int_uchar_sub_tensor_t int_curr_buff_out =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_current_export.shape,
                                  curr_int_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_current_export.offset);

    // Save the last two tiles to prevent commonly repeated get_tile()'s.
    simplex_buffer->next_exported_tile = tile_current_export;

    return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
}

static inline void __attribute__((overloadable)) TTL_finish_buffering(
    TTL_simplex_const_uchar_tensor_buffering_t *const simplex_buffering) {
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
}

/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local int *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_int_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_int_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;               // The tile to be exported on the next step.
    TTL_int_int_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_int_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_int_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_int_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

static inline TTL_simplex_const_int_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local int *int_base1, __local int *int_base2, __local int *int_base3, TTL_ext_int_tensor_t ext_tensor_in,
    TTL_ext_int_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_int_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}

static inline TTL_io_int_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_int_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export) {
    // For performance, compute everything possible before waiting for the previous operations to finish. The
    // current index contains the tile that is to be exported, so prepare the structures before beginning the
    // export and the import.
    const TTL_layout_t next_import_layout =
        TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
    const TTL_int_int_sub_tensor_t next_import_int_sub_tensor =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_next_import.shape,
                                  next_import_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_next_import.offset);
    const TTL_const_ext_int_tensor_t next_import_ext_tensor =
        TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
                                    tile_next_import.shape,
                                    simplex_buffer->common.ext_tensor_in.layout,
                                    tile_next_import.offset,
                                    simplex_buffer->common.ext_tensor_in.elem_size);

    const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
                                                             simplex_buffer->next_exported_tile.shape.height);
    const TTL_int_int_tensor_t int_export_tensor =
        TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                              simplex_buffer->next_exported_tile.shape,
                              int_export_layout,
                              simplex_buffer->common.ext_tensor_out.elem_size);
    const TTL_ext_int_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
                                                                 simplex_buffer->next_exported_tile.shape,
                                                                 simplex_buffer->common.ext_tensor_out.layout,
                                                                 simplex_buffer->next_exported_tile.offset,
                                                                 simplex_buffer->common.ext_tensor_out.elem_size);

    // Wait for the previous (import/export)s to complete before starting the next.
    TTL_wait(1, simplex_buffer->event_out);
    TTL_wait(1, simplex_buffer->event_in);

    if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
        TTL_export(*TTL_to_const_tensor(&int_export_tensor),
                   *TTL_to_void_tensor(&export_to),
                   simplex_buffer->event_out);

    if (TTL_tile_empty(tile_next_import) == false)
        TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
                              *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
                              simplex_buffer->event_in);

    // The import/export has been started for the current tile. Move to the next
    // tile.
    simplex_buffer->common.index =
        (simplex_buffer->common.index + 1) %
        (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0]));  // Write to.

    // Retrieve the buffer imported previously, to read from now.
    const TTL_int_int_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
    simplex_buffer->int_prev_imported = next_import_int_sub_tensor;

    // Can write to the out buffer according to the size of curr_tile, rather than the size
    // recently exported.
    const TTL_layout_t curr_int_layout =
        TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
    const TTL_int_int_sub_tensor_t int_curr_buff_out =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_current_export.shape,
                                  curr_int_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_current_export.offset);

    // Save the last two tiles to prevent commonly repeated get_tile()'s.
    simplex_buffer->next_exported_tile = tile_current_export;

    return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
}

static inline void __attribute__((overloadable)) TTL_finish_buffering(
    TTL_simplex_const_int_tensor_buffering_t *const simplex_buffering) {
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
}

/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local uint *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_uint_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_uint_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;                // The tile to be exported on the next step.
    TTL_int_uint_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_uint_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_uint_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_uint_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

static inline TTL_simplex_const_uint_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local uint *int_base1, __local uint *int_base2, __local uint *int_base3, TTL_ext_uint_tensor_t ext_tensor_in,
    TTL_ext_uint_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_uint_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}

static inline TTL_io_uint_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_uint_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export) {
    // For performance, compute everything possible before waiting for the previous operations to finish. The
    // current index contains the tile that is to be exported, so prepare the structures before beginning the
    // export and the import.
    const TTL_layout_t next_import_layout =
        TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
    const TTL_int_uint_sub_tensor_t next_import_int_sub_tensor =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_next_import.shape,
                                  next_import_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_next_import.offset);
    const TTL_const_ext_uint_tensor_t next_import_ext_tensor =
        TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
                                    tile_next_import.shape,
                                    simplex_buffer->common.ext_tensor_in.layout,
                                    tile_next_import.offset,
                                    simplex_buffer->common.ext_tensor_in.elem_size);

    const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
                                                             simplex_buffer->next_exported_tile.shape.height);
    const TTL_int_uint_tensor_t int_export_tensor =
        TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                              simplex_buffer->next_exported_tile.shape,
                              int_export_layout,
                              simplex_buffer->common.ext_tensor_out.elem_size);
    const TTL_ext_uint_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
                                                                  simplex_buffer->next_exported_tile.shape,
                                                                  simplex_buffer->common.ext_tensor_out.layout,
                                                                  simplex_buffer->next_exported_tile.offset,
                                                                  simplex_buffer->common.ext_tensor_out.elem_size);

    // Wait for the previous (import/export)s to complete before starting the next.
    TTL_wait(1, simplex_buffer->event_out);
    TTL_wait(1, simplex_buffer->event_in);

    if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
        TTL_export(*TTL_to_const_tensor(&int_export_tensor),
                   *TTL_to_void_tensor(&export_to),
                   simplex_buffer->event_out);

    if (TTL_tile_empty(tile_next_import) == false)
        TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
                              *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
                              simplex_buffer->event_in);

    // The import/export has been started for the current tile. Move to the next
    // tile.
    simplex_buffer->common.index =
        (simplex_buffer->common.index + 1) %
        (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0]));  // Write to.

    // Retrieve the buffer imported previously, to read from now.
    const TTL_int_uint_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
    simplex_buffer->int_prev_imported = next_import_int_sub_tensor;

    // Can write to the out buffer according to the size of curr_tile, rather than the size
    // recently exported.
    const TTL_layout_t curr_int_layout =
        TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
    const TTL_int_uint_sub_tensor_t int_curr_buff_out =
        TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
                                  tile_current_export.shape,
                                  curr_int_layout,
                                  *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
                                  tile_current_export.offset);

    // Save the last two tiles to prevent commonly repeated get_tile()'s.
    simplex_buffer->next_exported_tile = tile_current_export;

    return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
}

static inline void __attribute__((overloadable)) TTL_finish_buffering(
    TTL_simplex_const_uint_tensor_buffering_t *const simplex_buffering) {
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
    TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
}

/**
 * The structs used for this buffering type
 */
// TTL_simplex_buffering_t
typedef struct {
    struct {
        int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
                        0->1->0->1... etc */
        __local short *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
        TTL_ext_short_tensor_t ext_tensor_in; /*!< The external tensor being input */
        TTL_ext_short_tensor_t ext_tensor_out; /*!< The external tensor being output */
    } common; ///< The information that is common to all pipeline schemes

    TTL_event_t *event_in;   ///< The event used to wait for import (external to internal) completion
    TTL_event_t *event_out;  ///< The event used to wait for export (internal to external) completion

    // Cache previously obtained tiles.
    TTL_tile_t next_exported_tile;                 // The tile to be exported on the next step.
    TTL_int_short_sub_tensor_t int_prev_imported;  // Cache previously imported internal buffer.
} TTL_simplex_const_short_tensor_buffering_t;

/**
 * Simple declarations for file ordering purposes
 */
static inline TTL_io_short_tensor_t __attribute__((overloadable)) TTL_step_buffering(
    TTL_simplex_const_short_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
    TTL_tile_t tile_current_export);

static inline TTL_simplex_const_short_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
    __local short *int_base1, __local short *int_base2, __local short *int_base3, TTL_ext_short_tensor_t ext_tensor_in,
    TTL_ext_short_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
    TTL_simplex_const_short_tensor_buffering_t result;

    result.common.int_base[0] = int_base1;
    result.common.int_base[1] = int_base2;
    result.common.int_base[2] = int_base3;
    result.common.ext_tensor_in = ext_tensor_in;
    result.common.ext_tensor_out = ext_tensor_out;
    result.event_in = event_in;
    result.event_out = event_out;
    result.next_exported_tile = TTL_create_empty_tile();

    result.common.index = 0;

    TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());

    return result;
}

1278static inline TTL_io_short_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1279 TTL_simplex_const_short_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1280 TTL_tile_t tile_current_export) {
1281 // For performance, compute everything possible before waiting for the previous operations to finish. The current
1282 // index contains the tile that is to be exported, so prepare the structures before beginning the export and export.
1283 const TTL_layout_t next_import_layout =
1284 TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
1285 const TTL_int_short_sub_tensor_t next_import_int_sub_tensor =
1286 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1287 tile_next_import.shape,
1288 next_import_layout,
1289 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1290 tile_next_import.offset);
1291 const TTL_const_ext_short_tensor_t next_import_ext_tensor =
1293 tile_next_import.shape,
1294 simplex_buffer->common.ext_tensor_in.layout,
1295 tile_next_import.offset,
1296 simplex_buffer->common.ext_tensor_in.elem_size);
1297
1298 const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
1299 simplex_buffer->next_exported_tile.shape.height);
1300 const TTL_int_short_tensor_t int_export_tensor =
1301 TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1302 simplex_buffer->next_exported_tile.shape,
1303 int_export_layout,
1304 simplex_buffer->common.ext_tensor_out.elem_size);
1305 const TTL_ext_short_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
1306 simplex_buffer->next_exported_tile.shape,
1307 simplex_buffer->common.ext_tensor_out.layout,
1308 simplex_buffer->next_exported_tile.offset,
1309 simplex_buffer->common.ext_tensor_out.elem_size);
1310
1311 // Wait for the previous (import/export)s to complete before starting the next.
1312 TTL_wait(1, simplex_buffer->event_out);
1313 TTL_wait(1, simplex_buffer->event_in);
1314
1315 if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
1317 *TTL_to_void_tensor(&export_to),
1318 simplex_buffer->event_out);
1319
1320 if (TTL_tile_empty(tile_next_import) == false)
1321 TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
1322 *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
1323 simplex_buffer->event_in);
1324
1325 // The import/export has been started for the current tile, Move to the next
1326 // tile.
1327 simplex_buffer->common.index =
1328 (simplex_buffer->common.index + 1) %
1329 (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0])); // Write to.
1330
1331 // Retrieve buffer imported previously to read from now.
1332 const TTL_int_short_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
1333 simplex_buffer->int_prev_imported = next_import_int_sub_tensor;
1334
1335 // The output buffer can be written according to the size of the current
1336 // tile, rather than the size most recently exported.
1337 const TTL_layout_t curr_int_layout =
1338 TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
1339 const TTL_int_short_sub_tensor_t int_curr_buff_out =
1340 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1341 tile_current_export.shape,
1342 curr_int_layout,
1343 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1344 tile_current_export.offset);
1345
1346 // Cache the current tile so repeated get_tile() calls are not needed.
1347 simplex_buffer->next_exported_tile = tile_current_export;
1348
1349 return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
1350}
1351
1352static inline void __attribute__((overloadable)) TTL_finish_buffering(
1353 TTL_simplex_const_short_tensor_buffering_t *const simplex_buffering) {
1354 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1355 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1356}
1357/*
1358 * TTL_simplex_scheme.h
1359 *
1360 * Copyright (c) 2023 Mobileye
1361 *
1362 * Licensed under the Apache License, Version 2.0 (the License);
1363 * you may not use this file except in compliance with the License.
1364 * You may obtain a copy of the License at
1365 *
1366 * http://www.apache.org/licenses/LICENSE-2.0
1367 *
1368 * Unless required by applicable law or agreed to in writing, software
1369 * distributed under the License is distributed on an AS IS BASIS,
1370 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1371 * See the License for the specific language governing permissions and
1372 * limitations under the License.
1373 */
1374
1375// clang-format off
1376/**
1377 * @file
1378 *
1379 * TTL_simplex_buffering pipelines a pair of import and export transactions using
1380 * three internal buffers, in rotation: each buffer interchangeably serves as input
1381 * buffer and output buffer, such that in each iteration one buffer is used both to
1382 * export then import and two buffers are used by compute for reading and writing.
1383 *
1384 * With simplex buffering we're only waiting for previous iterations, so DMA
1385 * transactions run mostly in parallel to computation, but serially with each
1386 * other. Using the same buffer both for import and export is possible allowing us
1387 * to overlap exporting from and importing to the same buffer.
1388 *
1389 * The following table draws the pipelined actions performed in simplex buffering.
1390 * It specifies which tile is processed in each iteration:
1391 *
1392 * | Action\\Iteration | \#-1 | \#0 | \#1 | \#2 | \#i (2:NumOfTiles-2) | \#NumOfTiles-1 | \#NumOfTiles | \#NumOfTiles+1 |
1393 * |-------------------|------|-----|-----|-----|----------------------|----------------|--------------|----------------|
1394 * | **WaitExport** | | | | 0 | i-2 | NumOfTiles-3 | NumOfTiles-2 | NumOfTiles-1 |
1395 * | **Export** | | | 0 | 1 | i-1 | NumOfTiles-2 | NumOfTiles-1 | |
1396 * | **Wait Import** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1397 * | **Import** | 0 | 1 | 2 | 3 | i+1 | | | |
1398 * | **Compute** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1399 *
1400 * Notice the prolog (at iteration number -1) and the 2 epilogs (at iterations
1401 * number NumOfTiles and NumOfTiles+1) which add in total 3 extra iterations.
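 *
 * As a sketch, the outer loop this scheme is designed for looks as follows.
 * The tiler, TTL_get_tile, TTL_number_of_tiles and compute are assumptions
 * taken from the TTL examples rather than defined in this file, and
 * TTL_get_tile is assumed to return an empty tile for an out-of-range id:
 *
 * @code
 * // Prolog (iteration #-1): start the scheme and import tile 0.
 * TTL_simplex_const_ushort_tensor_buffering_t scheme = TTL_start_simplex_buffering(
 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
 *     &e_in, &e_out, TTL_get_tile(0, tiler));
 *
 * for (int i = 0; i < TTL_number_of_tiles(tiler); ++i) {
 *     // Import tile i+1, export the previously computed tile, compute tile i.
 *     TTL_io_ushort_tensor_t io = TTL_step_buffering(
 *         &scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 *     compute(io.imported_to, io.to_export_from);
 * }
 *
 * // Epilogs (iterations #NumOfTiles and #NumOfTiles+1): drain the pipeline.
 * TTL_finish_buffering(&scheme);
 * @endcode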
1402 *
1403 * @example TTL_simplex_buffering.cl
1404 */
1405// clang-format on
1406
1407 // This file presumes that the following have been pre-included.
1408 // This is not done here for path reasons.
1409// #include "TTL_core.h"
1410// #include "TTL_import_export.h"
1411// #include TTL_IMPORT_EXPORT_INCLUDE_H
1412
1413/**
1414 * @brief The structs used for this buffering type
1415 */
1416// TTL_simplex_buffering_t
1417typedef struct {
1418 struct {
1419 int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
1420 0->1->0->1... etc */
1421 __local ushort *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
1422 TTL_ext_ushort_tensor_t ext_tensor_in; /*!< The external tensor being input */
1423 TTL_ext_ushort_tensor_t ext_tensor_out; /*!< The external tensor being output */
1424 } common; ///< The information that is common to all pipeline schemes
1425
1426 TTL_event_t *event_in; /*!< A pointer to the event used for the inward (external to internal) transfer */
1427 TTL_event_t *event_out; /*!< A pointer to the event used for the outward (internal to external) transfer */
1428 // Cache previously fetched tiles.
1429 TTL_tile_t next_exported_tile;
1430 TTL_int_ushort_sub_tensor_t int_prev_imported; // Cache previously imported internal buffer.
1431 } TTL_simplex_const_ushort_tensor_buffering_t;
1432
1433/**
1434 * Forward declarations for file ordering purposes
1435 */
1436static inline TTL_io_ushort_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1437 TTL_simplex_const_ushort_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1438 TTL_tile_t tile_current_export);
1439
1440/**
1441 * @brief Create a TTL_simplex_buffering_t and begin the buffering process
1442 *
1443 * @param int_base1 The address of the first buffer to be used in local memory
1444 * @param int_base2 The address of the second buffer to be used in local memory
1445 * @param int_base3 The address of the third buffer to be used in local memory
1446 * @param ext_tensor_in The external tensor to import the input data from
1447 * @param ext_tensor_out The external tensor to export the output data to
1448 * @param event_in A pointer to the event to use for the inward (external to
1449 * internal) transfer completion
1450 * @param event_out A pointer to the event to use for the outward (internal to
1451 * external) transfer completion
1452 * @param first_tile The first tile to fetch for the scheme
1453 *
1454 * The scheme rotates its three local buffers through the import, compute and export roles described at the top of this file.
1455 *
1456 * @return The TTL_simplex_buffering_t created from the input parameters
1457 *
1458 * Example:
1459 * @code
1460 * TTL_event_t tb_e_in = TTL_get_event();
1461 * TTL_event_t tb_e_out = TTL_get_event();
1462 * TTL_simplex_buffering_t tb_scheme = TTL_start_simplex_buffering(
1463 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
1464 *     &tb_e_in, &tb_e_out, first_tile);
1465 * @endcode
1466 * \n
1467 *
1468 * This can be optimized and standardized using the TTL_step_buffering
1469 * call.
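 *
 * For example, a single pipeline step per tile (the tiler helpers and
 * compute are assumptions from the TTL examples, not defined in this file):
 *
 * @code
 * TTL_io_ushort_tensor_t io = TTL_step_buffering(
 *     &tb_scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 * compute(io.imported_to, io.to_export_from);
 * @endcode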
1470 *
1471 * @startuml
1472 *
1473 * start
1474 *
1475 *
1476 * stop
1477 *
1478 * @enduml
1479 *
1480 */
1481 static inline TTL_simplex_const_ushort_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
1482 __local ushort *int_base1, __local ushort *int_base2, __local ushort *int_base3,
1483 TTL_ext_ushort_tensor_t ext_tensor_in, TTL_ext_ushort_tensor_t ext_tensor_out, TTL_event_t *event_in,
1484 TTL_event_t *event_out, TTL_tile_t first_tile) {
1485 TTL_simplex_const_ushort_tensor_buffering_t result;
1486
1487 result.common.int_base[0] = int_base1;
1488 result.common.int_base[1] = int_base2;
1489 result.common.int_base[2] = int_base3;
1490 result.common.ext_tensor_in = ext_tensor_in;
1491 result.common.ext_tensor_out = ext_tensor_out;
1492 result.event_in = event_in;
1493 result.event_out = event_out;
1494 result.next_exported_tile = TTL_create_empty_tile();
1495
1496 result.common.index = 0;
1497
1498 result.int_prev_imported = TTL_create_empty_int_sub_tensor(int_base1);
1499
1500 TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());
1501
1502 return result;
1503}
1504
1505static inline TTL_io_ushort_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1506 TTL_simplex_const_ushort_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1507 TTL_tile_t tile_current_export) {
1508 // For performance, compute everything possible before waiting for the previous operations to finish. The current
1509 // index contains the tile that is to be exported, so prepare the structures before beginning the export and import.
1510 const TTL_layout_t next_import_layout =
1511 TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
1512 const TTL_int_ushort_sub_tensor_t next_import_int_sub_tensor =
1513 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1514 tile_next_import.shape,
1515 next_import_layout,
1516 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1517 tile_next_import.offset);
1518 const TTL_const_ext_ushort_tensor_t next_import_ext_tensor =
1519 TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
1520 tile_next_import.shape,
1521 simplex_buffer->common.ext_tensor_in.layout,
1522 tile_next_import.offset,
1523 simplex_buffer->common.ext_tensor_in.elem_size);
1524
1525 const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
1526 simplex_buffer->next_exported_tile.shape.height);
1527 const TTL_int_ushort_tensor_t int_export_tensor =
1528 TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1529 simplex_buffer->next_exported_tile.shape,
1530 int_export_layout,
1531 simplex_buffer->common.ext_tensor_out.elem_size);
1532 const TTL_ext_ushort_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
1533 simplex_buffer->next_exported_tile.shape,
1534 simplex_buffer->common.ext_tensor_out.layout,
1535 simplex_buffer->next_exported_tile.offset,
1536 simplex_buffer->common.ext_tensor_out.elem_size);
1537
1538 // Wait for the previous (import/export)s to complete before starting the next.
1539 TTL_wait(1, simplex_buffer->event_out);
1540 TTL_wait(1, simplex_buffer->event_in);
1541
1542 if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
1543 TTL_export(*TTL_to_void_tensor(TTL_to_const_tensor(&int_export_tensor)),
1544 *TTL_to_void_tensor(&export_to),
1545 simplex_buffer->event_out);
1546
1547 if (TTL_tile_empty(tile_next_import) == false)
1548 TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
1549 *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
1550 simplex_buffer->event_in);
1551
1552 // The import/export has been started for the current tile; move to the
1553 // next tile.
1554 simplex_buffer->common.index =
1555 (simplex_buffer->common.index + 1) %
1556 (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0])); // Selects the buffer written to next.
1557
1558 // Retrieve the buffer imported previously; it is the one read from now.
1559 const TTL_int_ushort_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
1560 simplex_buffer->int_prev_imported = next_import_int_sub_tensor;
1561
1562 // The output buffer can be written according to the size of the current
1563 // tile, rather than the size most recently exported.
1564 const TTL_layout_t curr_int_layout =
1565 TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
1566 const TTL_int_ushort_sub_tensor_t int_curr_buff_out =
1567 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1568 tile_current_export.shape,
1569 curr_int_layout,
1570 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1571 tile_current_export.offset);
1572
1573 // Cache the current tile so repeated get_tile() calls are not needed.
1574 simplex_buffer->next_exported_tile = tile_current_export;
1575
1576 return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
1577}
1578
1579static inline void __attribute__((overloadable)) TTL_finish_buffering(
1580 TTL_simplex_const_ushort_tensor_buffering_t *const simplex_buffering) {
1581 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1582 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1583}
1584/*
1585 * TTL_simplex_scheme.h
1586 *
1587 * Copyright (c) 2023 Mobileye
1588 *
1589 * Licensed under the Apache License, Version 2.0 (the License);
1590 * you may not use this file except in compliance with the License.
1591 * You may obtain a copy of the License at
1592 *
1593 * http://www.apache.org/licenses/LICENSE-2.0
1594 *
1595 * Unless required by applicable law or agreed to in writing, software
1596 * distributed under the License is distributed on an AS IS BASIS,
1597 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1598 * See the License for the specific language governing permissions and
1599 * limitations under the License.
1600 */
1601
1602// clang-format off
1603/**
1604 * @file
1605 *
1606 * TTL_simplex_buffering pipelines a pair of import and export transactions using
1607 * three internal buffers, in rotation: each buffer interchangeably serves as input
1608 * buffer and output buffer, such that in each iteration one buffer is used both to
1609 * export then import and two buffers are used by compute for reading and writing.
1610 *
1611 * With simplex buffering we're only waiting for previous iterations, so DMA
1612 * transactions run mostly in parallel to computation, but serially with each
1613 * other. Using the same buffer both for import and export is possible allowing us
1614 * to overlap exporting from and importing to the same buffer.
1615 *
1616 * The following table draws the pipelined actions performed in simplex buffering.
1617 * It specifies which tile is processed in each iteration:
1618 *
1619 * | Action\\Iteration | \#-1 | \#0 | \#1 | \#2 | \#i (2:NumOfTiles-2) | \#NumOfTiles-1 | \#NumOfTiles | \#NumOfTiles+1 |
1620 * |-------------------|------|-----|-----|-----|----------------------|----------------|--------------|----------------|
1621 * | **WaitExport** | | | | 0 | i-2 | NumOfTiles-3 | NumOfTiles-2 | NumOfTiles-1 |
1622 * | **Export** | | | 0 | 1 | i-1 | NumOfTiles-2 | NumOfTiles-1 | |
1623 * | **Wait Import** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1624 * | **Import** | 0 | 1 | 2 | 3 | i+1 | | | |
1625 * | **Compute** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1626 *
1627 * Notice the prolog (at iteration number -1) and the 2 epilogs (at iterations
1628 * number NumOfTiles and NumOfTiles+1) which add in total 3 extra iterations.
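 *
 * As a sketch, the outer loop this scheme is designed for looks as follows.
 * The tiler, TTL_get_tile, TTL_number_of_tiles and compute are assumptions
 * taken from the TTL examples rather than defined in this file, and
 * TTL_get_tile is assumed to return an empty tile for an out-of-range id:
 *
 * @code
 * // Prolog (iteration #-1): start the scheme and import tile 0.
 * TTL_simplex_const_long_tensor_buffering_t scheme = TTL_start_simplex_buffering(
 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
 *     &e_in, &e_out, TTL_get_tile(0, tiler));
 *
 * for (int i = 0; i < TTL_number_of_tiles(tiler); ++i) {
 *     // Import tile i+1, export the previously computed tile, compute tile i.
 *     TTL_io_long_tensor_t io = TTL_step_buffering(
 *         &scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 *     compute(io.imported_to, io.to_export_from);
 * }
 *
 * // Epilogs (iterations #NumOfTiles and #NumOfTiles+1): drain the pipeline.
 * TTL_finish_buffering(&scheme);
 * @endcode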
1629 *
1630 * @example TTL_simplex_buffering.cl
1631 */
1632// clang-format on
1633
1634 // This file presumes that the following have been pre-included.
1635 // This is not done here for path reasons.
1636// #include "TTL_core.h"
1637// #include "TTL_import_export.h"
1638// #include TTL_IMPORT_EXPORT_INCLUDE_H
1639
1640/**
1641 * @brief The structs used for this buffering type
1642 */
1643// TTL_simplex_buffering_t
1644typedef struct {
1645 struct {
1646 int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
1647 0->1->0->1... etc */
1648 __local long *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
1649 TTL_ext_long_tensor_t ext_tensor_in; /*!< The external tensor being input */
1650 TTL_ext_long_tensor_t ext_tensor_out; /*!< The external tensor being output */
1651 } common; ///< The information that is common to all pipeline schemes
1652
1653 TTL_event_t *event_in; /*!< A pointer to the event used for the inward (external to internal) transfer */
1654 TTL_event_t *event_out; /*!< A pointer to the event used for the outward (internal to external) transfer */
1655 // Cache previously fetched tiles.
1656 TTL_tile_t next_exported_tile;
1657 TTL_int_long_sub_tensor_t int_prev_imported; // Cache previously imported internal buffer.
1658 } TTL_simplex_const_long_tensor_buffering_t;
1659
1660/**
1661 * Forward declarations for file ordering purposes
1662 */
1663static inline TTL_io_long_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1664 TTL_simplex_const_long_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1665 TTL_tile_t tile_current_export);
1666
1667/**
1668 * @brief Create a TTL_simplex_buffering_t and begin the buffering process
1669 *
1670 * @param int_base1 The address of the first buffer to be used in local memory
1671 * @param int_base2 The address of the second buffer to be used in local memory
1672 * @param int_base3 The address of the third buffer to be used in local memory
1673 * @param ext_tensor_in The external tensor to import the input data from
1674 * @param ext_tensor_out The external tensor to export the output data to
1675 * @param event_in A pointer to the event to use for the inward (external to
1676 * internal) transfer completion
1677 * @param event_out A pointer to the event to use for the outward (internal to
1678 * external) transfer completion
1679 * @param first_tile The first tile to fetch for the scheme
1680 *
1681 * The scheme rotates its three local buffers through the import, compute and export roles described at the top of this file.
1682 *
1683 * @return The TTL_simplex_buffering_t created from the input parameters
1684 *
1685 * Example:
1686 * @code
1687 * TTL_event_t tb_e_in = TTL_get_event();
1688 * TTL_event_t tb_e_out = TTL_get_event();
1689 * TTL_simplex_buffering_t tb_scheme = TTL_start_simplex_buffering(
1690 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
1691 *     &tb_e_in, &tb_e_out, first_tile);
1692 * @endcode
1693 * \n
1694 *
1695 * This can be optimized and standardized using the TTL_step_buffering
1696 * call.
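 *
 * For example, a single pipeline step per tile (the tiler helpers and
 * compute are assumptions from the TTL examples, not defined in this file):
 *
 * @code
 * TTL_io_long_tensor_t io = TTL_step_buffering(
 *     &tb_scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 * compute(io.imported_to, io.to_export_from);
 * @endcode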
1697 *
1698 * @startuml
1699 *
1700 * start
1701 *
1702 *
1703 * stop
1704 *
1705 * @enduml
1706 *
1707 */
1708 static inline TTL_simplex_const_long_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
1709 __local long *int_base1, __local long *int_base2, __local long *int_base3, TTL_ext_long_tensor_t ext_tensor_in,
1710 TTL_ext_long_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
1711 TTL_simplex_const_long_tensor_buffering_t result;
1712
1713 result.common.int_base[0] = int_base1;
1714 result.common.int_base[1] = int_base2;
1715 result.common.int_base[2] = int_base3;
1716 result.common.ext_tensor_in = ext_tensor_in;
1717 result.common.ext_tensor_out = ext_tensor_out;
1718 result.event_in = event_in;
1719 result.event_out = event_out;
1720 result.next_exported_tile = TTL_create_empty_tile();
1721
1722 result.common.index = 0;
1723
1724 result.int_prev_imported = TTL_create_empty_int_sub_tensor(int_base1);
1725
1726 TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());
1727
1728 return result;
1729}
1730
1731static inline TTL_io_long_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1732 TTL_simplex_const_long_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1733 TTL_tile_t tile_current_export) {
1734 // For performance, compute everything possible before waiting for the previous operations to finish. The current
1735 // index contains the tile that is to be exported, so prepare the structures before beginning the export and import.
1736 const TTL_layout_t next_import_layout =
1737 TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
1738 const TTL_int_long_sub_tensor_t next_import_int_sub_tensor =
1739 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1740 tile_next_import.shape,
1741 next_import_layout,
1742 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1743 tile_next_import.offset);
1744 const TTL_const_ext_long_tensor_t next_import_ext_tensor =
1745 TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
1746 tile_next_import.shape,
1747 simplex_buffer->common.ext_tensor_in.layout,
1748 tile_next_import.offset,
1749 simplex_buffer->common.ext_tensor_in.elem_size);
1750
1751 const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
1752 simplex_buffer->next_exported_tile.shape.height);
1753 const TTL_int_long_tensor_t int_export_tensor =
1754 TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1755 simplex_buffer->next_exported_tile.shape,
1756 int_export_layout,
1757 simplex_buffer->common.ext_tensor_out.elem_size);
1758 const TTL_ext_long_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
1759 simplex_buffer->next_exported_tile.shape,
1760 simplex_buffer->common.ext_tensor_out.layout,
1761 simplex_buffer->next_exported_tile.offset,
1762 simplex_buffer->common.ext_tensor_out.elem_size);
1763
1764 // Wait for the previous (import/export)s to complete before starting the next.
1765 TTL_wait(1, simplex_buffer->event_out);
1766 TTL_wait(1, simplex_buffer->event_in);
1767
1768 if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
1769 TTL_export(*TTL_to_void_tensor(TTL_to_const_tensor(&int_export_tensor)),
1770 *TTL_to_void_tensor(&export_to),
1771 simplex_buffer->event_out);
1772
1773 if (TTL_tile_empty(tile_next_import) == false)
1774 TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
1775 *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
1776 simplex_buffer->event_in);
1777
1778 // The import/export has been started for the current tile; move to the
1779 // next tile.
1780 simplex_buffer->common.index =
1781 (simplex_buffer->common.index + 1) %
1782 (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0])); // Selects the buffer written to next.
1783
1784 // Retrieve the buffer imported previously; it is the one read from now.
1785 const TTL_int_long_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
1786 simplex_buffer->int_prev_imported = next_import_int_sub_tensor;
1787
1788 // The output buffer can be written according to the size of the current
1789 // tile, rather than the size most recently exported.
1790 const TTL_layout_t curr_int_layout =
1791 TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
1792 const TTL_int_long_sub_tensor_t int_curr_buff_out =
1793 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1794 tile_current_export.shape,
1795 curr_int_layout,
1796 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1797 tile_current_export.offset);
1798
1799 // Cache the current tile so repeated get_tile() calls are not needed.
1800 simplex_buffer->next_exported_tile = tile_current_export;
1801
1802 return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
1803}
1804
1805static inline void __attribute__((overloadable)) TTL_finish_buffering(
1806 TTL_simplex_const_long_tensor_buffering_t *const simplex_buffering) {
1807 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1808 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
1809}
1810/*
1811 * TTL_simplex_scheme.h
1812 *
1813 * Copyright (c) 2023 Mobileye
1814 *
1815 * Licensed under the Apache License, Version 2.0 (the License);
1816 * you may not use this file except in compliance with the License.
1817 * You may obtain a copy of the License at
1818 *
1819 * http://www.apache.org/licenses/LICENSE-2.0
1820 *
1821 * Unless required by applicable law or agreed to in writing, software
1822 * distributed under the License is distributed on an AS IS BASIS,
1823 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1824 * See the License for the specific language governing permissions and
1825 * limitations under the License.
1826 */
1827
1828// clang-format off
1829/**
1830 * @file
1831 *
1832 * TTL_simplex_buffering pipelines a pair of import and export transactions using
1833 * three internal buffers, in rotation: each buffer interchangeably serves as input
1834 * buffer and output buffer, such that in each iteration one buffer is used both to
1835 * export then import and two buffers are used by compute for reading and writing.
1836 *
1837 * With simplex buffering we're only waiting for previous iterations, so DMA
1838 * transactions run mostly in parallel to computation, but serially with each
1839 * other. Using the same buffer both for import and export is possible allowing us
1840 * to overlap exporting from and importing to the same buffer.
1841 *
1842 * The following table draws the pipelined actions performed in simplex buffering.
1843 * It specifies which tile is processed in each iteration:
1844 *
1845 * | Action\\Iteration | \#-1 | \#0 | \#1 | \#2 | \#i (2:NumOfTiles-2) | \#NumOfTiles-1 | \#NumOfTiles | \#NumOfTiles+1 |
1846 * |-------------------|------|-----|-----|-----|----------------------|----------------|--------------|----------------|
1847 * | **WaitExport** | | | | 0 | i-2 | NumOfTiles-3 | NumOfTiles-2 | NumOfTiles-1 |
1848 * | **Export** | | | 0 | 1 | i-1 | NumOfTiles-2 | NumOfTiles-1 | |
1849 * | **Wait Import** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1850 * | **Import** | 0 | 1 | 2 | 3 | i+1 | | | |
1851 * | **Compute** | | 0 | 1 | 2 | i | NumOfTiles-1 | | |
1852 *
1853 * Notice the prolog (at iteration number -1) and the 2 epilogs (at iterations
1854 * number NumOfTiles and NumOfTiles+1) which add in total 3 extra iterations.
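 *
 * As a sketch, the outer loop this scheme is designed for looks as follows.
 * The tiler, TTL_get_tile, TTL_number_of_tiles and compute are assumptions
 * taken from the TTL examples rather than defined in this file, and
 * TTL_get_tile is assumed to return an empty tile for an out-of-range id:
 *
 * @code
 * // Prolog (iteration #-1): start the scheme and import tile 0.
 * TTL_simplex_const_ulong_tensor_buffering_t scheme = TTL_start_simplex_buffering(
 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
 *     &e_in, &e_out, TTL_get_tile(0, tiler));
 *
 * for (int i = 0; i < TTL_number_of_tiles(tiler); ++i) {
 *     // Import tile i+1, export the previously computed tile, compute tile i.
 *     TTL_io_ulong_tensor_t io = TTL_step_buffering(
 *         &scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 *     compute(io.imported_to, io.to_export_from);
 * }
 *
 * // Epilogs (iterations #NumOfTiles and #NumOfTiles+1): drain the pipeline.
 * TTL_finish_buffering(&scheme);
 * @endcode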
1855 *
1856 * @example TTL_simplex_buffering.cl
1857 */
1858// clang-format on
1859
1860 // This file presumes that the following have been pre-included.
1861 // This is not done here for path reasons.
1862// #include "TTL_core.h"
1863// #include "TTL_import_export.h"
1864// #include TTL_IMPORT_EXPORT_INCLUDE_H
1865
1866/**
1867 * @brief The structs used for this buffering type
1868 */
1869// TTL_simplex_buffering_t
1870typedef struct {
1871 struct {
1872 int index; /*!< Describes the current buffer index when pipelining. For single 0->1->0, for double
1873 0->1->0->1... etc */
1874 __local ulong *int_base[3]; /*!< The internal base addresses of the pipelined tiles. */
1875 TTL_ext_ulong_tensor_t ext_tensor_in; /*!< The external tensor being input */
1876 TTL_ext_ulong_tensor_t ext_tensor_out; /*!< The external tensor being output */
1877 } common; ///< The information that is common to all pipeline schemes
1878
1879 TTL_event_t *event_in; /*!< A pointer to the event used for the inward (external to internal) transfer */
1880 TTL_event_t *event_out; /*!< A pointer to the event used for the outward (internal to external) transfer */
1881 // Cache previously fetched tiles.
1882 TTL_tile_t next_exported_tile;
1883 TTL_int_ulong_sub_tensor_t int_prev_imported; // Cache previously imported internal buffer.
1884 } TTL_simplex_const_ulong_tensor_buffering_t;
1885
1886/**
1887 * Forward declarations for file ordering purposes
1888 */
1889static inline TTL_io_ulong_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1890 TTL_simplex_const_ulong_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1891 TTL_tile_t tile_current_export);
1892
1893/**
1894 * @brief Create a TTL_simplex_buffering_t and begin the buffering process
1895 *
1896 * @param int_base1 The address of the first buffer to be used in local memory
1897 * @param int_base2 The address of the second buffer to be used in local memory
1898 * @param int_base3 The address of the third buffer to be used in local memory
1899 * @param ext_tensor_in The external tensor to import the input data from
1900 * @param ext_tensor_out The external tensor to export the output data to
1901 * @param event_in A pointer to the event to use for the inward (external to
1902 * internal) transfer completion
1903 * @param event_out A pointer to the event to use for the outward (internal to
1904 * external) transfer completion
1905 * @param first_tile The first tile to fetch for the scheme
1906 *
1907 * The scheme rotates its three local buffers through the import, compute and export roles described at the top of this file.
1908 *
1909 * @return The TTL_simplex_buffering_t created from the input parameters
1910 *
1911 * Example:
1912 * @code
1913 * TTL_event_t tb_e_in = TTL_get_event();
1914 * TTL_event_t tb_e_out = TTL_get_event();
1915 * TTL_simplex_buffering_t tb_scheme = TTL_start_simplex_buffering(
1916 *     l_buff1, l_buff2, l_buff3, ext_tensor_in, ext_tensor_out,
1917 *     &tb_e_in, &tb_e_out, first_tile);
1918 * @endcode
1919 * \n
1920 *
1921 * This can be optimized and standardized using the TTL_step_buffering
1922 * call.
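 *
 * For example, a single pipeline step per tile (the tiler helpers and
 * compute are assumptions from the TTL examples, not defined in this file):
 *
 * @code
 * TTL_io_ulong_tensor_t io = TTL_step_buffering(
 *     &tb_scheme, TTL_get_tile(i + 1, tiler), TTL_get_tile(i, tiler));
 * compute(io.imported_to, io.to_export_from);
 * @endcode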
1923 *
1924 * @startuml
1925 *
1926 * start
1927 *
1928 *
1929 * stop
1930 *
1931 * @enduml
1932 *
1933 */
1934 static inline TTL_simplex_const_ulong_tensor_buffering_t __attribute__((overloadable)) TTL_start_simplex_buffering(
1935 __local ulong *int_base1, __local ulong *int_base2, __local ulong *int_base3, TTL_ext_ulong_tensor_t ext_tensor_in,
1936 TTL_ext_ulong_tensor_t ext_tensor_out, TTL_event_t *event_in, TTL_event_t *event_out, TTL_tile_t first_tile) {
1937 TTL_simplex_const_ulong_tensor_buffering_t result;
1938
1939 result.common.int_base[0] = int_base1;
1940 result.common.int_base[1] = int_base2;
1941 result.common.int_base[2] = int_base3;
1942 result.common.ext_tensor_in = ext_tensor_in;
1943 result.common.ext_tensor_out = ext_tensor_out;
1944 result.event_in = event_in;
1945 result.event_out = event_out;
1946 result.next_exported_tile = TTL_create_empty_tile();
1947
1948 result.common.index = 0;
1949
1950 result.int_prev_imported = TTL_create_empty_int_sub_tensor(int_base1);
1951
1952 TTL_step_buffering(&result, first_tile, TTL_create_empty_tile());
1953
1954 return result;
1955}
1956
1957static inline TTL_io_ulong_tensor_t __attribute__((overloadable)) TTL_step_buffering(
1958 TTL_simplex_const_ulong_tensor_buffering_t *const simplex_buffer, TTL_tile_t tile_next_import,
1959 TTL_tile_t tile_current_export) {
1960 // For performance, compute everything possible before waiting for the previous operations to finish. The current
1961 // index contains the tile that is to be exported, so prepare the structures before beginning the export and import.
1962 const TTL_layout_t next_import_layout =
1963 TTL_create_layout(tile_next_import.shape.width, tile_next_import.shape.height);
1964 const TTL_int_ulong_sub_tensor_t next_import_int_sub_tensor =
1965 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1966 tile_next_import.shape,
1967 next_import_layout,
1968 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
1969 tile_next_import.offset);
1970 const TTL_const_ext_ulong_tensor_t next_import_ext_tensor =
1971 TTL_create_const_ext_tensor(simplex_buffer->common.ext_tensor_in.base,
1972 tile_next_import.shape,
1973 simplex_buffer->common.ext_tensor_in.layout,
1974 tile_next_import.offset,
1975 simplex_buffer->common.ext_tensor_in.elem_size);
1976
1977 const TTL_layout_t int_export_layout = TTL_create_layout(simplex_buffer->next_exported_tile.shape.width,
1978 simplex_buffer->next_exported_tile.shape.height);
1979 const TTL_int_ulong_tensor_t int_export_tensor =
1980 TTL_create_int_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
1981 simplex_buffer->next_exported_tile.shape,
1982 int_export_layout,
1983 simplex_buffer->common.ext_tensor_out.elem_size);
1984 const TTL_ext_ulong_tensor_t export_to = TTL_create_ext_tensor(simplex_buffer->common.ext_tensor_out.base,
1985 simplex_buffer->next_exported_tile.shape,
1986 simplex_buffer->common.ext_tensor_out.layout,
1987 simplex_buffer->next_exported_tile.offset,
1988 simplex_buffer->common.ext_tensor_out.elem_size);
1989
1990 // Wait for the previous (import/export)s to complete before starting the next.
1991 TTL_wait(1, simplex_buffer->event_out);
1992 TTL_wait(1, simplex_buffer->event_in);
1993
1994 if (TTL_tile_empty(simplex_buffer->next_exported_tile) == false)
1995 TTL_export(*TTL_to_void_tensor(TTL_to_const_tensor(&int_export_tensor)),
1996 *TTL_to_void_tensor(&export_to),
1997 simplex_buffer->event_out);
1998
1999 if (TTL_tile_empty(tile_next_import) == false)
2000 TTL_import_sub_tensor(*TTL_to_void_sub_tensor(&next_import_int_sub_tensor),
2001 *TTL_to_void_tensor(TTL_to_const_tensor(&next_import_ext_tensor)),
2002 simplex_buffer->event_in);
2003
2004 // The import/export has been started for the current tile; move to the
2005 // next tile.
2006 simplex_buffer->common.index =
2007 (simplex_buffer->common.index + 1) %
2008 (sizeof((simplex_buffer->common.int_base)) / sizeof((simplex_buffer->common.int_base)[0])); // Selects the buffer written to next.
2009
2010 // Retrieve the buffer imported previously; it is the one read from now.
2011 const TTL_int_ulong_sub_tensor_t int_curr_buff_in = simplex_buffer->int_prev_imported;
2012 simplex_buffer->int_prev_imported = next_import_int_sub_tensor;
2013
2014 // The output buffer can be written according to the size of the current
2015 // tile, rather than the size most recently exported.
2016 const TTL_layout_t curr_int_layout =
2017 TTL_create_layout(tile_current_export.shape.width, tile_current_export.shape.height);
2018 const TTL_int_ulong_sub_tensor_t int_curr_buff_out =
2019 TTL_create_int_sub_tensor(simplex_buffer->common.int_base[simplex_buffer->common.index],
2020 tile_current_export.shape,
2021 curr_int_layout,
2022 *TTL_to_const_tensor(&simplex_buffer->common.ext_tensor_in),
2023 tile_current_export.offset);
2024
2025 // Cache the current tile so repeated get_tile() calls are not needed.
2026 simplex_buffer->next_exported_tile = tile_current_export;
2027
2028 return TTL_create_io_tensors(int_curr_buff_in, int_curr_buff_out);
2029}
2030
2031static inline void __attribute__((overloadable)) TTL_finish_buffering(
2032 TTL_simplex_const_ulong_tensor_buffering_t *const simplex_buffering) {
2033 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
2034 TTL_step_buffering(simplex_buffering, TTL_create_empty_tile(), TTL_create_empty_tile());
2035}