aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/core/CL/CLKernels.h1
-rw-r--r--arm_compute/core/CL/kernels/CLTileKernel.h78
-rw-r--r--arm_compute/core/Types.h3
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h10
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLTile.h57
-rw-r--r--src/core/CL/CLKernelLibrary.cpp5
-rw-r--r--src/core/CL/cl_kernels/tile.cl97
-rw-r--r--src/core/CL/kernels/CLTileKernel.cpp152
-rw-r--r--src/runtime/CL/functions/CLTile.cpp42
-rw-r--r--tests/validation/CL/Tile.cpp141
-rw-r--r--tests/validation/fixtures/TileFixture.h106
-rw-r--r--tests/validation/reference/Tile.cpp76
-rw-r--r--tests/validation/reference/Tile.h44
-rw-r--r--utils/TypePrinter.h31
15 files changed, 844 insertions, 0 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 1457d7b3ac..a045322353 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -125,6 +125,7 @@
#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "arm_compute/core/CL/kernels/CLTileKernel.h"
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h
new file mode 100644
index 0000000000..e575b7bcb6
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLTileKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTILEKERNEL_H__
+#define __ARM_COMPUTE_CLTILEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a Tile operation */
+class CLTileKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLTileKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTileKernel(const CLTileKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTileKernel &operator=(const CLTileKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLTileKernel(CLTileKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLTileKernel &operator=(CLTileKernel &&) = default;
+ /** Default destructor */
+ ~CLTileKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[out] output Destination tensor. Same as @p input
+ *
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[in] output Destination tensor info. Same as @p input
+ *
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTILEKERNEL_H__ */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 9f3857c6cd..d6122a683f 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -479,6 +479,9 @@ using PaddingInfo = std::pair<uint32_t, uint32_t>;
/** List of padding information */
using PaddingList = std::vector<PaddingInfo>;
+/** Information to produce a tiled version of a Tensor */
+using Multiples = std::vector<uint32_t>;
+
/** Region of interest */
struct ROI
{
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index da9ff56fd0..38906dfc9b 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -671,6 +671,16 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa
return padded_shape;
}
+inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples)
+{
+ TensorShape tiled_shape = input_shape;
+ for(size_t dim = 0; dim < multiples.size(); ++dim)
+ {
+ tiled_shape.set(dim, input_shape[dim] * multiples[dim]);
+ }
+ return tiled_shape;
+}
+
inline TensorShape compute_upsample_shape(const ITensorInfo &input, const Size2D &info)
{
const DataLayout data_layout = input.data_layout();
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index b4c5c2eb80..b00270cd62 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -128,6 +128,7 @@
#include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
#include "arm_compute/runtime/CL/functions/CLThreshold.h"
+#include "arm_compute/runtime/CL/functions/CLTile.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h"
#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h
new file mode 100644
index 0000000000..3e88896d24
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLTile.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTILE_H__
+#define __ARM_COMPUTE_CLTILE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLTileKernel */
+class CLTile : public ICLSimpleFunction
+{
+public:
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * @param[out] output Destination tensor. Same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLTile
+ *
+ * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * @param[in] output Destination tensor info. Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLTILE_H__ */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 12944061a9..6e5e97e3e1 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -410,6 +410,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "tablelookup_S16", "tablelookup.cl" },
{ "threshold_binary", "threshold.cl" },
{ "threshold_range", "threshold.cl" },
+ { "tile", "tile.cl" },
{ "transpose", "transpose.cl" },
{ "UYVY422_to_IYUV_bt709", "color_convert.cl" },
{ "UYVY422_to_NV12_bt709", "color_convert.cl" },
@@ -849,6 +850,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/threshold.clembed"
},
{
+ "tile.cl",
+#include "./cl_kernels/tile.clembed"
+ },
+ {
"transpose.cl",
#include "./cl_kernels/transpose.clembed"
},
diff --git a/src/core/CL/cl_kernels/tile.cl b/src/core/CL/cl_kernels/tile.cl
new file mode 100644
index 0000000000..ae625d99b1
--- /dev/null
+++ b/src/core/CL/cl_kernels/tile.cl
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+#if defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH) && defined(DST_DEPTH)
+/** Perform a floor operation on an input tensor.
+ *
+ * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note Can only take floating point data types.
+ *
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
+ * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void tile(
+ TENSOR4D_DECLARATION(input),
+ TENSOR4D_DECLARATION(output))
+{
+ Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
+ Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
+
+ // For all coordinates but x, each tile copies from the input
+ const int y = get_global_id(1);
+ const int z = get_global_id(2) % DST_DEPTH;
+ const int batch = get_global_id(2) / DST_DEPTH;
+
+#if defined(VEC_SIZE) && defined(OFFSET)
+ // If we are loading/storing multiple elements at time, we need to
+ // not exceed the input boundaries. The last threads need to backtrack
+ // of OFFSET elements. Those elements cumulates for previous tiles
+ const int id = (int)(get_global_id(0));
+ int x = id * VEC_SIZE;
+
+ // Shift x based on the previous offsets
+ const int tile_number = x / SRC_WIDTH;
+ x -= (tile_number) * OFFSET;
+ int x_input = x % SRC_WIDTH;
+
+ // Shift x based on being the last tile
+ const int last_tile = (int)(x_input + VEC_SIZE > SRC_WIDTH);
+ x -= last_tile * OFFSET;
+ x_input = x % SRC_WIDTH;
+ output.ptr -= (tile_number + last_tile) * OFFSET * output_stride_x;
+
+ // Update the input pointer
+ input.ptr = tensor4D_offset(&input, x_input, y % SRC_HEIGHT, z % SRC_DEPTH, batch % SRC_BATCHES);
+
+ // Copy the data
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+
+ VSTORE(VEC_SIZE)
+ (data, 0, (__global DATA_TYPE *)output.ptr);
+#else // !defined(VEC_SIZE) || !defined(OFFSET)
+ const int x = get_global_id(0);
+
+ // Update the input pointer
+ input.ptr = tensor4D_offset(&input, x % SRC_WIDTH, y % SRC_HEIGHT, z % SRC_DEPTH, batch % SRC_BATCHES);
+
+ *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr));
+#endif // defined(VEC_SIZE) && defined(OFFSET)
+}
+#endif // defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH) && defined(DST_DEPTH)
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
new file mode 100644
index 0000000000..7559e7ae72
--- /dev/null
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty());
+ ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e)
+ {
+ return e == 0;
+ }));
+
+ // Validate output if initialized
+ if(output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ }
+
+ return Status{};
+}
+} // namespace
+
+CLTileKernel::CLTileKernel()
+ : _input(nullptr), _output(nullptr)
+{
+}
+
+void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Auto initialize output
+ TensorShape tiled_shape = misc::shape_calculator::compute_tiled_shape(input->info()->tensor_shape(), multiples);
+ auto_init_if_empty(*output->info(), tiled_shape, 1, input->info()->data_type());
+
+ // Validate
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), multiples));
+
+ _input = input;
+ _output = output;
+
+ const DataType data_type = input->info()->data_type();
+ const int vec_size_x = 16 / input->info()->element_size();
+ const int input_width_x = input->info()->tensor_shape().x();
+ const unsigned int offset = ceil_to_multiple(input_width_x, vec_size_x) - input_width_x;
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width_x));
+ build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
+ build_opts.add_option("-DSRC_BATCHES=" + support::cpp11::to_string(input->info()->dimension(3)));
+ build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2)));
+ build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("tile", build_opts.options()));
+
+ // Configure window without padding
+ Window win = calculate_max_window(*output->info());
+
+ if(multi_access_x)
+ {
+ // If multi-access is enabled, no thread should cross the tile boundaries. This means we need
+ // as many threads as those to cover a single tile times multiples[0]. Note that if threads
+ // do not cross the boundaries of the tiles, they won't cross the boundaries of the last tile, and
+ // we don't need to pad the output
+ const unsigned int size_win_x = ceil_to_multiple(input->info()->dimension(0), vec_size_x) * multiples[0];
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), size_win_x, vec_size_x));
+ }
+
+ ICLKernel::configure_internal(win);
+
+ // Set config_id for enabling LWS tuning
+ _config_id = "tile";
+ _config_id += "_";
+ _config_id += lower_string(string_from_data_type(input->info()->data_type()));
+ for(unsigned int i = 0; i < multiples.size(); ++i)
+ {
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(input->info()->dimension(i));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(multiples[i]);
+ }
+}
+
+Status CLTileKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, multiples));
+ return Status{};
+}
+
+void CLTileKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = collapsed.first_slice_window_4D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, _input, slice);
+ add_4D_tensor_argument(idx, _output, slice);
+ enqueue(queue, *this, slice);
+ }
+ while(collapsed.slide_window_slice_4D(slice));
+}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLTile.cpp b/src/runtime/CL/functions/CLTile.cpp
new file mode 100644
index 0000000000..ec6a4abb6a
--- /dev/null
+++ b/src/runtime/CL/functions/CLTile.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLTile.h"
+
+#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+void CLTile::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLTileKernel>();
+ k->configure(input, output, multiples);
+ _kernel = std::move(k);
+}
+
+Status CLTile::validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples)
+{
+ return CLTileKernel::validate(input, output, multiples);
+}
+} // namespace arm_compute
diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp
new file mode 100644
index 0000000000..cb8590c212
--- /dev/null
+++ b/tests/validation/CL/Tile.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLTile.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/TileFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto MultiplesDataset = framework::dataset::make("Multiples", { Multiples{ 3 },
+ Multiples{ 2, 2 },
+ Multiples{ 2, 3, 4, 5 },
+ Multiples{ 2, 1, 2, 2 },
+ Multiples{ 2, 1, 3 },
+ Multiples{ 3, 3, 3 },
+ Multiples{ 2, 2, 2 }
+ });
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(Tile)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(10, 10), 1, DataType::F32),
+ TensorInfo(TensorShape(10, 10), 1, DataType::F32), // Mismatching shape
+ TensorInfo(TensorShape(10, 10), 1, DataType::F16), // Mismatching type
+ TensorInfo(TensorShape(10, 10), 1, DataType::F32)}), // Wrong multiples
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(10, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(20, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(20, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(10, 20), 1, DataType::F32)})),
+ framework::dataset::make("Multiples",{ Multiples{1, 2}, Multiples{1, 2}, Multiples{0, 1} })),
+ framework::dataset::make("Expected", {true, false, false, false })),
+ input_info, output_info, multiples, expected)
+{
+ const Status status = CLTile::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), multiples);
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLTileFixture = TileValidationFixture<CLTensor, CLAccessor, CLTile, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTileFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)),
+ MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLTileFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTileFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)),
+ MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLTileFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)),
+ MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Integer)
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTileFixture<int8_t>, framework::DatasetMode::ALL,
+ combine(
+ combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::S8 })),
+ MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S8
+TEST_SUITE_END() // Integer
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTileFixture<uint8_t>, framework::DatasetMode::ALL,
+ combine(
+ combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
+ MultiplesDataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // Tile
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/TileFixture.h b/tests/validation/fixtures/TileFixture.h
new file mode 100644
index 0000000000..cb70a6c160
--- /dev/null
+++ b/tests/validation/fixtures/TileFixture.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_TILE_FIXTURE
+#define ARM_COMPUTE_TEST_TILE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Tile.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class TileValidationFixture : public framework::Fixture
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, DataType data_type, const Multiples &multiples)
+ {
+ _target = compute_target(shape, data_type, multiples);
+ _reference = compute_reference(shape, data_type, multiples);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor)
+ {
+ library->fill_tensor_uniform(tensor, 0);
+ }
+
+ TensorType compute_target(const TensorShape &shape, DataType data_type, const Multiples &multiples)
+ {
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(shape, data_type);
+ TensorType dst;
+
+ // Create and configure function
+ FunctionType tile_func;
+ tile_func.configure(&src, &dst, multiples);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Fill tensors
+ fill(AccessorType(src));
+
+ // Compute function
+ tile_func.run();
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, const Multiples &multiples)
+ {
+ // Create reference
+ SimpleTensor<T> src{ shape, data_type };
+
+ // Fill reference
+ fill(src);
+
+ return reference::tile<T>(src, multiples);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_TILE_FIXTURE */
diff --git a/tests/validation/reference/Tile.cpp b/tests/validation/reference/Tile.cpp
new file mode 100644
index 0000000000..e87e515a51
--- /dev/null
+++ b/tests/validation/reference/Tile.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Tile.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> tile(const SimpleTensor<T> &src, const Multiples &multiples)
+{
+ // Create reference
+ const auto src_shape = src.shape();
+ const auto tiled_shape = misc::shape_calculator::compute_tiled_shape(src.shape(), multiples);
+
+ SimpleTensor<T> dst{ tiled_shape, src.data_type() };
+
+ for(int idx = 0; idx < dst.num_elements(); idx++)
+ {
+ Coordinates coord = index2coord(tiled_shape, idx);
+
+ const size_t x = coord.x();
+ const size_t y = coord.y();
+ const size_t z = coord.z();
+ const size_t w = coord[3];
+
+ Coordinates src_coords{ x % src_shape[0], y % src_shape[1], z % src_shape[2], w % src_shape[3] };
+ int src_idx = coord2index(src_shape, src_coords);
+
+ dst[idx] = src[src_idx];
+ }
+
+ return dst;
+}
+
+template SimpleTensor<uint8_t> tile(const SimpleTensor<uint8_t> &src, const Multiples &multiples);
+template SimpleTensor<int8_t> tile(const SimpleTensor<int8_t> &src, const Multiples &multiples);
+template SimpleTensor<uint16_t> tile(const SimpleTensor<uint16_t> &src, const Multiples &multiples);
+template SimpleTensor<int16_t> tile(const SimpleTensor<int16_t> &src, const Multiples &multiples);
+template SimpleTensor<uint32_t> tile(const SimpleTensor<uint32_t> &src, const Multiples &multiples);
+template SimpleTensor<int32_t> tile(const SimpleTensor<int32_t> &src, const Multiples &multiples);
+template SimpleTensor<half> tile(const SimpleTensor<half> &src, const Multiples &multiples);
+template SimpleTensor<float> tile(const SimpleTensor<float> &src, const Multiples &multiples);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Tile.h b/tests/validation/reference/Tile.h
new file mode 100644
index 0000000000..b8d15f639c
--- /dev/null
+++ b/tests/validation/reference/Tile.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TILE_H__
+#define __ARM_COMPUTE_TEST_TILE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> tile(const SimpleTensor<T> &src, const Multiples &multiples);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_TILE_H__ */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index d089a5b7ea..52bda2cfb3 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -882,6 +882,24 @@ inline ::std::ostream &operator<<(::std::ostream &os, const PaddingList &padding
return os;
}
+/** Formatted output of the Multiples type.
+ *
+ * @param[out] os Output stream.
+ * @param[in] multiples Type to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const Multiples &multiples)
+{
+ os << "(";
+ for(size_t i = 0; i < multiples.size() - 1; i++)
+ {
+ os << multiples[i] << ", ";
+ }
+ os << multiples.back() << ")";
+ return os;
+}
+
/** Formatted output of the InterpolationPolicy type.
*
* @param[out] os Output stream.
@@ -1234,6 +1252,19 @@ inline std::string to_string(const PaddingList &padding)
return str.str();
}
+/** Formatted output of the Multiples type.
+ *
+ * @param[in] multiples Type to output.
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const Multiples &multiples)
+{
+ std::stringstream str;
+ str << multiples;
+ return str.str();
+}
+
/** Formatted output of the InterpolationPolicy type.
*
* @param[in] policy Type to output.