aboutsummaryrefslogtreecommitdiff
path: root/src/core/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/gpu')
-rw-r--r--src/core/gpu/cl/kernels/ClCopyKernel.cpp170
-rw-r--r--src/core/gpu/cl/kernels/ClCopyKernel.h71
-rw-r--r--src/core/gpu/cl/kernels/ClCropKernel.cpp137
-rw-r--r--src/core/gpu/cl/kernels/ClCropKernel.h98
-rw-r--r--src/core/gpu/cl/kernels/ClFillKernel.cpp122
-rw-r--r--src/core/gpu/cl/kernels/ClFillKernel.h77
-rw-r--r--src/core/gpu/cl/kernels/ClPermuteKernel.cpp157
-rw-r--r--src/core/gpu/cl/kernels/ClPermuteKernel.h86
-rw-r--r--src/core/gpu/cl/kernels/ClReshapeKernel.cpp131
-rw-r--r--src/core/gpu/cl/kernels/ClReshapeKernel.h66
10 files changed, 1115 insertions, 0 deletions
diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.cpp b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
new file mode 100644
index 0000000000..d6c87f8fad
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ // Validate dst if initialized
+ if(dst->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ if(dst_window == nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(src->tensor_shape(), dst->tensor_shape());
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(src->tensor_shape(), dst_window->shape());
+ }
+ }
+
+ return Status{};
+}
+
+} // namespace
+
+void ClCopyKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, dst_window));
+
+ auto padding_info = get_padding_info({ src, dst });
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*dst, *src);
+
+ // Configure window
+ const unsigned int vec_size_x = adjust_vec_size(16 / src->element_size(), src->dimension(0));
+
+ const Window win_config = calculate_max_window(*src, Steps(vec_size_x));
+
+ if(dst_window != nullptr)
+ {
+ _has_dst_window = true;
+ _dst_window = Window(*dst_window);
+ const int width_x = dst_window->num_iterations(0);
+ const int vec_size_x_leftover = width_x % vec_size_x;
+ const bool multi_access_x = width_x >= static_cast<int32_t>(vec_size_x);
+
+ if(multi_access_x)
+ {
+ _dst_window.set(Window::DimX, Window::Dimension(dst_window->x().start(), ceil_to_multiple(dst_window->x().end(), vec_size_x), vec_size_x));
+ }
+
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_x_leftover));
+ }
+ else
+ {
+ const int width_x = src->tensor_shape().x();
+ const int vec_size_x_leftover = width_x % vec_size_x;
+
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_x_leftover));
+ }
+
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+
+ // Build kernel
+ _kernel = create_kernel(compile_context, "copy_tensor", build_opts.options());
+
+ // Validate and set the window
+ ICLKernel::configure_internal(win_config);
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+Status ClCopyKernel::validate(const arm_compute::ITensorInfo *src, const arm_compute::ITensorInfo *dst, Window *dst_window)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, dst_window));
+
+ return Status{};
+}
+
+void ClCopyKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ Window slice;
+
+ if(_has_dst_window)
+ {
+ slice = window.first_slice_window_3D();
+ Window out_slice = _dst_window.first_slice_window_3D();
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, out_slice);
+ enqueue(queue, *this, slice, lws_hint());
+ }
+ while(window.slide_window_slice_3D(slice) && _dst_window.slide_window_slice_3D(out_slice));
+ }
+ else
+ {
+ Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ slice = collapsed.first_slice_window_3D();
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ }
+ while(collapsed.slide_window_slice_3D(slice));
+ }
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.h b/src/core/gpu/cl/kernels/ClCopyKernel.h
new file mode 100644
index 0000000000..9d5457d72d
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_COPY_KERNEL_H
+#define ARM_COMPUTE_CL_COPY_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** OpenCL kernel to perform a copy between two tensors */
+class ClCopyKernel : public ICLKernel
+{
+public:
+ ClCopyKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCopyKernel);
+ /** Initialize the kernel's src, dst.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[out] dst Destination tensor info. Data types supported: same as @p src.
+ * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClCopyKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src.
+ * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ Window _dst_window{};
+ bool _has_dst_window{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_COPY_KERNEL_H */
diff --git a/src/core/gpu/cl/kernels/ClCropKernel.cpp b/src/core/gpu/cl/kernels/ClCropKernel.cpp
new file mode 100644
index 0000000000..20e94b2876
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClCropKernel.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClCropKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+#include "support/StringSupport.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+void ClCropKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), src, dst, start, end, batch_index, extrapolation_value, dst_window);
+}
+
+void ClCropKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index,
+ float extrapolation_value, Window *dst_window)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, start, end, batch_index, extrapolation_value, dst_window));
+
+ _start = start;
+ _batch_index = batch_index;
+ _extrapolation_value = extrapolation_value;
+
+ const int vec_size_x = 4;
+ // Create and update the window (if needed)
+ Window win = calculate_max_window(*dst);
+
+ if(dst_window != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *dst_window);
+ win = *dst_window;
+ }
+
+ const int dst_width_x = win.num_iterations(0);
+ const bool multi_access_x = dst_width_x >= vec_size_x;
+ const bool remainder_x = dst_width_x % vec_size_x > 0;
+
+ if(multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(dst_width_x - vec_size_x, 0)));
+ build_opts.add_option_if(start.x > end.x, "-DWIDTH_FLIPPED=");
+ build_opts.add_option_if(start.y > end.y, "-DHEIGHT_FLIPPED=");
+ _kernel = create_kernel(compile_context, "crop_tensor", build_opts.options());
+}
+
+Status ClCropKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
+{
+ ARM_COMPUTE_UNUSED(extrapolation_value, dst_window);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(start.x < 0 || start.y < 0 || end.x < 0 || end.y < 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(start.x >= static_cast<int32_t>(src->dimension(1)) || start.y >= static_cast<int32_t>(src->dimension(2))
+ || end.x >= static_cast<int32_t>(src->dimension(1)) || end.y >= static_cast<int32_t>(src->dimension(2)));
+ ARM_COMPUTE_RETURN_ERROR_ON(batch_index >= src->dimension(3));
+ if(dst_window != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(dst_window->x().step() != 1);
+ }
+ if(dst->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(dst, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->num_dimensions() > 3);
+ }
+ return Status{};
+}
+
+void ClCropKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ Window in_slice = Window();
+ in_slice.use_tensor_dimensions(src->info()->tensor_shape());
+ in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), ceil_to_multiple(in_slice.x().end(), window.x().step()), window.x().step()));
+ in_slice.set(3, Window::Dimension(_batch_index, _batch_index + 1, 1));
+
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, in_slice);
+ add_3D_tensor_argument(idx, dst, window);
+ add_argument(idx, _start.x);
+ add_argument(idx, _start.y);
+ enqueue(queue, *this, window, lws_hint());
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClCropKernel.h b/src/core/gpu/cl/kernels/ClCropKernel.h
new file mode 100644
index 0000000000..92f94d471b
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClCropKernel.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCROPKERNEL_H
+#define ARM_COMPUTE_CLCROPKERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** OpenCL kernel to perform a copy between two tensors */
+class ClCropKernel : public ICLKernel
+{
+public:
+ ClCropKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCropKernel);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] dst Destination tensor info. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ void configure(const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *dst_window = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] dst Destination tensor info. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *dst_window = nullptr);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[in] dst Destination tensor info. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *dst_window = nullptr);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ Coordinates2D _start{};
+ uint32_t _batch_index{};
+ float _extrapolation_value{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCROPKERNEL_H */
diff --git a/src/core/gpu/cl/kernels/ClFillKernel.cpp b/src/core/gpu/cl/kernels/ClFillKernel.cpp
new file mode 100644
index 0000000000..b194ee549b
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClFillKernel.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClFillKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+void ClFillKernel::configure(ITensorInfo *tensor,
+ const PixelValue &constant_value,
+ Window *window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
+}
+
+void ClFillKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor,
+ const PixelValue &constant_value,
+ Window *window)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(tensor, constant_value, window));
+
+ const DataType data_type = tensor->data_type();
+ const int vec_size_x = 16 / tensor->element_size();
+
+ // Create and update the window (if needed)
+ _full_window = calculate_max_window(*tensor);
+ Window win = _full_window;
+ if(window != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *window);
+ win = *window;
+ }
+
+ const int output_width_x = win.num_iterations(0);
+ const bool multi_access_x = output_width_x >= vec_size_x;
+ const bool remainder_x = output_width_x % vec_size_x > 0;
+
+ if(multi_access_x)
+ {
+ win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ _kernel = create_kernel(compile_context, "memset", build_opts.options());
+}
+
+Status ClFillKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window)
+{
+ ARM_COMPUTE_UNUSED(tensor);
+ ARM_COMPUTE_UNUSED(constant_value);
+ if(window != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(window->x().step() != 1);
+ }
+ return Status{};
+}
+
+void ClFillKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+
+ // Collapse all the batches on the third
+ Window collapsed = window.collapse_if_possible(_full_window, Window::DimZ);
+ Window slice = collapsed.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, tensor, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ }
+ while(collapsed.slide_window_slice_3D(slice));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClFillKernel.h b/src/core/gpu/cl/kernels/ClFillKernel.h
new file mode 100644
index 0000000000..136c1243fe
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClFillKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_FILL_KERNEL_H
+#define ARM_COMPUTE_CL_FILL_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for filling the planes of a tensor */
+class ClFillKernel : public ICLKernel
+{
+public:
+ ClFillKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFillKernel);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in,out] tensor Input tensor info. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
+ */
+ void configure(ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Input tensor info. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClFillKernel
+ *
+ * @param[in] tensor Source tensor info. Data types supported: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ Window _full_window{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
new file mode 100644
index 0000000000..992c2a89d3
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClPermuteKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+TensorShape get_dst_shape(const ITensorInfo *src, const PermutationVector &perm)
+{
+ TensorShape dst_shape = src->tensor_shape();
+ permute(dst_shape, perm);
+ return dst_shape;
+}
+
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() < 1 || src->num_dimensions() > 4,
+ "Permutation upto 4-D src tensor is supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(perm.num_dimensions() < 1 || perm.num_dimensions() > 4,
+ "Permutation vector size should be less than or equal to 4");
+ for(const auto &p : perm)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(p >= perm.num_dimensions(), "Permutation vector has invalid values");
+ }
+
+ // Validate configured dst
+ if(dst->total_size() != 0)
+ {
+ const TensorShape dst_shape = misc::shape_calculator::compute_permutation_output_shape(*src, perm);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), dst_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ }
+ return Status{};
+}
+} // namespace
+
+void ClPermuteKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), src, dst, perm);
+}
+
+void ClPermuteKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ auto padding_info = get_padding_info({ src, dst });
+ const TensorShape dst_shape = get_dst_shape(src, perm);
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(dst_shape));
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, perm));
+
+ _perm = perm;
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(src->data_type())));
+ build_opts.add_option("-DDEPTH_IN=" + support::cpp11::to_string(src->dimension(2)));
+ // New positions of width(W), height(H), channel(C) and batch(D) based on permutation vector
+ build_opts.add_option("-DP1=" + support::cpp11::to_string((_perm.num_dimensions() >= 1) ? perm[0] : 0));
+ build_opts.add_option("-DP2=" + support::cpp11::to_string((_perm.num_dimensions() >= 2) ? perm[1] : 1));
+ build_opts.add_option("-DP3=" + support::cpp11::to_string((_perm.num_dimensions() >= 3) ? perm[2] : 2));
+ build_opts.add_option("-DP4=" + support::cpp11::to_string((_perm.num_dimensions() >= 4) ? perm[3] : 3));
+
+ _kernel = create_kernel(compile_context, "permute", build_opts.options());
+
+ // Configure kernel window
+ Window win = calculate_max_window(*src, Steps());
+
+ // The CLPermute doesn't need padding so update_window_and_padding() can be skipped
+ Coordinates coord;
+ coord.set_num_dimensions(dst->num_dimensions());
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
+
+ ICLKernel::configure_internal(win);
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+Status ClPermuteKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, perm));
+
+ return Status{};
+}
+
+void ClPermuteKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
+
+ // Setup dst slice
+ Window slice_out(slice_in);
+ slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+ slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+ slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ slice_out.set(3, Window::Dimension(0, 0, 0));
+
+ do
+ {
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, src, slice_in);
+ add_4D_tensor_argument(idx, dst, slice_out);
+ enqueue(queue, *this, slice_in, lws_hint());
+ }
+ while(window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.h b/src/core/gpu/cl/kernels/ClPermuteKernel.h
new file mode 100644
index 0000000000..4cc72491bd
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_PERMUTE_KERNEL_H
+#define ARM_COMPUTE_CL_PERMUTE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** OpenCL kernel to perform tensor permutation.
+ *
+ * Permutes given a permutation vector
+ */
+class ClPermuteKernel : public ICLKernel
+{
+public:
+ ClPermuteKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPermuteKernel);
+ /** Set the src and dst of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[in] dst The dst tensor info. Data types supported: Same as @p src
+ * @param[in] perm Permutation vector
+ */
+ void configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
+ /** Set the src and dst of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[in] dst The dst tensor info. Data types supported: Same as @p src
+ * @param[in] perm Permutation vector
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClPermuteKernel
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[in] dst The dst tensor info. Data types supported: same as @p src.
+ * @param[in] perm Permutation vector
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ PermutationVector _perm{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_PERMUTE_KERNEL_H */
diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
new file mode 100644
index 0000000000..4da3fa0e03
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
+#include <string>
+
+/** [ClReshapeKernel Kernel] **/
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() != dst->tensor_shape().total_size());
+
+ return Status{};
+}
+} // namespace
+
+void ClReshapeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
+
+ auto padding_info = get_padding_info({ src, dst });
+
+ // Create kernel
+ std::set<std::string> build_opts = { "-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size()) };
+ _kernel = create_kernel(compile_context, "reshape_layer", build_opts);
+
+ // Add static arguments
+ const cl_int2 src_shape =
+ {
+ {
+ static_cast<cl_int>(src->tensor_shape()[0]),
+ static_cast<cl_int>(src->tensor_shape()[1])
+ }
+ };
+ const cl_int2 dst_shape =
+ {
+ {
+ static_cast<cl_int>(dst->tensor_shape()[0]),
+ static_cast<cl_int>(dst->tensor_shape()[1])
+ }
+ };
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
+ _kernel.setArg<cl_int2>(idx++, src_shape);
+ _kernel.setArg<cl_int2>(idx++, dst_shape);
+
+ // Configure kernel window
+ Window win = calculate_max_window(*src);
+
+ // Set the dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+ ICLKernel::configure_internal(win);
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+Status ClReshapeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
+
+ return Status{};
+}
+
+void ClReshapeKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_3D();
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ // Set srcs
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, window_collapsed);
+ add_3D_tensor_argument(idx, dst, window_collapsed);
+ enqueue(queue, *this, slice, lws_hint());
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+/** [ClReshapeKernel Kernel] **/
diff --git a/src/core/gpu/cl/kernels/ClReshapeKernel.h b/src/core/gpu/cl/kernels/ClReshapeKernel.h
new file mode 100644
index 0000000000..ee835c0fd3
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_RESHAPE_KERNEL_H
+#define ARM_COMPUTE_CL_RESHAPE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the kernel to perform tensor reshaping */
+class ClReshapeKernel : public ICLKernel
+{
+public:
+ ClReshapeKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClReshapeKernel);
+ /** Set the src and dst of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data type supported: All.
+ * @param[out] dst Destination tensor info. Data type supported: Same as @p src
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref ClReshapeKernel
+ *
+ * @param[in] src Source tensor info. Data type supported: All
+ * @param[in] dst Destination tensor info. Data type supported: Same as @p src
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace opencl
+} // namespace kernels
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_RESHAPE_KERNEL_H */