aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/CL/CLKernels.h5
-rw-r--r--src/core/CL/kernels/CLCopyKernel.h83
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.h90
-rw-r--r--src/core/gpu/cl/kernels/ClCopyKernel.cpp (renamed from src/core/CL/kernels/CLCopyKernel.cpp)97
-rw-r--r--src/core/gpu/cl/kernels/ClCopyKernel.h71
-rw-r--r--src/core/gpu/cl/kernels/ClCropKernel.cpp (renamed from src/core/CL/kernels/CLCropKernel.cpp)89
-rw-r--r--src/core/gpu/cl/kernels/ClCropKernel.h (renamed from src/core/CL/kernels/CLCropKernel.h)73
-rw-r--r--src/core/gpu/cl/kernels/ClFillKernel.cpp (renamed from src/core/CL/kernels/CLMemsetKernel.cpp)54
-rw-r--r--src/core/gpu/cl/kernels/ClFillKernel.h (renamed from src/core/CL/kernels/CLMemsetKernel.h)52
-rw-r--r--src/core/gpu/cl/kernels/ClPermuteKernel.cpp (renamed from src/core/CL/kernels/CLPermuteKernel.cpp)98
-rw-r--r--src/core/gpu/cl/kernels/ClPermuteKernel.h86
-rw-r--r--src/core/gpu/cl/kernels/ClReshapeKernel.cpp (renamed from src/core/CL/kernels/CLReshapeLayerKernel.cpp)70
-rw-r--r--src/core/gpu/cl/kernels/ClReshapeKernel.h (renamed from src/core/CL/kernels/CLReshapeLayerKernel.h)41
13 files changed, 457 insertions, 452 deletions
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index ac051684a1..7383dce40f 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -42,8 +42,6 @@
#include "src/core/CL/kernels/CLComparisonKernel.h"
#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
#include "src/core/CL/kernels/CLConvolutionKernel.h"
-#include "src/core/CL/kernels/CLCopyKernel.h"
-#include "src/core/CL/kernels/CLCropKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
@@ -97,7 +95,6 @@
#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
-#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
@@ -105,7 +102,6 @@
#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "src/core/CL/kernels/CLPadLayerKernel.h"
-#include "src/core/CL/kernels/CLPermuteKernel.h"
#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
@@ -116,7 +112,6 @@
#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/CL/kernels/CLRemapKernel.h"
#include "src/core/CL/kernels/CLReorgLayerKernel.h"
-#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
#include "src/core/CL/kernels/CLReverseKernel.h"
#include "src/core/CL/kernels/CLScaleKernel.h"
#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
diff --git a/src/core/CL/kernels/CLCopyKernel.h b/src/core/CL/kernels/CLCopyKernel.h
deleted file mode 100644
index 9a20b88884..0000000000
--- a/src/core/CL/kernels/CLCopyKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOPYKERNEL_H
-#define ARM_COMPUTE_CLCOPYKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a copy between two tensors */
-class CLCopyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel(const CLCopyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel &operator=(const CLCopyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyKernel(CLCopyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyKernel &operator=(CLCopyKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- */
- void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
- /** Initialize the kernel's input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Window _output_window;
- bool _has_output_window;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */
diff --git a/src/core/CL/kernels/CLPermuteKernel.h b/src/core/CL/kernels/CLPermuteKernel.h
deleted file mode 100644
index d1bb875d7a..0000000000
--- a/src/core/CL/kernels/CLPermuteKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H
-#define ARM_COMPUTE_CLPERMUTEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform tensor permutation.
- *
- * Permutes given a permutation vector
- */
-class CLPermuteKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPermuteKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel(const CLPermuteKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel &operator=(const CLPermuteKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPermuteKernel(CLPermuteKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPermuteKernel &operator=(CLPermuteKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input First tensor input info. Data types supported: All.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PermutationVector _perm;
-};
-} // arm_compute
-#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
index ca38b65df4..d6c87f8fad 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,36 +21,45 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Validate output if initialized
- if(output->total_size() != 0)
+ // Validate dst if initialized
+ if(dst->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- if(output_window == nullptr)
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ if(dst_window == nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(src->tensor_shape(), dst->tensor_shape());
}
else
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output_window->shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(src->tensor_shape(), dst_window->shape());
}
}
@@ -59,56 +68,43 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, W
} // namespace
-CLCopyKernel::CLCopyKernel()
- : _input(nullptr), _output(nullptr), _output_window(), _has_output_window(false)
-{
-}
-
-void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output, Window *output_window)
+void ClCopyKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, output_window);
-}
-
-void CLCopyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), output_window));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, dst_window));
- auto padding_info = get_padding_info({ input, output });
-
- _input = input;
- _output = output;
+ auto padding_info = get_padding_info({ src, dst });
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
// Output auto initialization if not yet initialized
- auto_init_if_empty(*(output->info()), *(input->info()));
+ auto_init_if_empty(*dst, *src);
// Configure window
- const unsigned int vec_size_x = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0));
+ const unsigned int vec_size_x = adjust_vec_size(16 / src->element_size(), src->dimension(0));
- const Window win_config = calculate_max_window(*(input->info()), Steps(vec_size_x));
+ const Window win_config = calculate_max_window(*src, Steps(vec_size_x));
- if(output_window != nullptr)
+ if(dst_window != nullptr)
{
- _has_output_window = true;
- _output_window = Window(*output_window);
- const int width_x = output_window->num_iterations(0);
+ _has_dst_window = true;
+ _dst_window = Window(*dst_window);
+ const int width_x = dst_window->num_iterations(0);
const int vec_size_x_leftover = width_x % vec_size_x;
const bool multi_access_x = width_x >= static_cast<int32_t>(vec_size_x);
if(multi_access_x)
{
- _output_window.set(Window::DimX, Window::Dimension(output_window->x().start(), ceil_to_multiple(output_window->x().end(), vec_size_x), vec_size_x));
+ _dst_window.set(Window::DimX, Window::Dimension(dst_window->x().start(), ceil_to_multiple(dst_window->x().end(), vec_size_x), vec_size_x));
}
build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_x_leftover));
}
else
{
- const int width_x = input->info()->tensor_shape().x();
+ const int width_x = src->tensor_shape().x();
const int vec_size_x_leftover = width_x % vec_size_x;
build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_x_leftover));
@@ -125,32 +121,35 @@ void CLCopyKernel::configure(const CLCompileContext &compile_context, const ICLT
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLCopyKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output, Window *output_window)
+Status ClCopyKernel::validate(const arm_compute::ITensorInfo *src, const arm_compute::ITensorInfo *dst, Window *dst_window)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, output_window));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, dst_window));
return Status{};
}
-void CLCopyKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClCopyKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
Window slice;
- if(_has_output_window)
+ if(_has_dst_window)
{
slice = window.first_slice_window_3D();
- Window out_slice = _output_window.first_slice_window_3D();
+ Window out_slice = _dst_window.first_slice_window_3D();
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, out_slice);
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, out_slice);
enqueue(queue, *this, slice, lws_hint());
}
- while(window.slide_window_slice_3D(slice) && _output_window.slide_window_slice_3D(out_slice));
+ while(window.slide_window_slice_3D(slice) && _dst_window.slide_window_slice_3D(out_slice));
}
else
{
@@ -159,11 +158,13 @@ void CLCopyKernel::run(const Window &window, cl::CommandQueue &queue)
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClCopyKernel.h b/src/core/gpu/cl/kernels/ClCopyKernel.h
new file mode 100644
index 0000000000..9d5457d72d
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClCopyKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_COPY_KERNEL_H
+#define ARM_COMPUTE_CL_COPY_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** OpenCL kernel to perform a copy between two tensors */
+class ClCopyKernel : public ICLKernel
+{
+public:
+ ClCopyKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCopyKernel);
+ /** Initialize the kernel's src, dst.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[out] dst Destination tensor info. Data types supported: same as @p src.
+ * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClCopyKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src.
+ * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ Window _dst_window{};
+ bool _has_dst_window{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_COPY_KERNEL_H */
diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/gpu/cl/kernels/ClCropKernel.cpp
index 9cf15ff93b..20e94b2876 100644
--- a/src/core/CL/kernels/CLCropKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClCropKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,57 +21,55 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLCropKernel.h"
+#include "src/core/gpu/cl/kernels/ClCropKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
#include "src/core/CPP/Validate.h"
#include "src/core/helpers/WindowHelpers.h"
-
+#include "support/Cast.h"
#include "support/StringSupport.h"
#include <map>
namespace arm_compute
{
-CLCropKernel::CLCropKernel()
- : _input(nullptr), _output(nullptr), _start(), _batch_index(0), _extrapolation_value(0)
+namespace opencl
{
-}
-
-void CLCropKernel::configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window)
+namespace kernels
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, start, end, batch_index, extrapolation_value, output_window);
+void ClCropKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), src, dst, start, end, batch_index, extrapolation_value, dst_window);
}
-void CLCropKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index,
- float extrapolation_value, Window *output_window)
+void ClCropKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index,
+ float extrapolation_value, Window *dst_window)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), start, end, batch_index, extrapolation_value, output_window));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, start, end, batch_index, extrapolation_value, dst_window));
- _input = input;
- _output = output;
_start = start;
_batch_index = batch_index;
_extrapolation_value = extrapolation_value;
const int vec_size_x = 4;
// Create and update the window (if needed)
- Window win = calculate_max_window(*output->info());
+ Window win = calculate_max_window(*dst);
- if(output_window != nullptr)
+ if(dst_window != nullptr)
{
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *output_window);
- win = *output_window;
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *dst_window);
+ win = *dst_window;
}
- const int output_width_x = win.num_iterations(0);
- const bool multi_access_x = output_width_x >= vec_size_x;
- const bool remainder_x = output_width_x % vec_size_x > 0;
+ const int dst_width_x = win.num_iterations(0);
+ const bool multi_access_x = dst_width_x >= vec_size_x;
+ const bool remainder_x = dst_width_x % vec_size_x > 0;
if(multi_access_x)
{
@@ -82,53 +80,58 @@ void CLCropKernel::configure(const CLCompileContext &compile_context, const ICLT
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
- build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(dst_width_x - vec_size_x, 0)));
build_opts.add_option_if(start.x > end.x, "-DWIDTH_FLIPPED=");
build_opts.add_option_if(start.y > end.y, "-DHEIGHT_FLIPPED=");
_kernel = create_kernel(compile_context, "crop_tensor", build_opts.options());
}
-Status CLCropKernel::validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window)
+Status ClCropKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
{
- ARM_COMPUTE_UNUSED(extrapolation_value, output_window);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4);
+ ARM_COMPUTE_UNUSED(extrapolation_value, dst_window);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(start.x < 0 || start.y < 0 || end.x < 0 || end.y < 0);
- ARM_COMPUTE_RETURN_ERROR_ON(start.x >= static_cast<int32_t>(input->dimension(1)) || start.y >= static_cast<int32_t>(input->dimension(2))
- || end.x >= static_cast<int32_t>(input->dimension(1)) || end.y >= static_cast<int32_t>(input->dimension(2)));
- ARM_COMPUTE_RETURN_ERROR_ON(batch_index >= input->dimension(3));
- if(output_window != nullptr)
+ ARM_COMPUTE_RETURN_ERROR_ON(start.x >= static_cast<int32_t>(src->dimension(1)) || start.y >= static_cast<int32_t>(src->dimension(2))
+ || end.x >= static_cast<int32_t>(src->dimension(1)) || end.y >= static_cast<int32_t>(src->dimension(2)));
+ ARM_COMPUTE_RETURN_ERROR_ON(batch_index >= src->dimension(3));
+ if(dst_window != nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON(output_window->x().step() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst_window->x().step() != 1);
}
- if(output->total_size() > 0)
+ if(dst->total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 3);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(dst, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->num_dimensions() > 3);
}
return Status{};
}
-void CLCropKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClCropKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
Window in_slice = Window();
- in_slice.use_tensor_dimensions(_input->info()->tensor_shape());
+ in_slice.use_tensor_dimensions(src->info()->tensor_shape());
in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), ceil_to_multiple(in_slice.x().end(), window.x().step()), window.x().step()));
in_slice.set(3, Window::Dimension(_batch_index, _batch_index + 1, 1));
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, in_slice);
- add_3D_tensor_argument(idx, _output, window);
+ add_3D_tensor_argument(idx, src, in_slice);
+ add_3D_tensor_argument(idx, dst, window);
add_argument(idx, _start.x);
add_argument(idx, _start.y);
enqueue(queue, *this, window, lws_hint());
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLCropKernel.h b/src/core/gpu/cl/kernels/ClCropKernel.h
index cbfada58ab..92f94d471b 100644
--- a/src/core/CL/kernels/CLCropKernel.h
+++ b/src/core/gpu/cl/kernels/ClCropKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,80 +24,75 @@
#ifndef ARM_COMPUTE_CLCROPKERNEL_H
#define ARM_COMPUTE_CLCROPKERNEL_H
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
namespace arm_compute
{
-class ICLTensor;
-
+namespace opencl
+{
+namespace kernels
+{
/** OpenCL kernel to crop a region of a source tensor into a destination tensor */
-class CLCropKernel : public ICLKernel
+class ClCropKernel : public ICLKernel
{
public:
- /** Default constructor */
- CLCropKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel(const CLCropKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel &operator=(const CLCropKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCropKernel(CLCropKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCropKernel &operator=(CLCropKernel &&) = default;
+ ClCropKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClCropKernel);
/** Configure kernel
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] dst Destination tensor info. Data type supported: F32
* @param[in] start Coordinates of where to start cropping the image.
* @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
* @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
*/
- void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
+ void configure(const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *dst_window = nullptr);
/** Configure kernel
*
* @note Supported tensor rank: up to 4
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] dst Destination tensor info. Data type supported: F32
* @param[in] start Coordinates of where to start cropping the image.
* @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
* @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *dst_window = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref ClCropKernel
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC.
- * @param[in] output Destination tensor info. Data type supported: F32
+ * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[in] dst Destination tensor info. Data type supported: F32
* @param[in] start Coordinates of where to start cropping the image.
* @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
* @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *dst_window = nullptr);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Coordinates2D _start;
- uint32_t _batch_index;
- float _extrapolation_value;
+ Coordinates2D _start{};
+ uint32_t _batch_index{};
+ float _extrapolation_value{};
};
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLCROPKERNEL_H */
diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/gpu/cl/kernels/ClFillKernel.cpp
index 2543b07a1a..b194ee549b 100644
--- a/src/core/CL/kernels/CLMemsetKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClFillKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,40 +21,46 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/gpu/cl/kernels/ClFillKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
-CLMemsetKernel::CLMemsetKernel()
- : ICLKernel(), _tensor(nullptr), _full_window()
+namespace opencl
{
-}
-
-void CLMemsetKernel::configure(ICLTensor *tensor,
- const PixelValue &constant_value,
- Window *window)
+namespace kernels
+{
+void ClFillKernel::configure(ITensorInfo *tensor,
+ const PixelValue &constant_value,
+ Window *window)
{
configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
}
-void CLMemsetKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor,
- const PixelValue &constant_value,
- Window *window)
+void ClFillKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor,
+ const PixelValue &constant_value,
+ Window *window)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window));
-
- _tensor = tensor;
+ ARM_COMPUTE_ERROR_THROW_ON(validate(tensor, constant_value, window));
- const DataType data_type = tensor->info()->data_type();
- const int vec_size_x = 16 / tensor->info()->element_size();
+ const DataType data_type = tensor->data_type();
+ const int vec_size_x = 16 / tensor->element_size();
// Create and update the window (if needed)
- _full_window = calculate_max_window(*tensor->info());
+ _full_window = calculate_max_window(*tensor);
Window win = _full_window;
if(window != nullptr)
{
@@ -81,7 +87,7 @@ void CLMemsetKernel::configure(const CLCompileContext &compile_context, ICLTenso
_kernel = create_kernel(compile_context, "memset", build_opts.options());
}
-Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window)
+Status ClFillKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window)
{
ARM_COMPUTE_UNUSED(tensor);
ARM_COMPUTE_UNUSED(constant_value);
@@ -92,11 +98,13 @@ Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &con
return Status{};
}
-void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClFillKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+ const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+
// Collapse all the batches on the third
Window collapsed = window.collapse_if_possible(_full_window, Window::DimZ);
Window slice = collapsed.first_slice_window_3D();
@@ -104,9 +112,11 @@ void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue)
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _tensor, slice);
+ add_3D_tensor_argument(idx, tensor, slice);
enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLMemsetKernel.h b/src/core/gpu/cl/kernels/ClFillKernel.h
index dc103f580f..136c1243fe 100644
--- a/src/core/CL/kernels/CLMemsetKernel.h
+++ b/src/core/gpu/cl/kernels/ClFillKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,50 +21,41 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
-#define ARM_COMPUTE_CLMEMSETKERNEL_H
+#ifndef ARM_COMPUTE_CL_FILL_KERNEL_H
+#define ARM_COMPUTE_CL_FILL_KERNEL_H
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
namespace arm_compute
{
-class ICLTensor;
-
+namespace opencl
+{
+namespace kernels
+{
/** Interface for filling the planes of a tensor */
-class CLMemsetKernel : public ICLKernel
+class ClFillKernel : public ICLKernel
{
public:
- /** Default constructor */
- CLMemsetKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel(const CLMemsetKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMemsetKernel(CLMemsetKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
- /** Default destructor */
- ~CLMemsetKernel() = default;
-
+ ClFillKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClFillKernel);
/** Initialise the kernel's tensor and filling value
*
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in,out] tensor Input tensor info. Supported data types: All.
* @param[in] constant_value The value used to fill the planes of the tensor
* @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
*/
- void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ void configure(ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
/** Initialise the kernel's tensor and filling value
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in,out] tensor Input tensor info. Supported data types: All.
* @param[in] constant_value The value used to fill the planes of the tensor
* @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
+ void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClFillKernel
*
* @param[in] tensor Source tensor info. Data types supported: All.
* @param[in] constant_value The value used to fill the planes of the tensor
@@ -75,11 +66,12 @@ public:
static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
- ICLTensor *_tensor;
- Window _full_window;
+ Window _full_window{};
};
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
#endif /*ARM_COMPUTE_CL_FILL_KERNEL_H */
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
index 07d83bddc2..992c2a89d3 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,34 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/gpu/cl/kernels/ClPermuteKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
-CLPermuteKernel::CLPermuteKernel()
- : _input(nullptr), _output(nullptr), _perm()
+namespace opencl
+{
+namespace kernels
{
-}
namespace
{
-TensorShape get_output_shape(const ITensorInfo *input, const PermutationVector &perm)
+TensorShape get_dst_shape(const ITensorInfo *src, const PermutationVector &perm)
{
- TensorShape output_shape = input->tensor_shape();
- permute(output_shape, perm);
- return output_shape;
+ TensorShape dst_shape = src->tensor_shape();
+ permute(dst_shape, perm);
+ return dst_shape;
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() < 1 || input->num_dimensions() > 4,
- "Permutation upto 4-D input tensor is supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() < 1 || src->num_dimensions() > 4,
+ "Permutation upto 4-D src tensor is supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(perm.num_dimensions() < 1 || perm.num_dimensions() > 4,
"Permutation vector size should be less than or equal to 4");
for(const auto &p : perm)
@@ -56,41 +65,39 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MSG(p >= perm.num_dimensions(), "Permutation vector has invalid values");
}
- // Validate configured output
- if(output->total_size() != 0)
+ // Validate configured dst
+ if(dst->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ const TensorShape dst_shape = misc::shape_calculator::compute_permutation_output_shape(*src, perm);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), dst_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
}
return Status{};
}
} // namespace
-void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
+void ClPermuteKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, perm);
+ configure(CLKernelLibrary::get().get_compile_context(), src, dst, perm);
}
-void CLPermuteKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
+void ClPermuteKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto padding_info = get_padding_info({ input, output });
- const TensorShape output_shape = get_output_shape(input->info(), perm);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ auto padding_info = get_padding_info({ src, dst });
+ const TensorShape dst_shape = get_dst_shape(src, perm);
// Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(dst_shape));
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, perm));
- _input = input;
- _output = output;
- _perm = perm;
+ _perm = perm;
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
- build_opts.add_option("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2)));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(src->data_type())));
+ build_opts.add_option("-DDEPTH_IN=" + support::cpp11::to_string(src->dimension(2)));
// New positions of width(W), height(H), channel(C) and batch(D) based on permutation vector
build_opts.add_option("-DP1=" + support::cpp11::to_string((_perm.num_dimensions() >= 1) ? perm[0] : 0));
build_opts.add_option("-DP2=" + support::cpp11::to_string((_perm.num_dimensions() >= 2) ? perm[1] : 1));
@@ -100,33 +107,36 @@ void CLPermuteKernel::configure(const CLCompileContext &compile_context, const I
_kernel = create_kernel(compile_context, "permute", build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
+ Window win = calculate_max_window(*src, Steps());
// The CLPermute doesn't need padding so update_window_and_padding() can be skipped
Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ coord.set_num_dimensions(dst->num_dimensions());
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
ICLKernel::configure_internal(win);
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
+Status ClPermuteKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, perm));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, perm));
return Status{};
}
-void CLPermuteKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClPermuteKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
- // Setup output slice
+ // Setup dst slice
Window slice_out(slice_in);
slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
@@ -136,10 +146,12 @@ void CLPermuteKernel::run(const Window &window, cl::CommandQueue &queue)
do
{
unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
+ add_4D_tensor_argument(idx, src, slice_in);
+ add_4D_tensor_argument(idx, dst, slice_out);
enqueue(queue, *this, slice_in, lws_hint());
}
while(window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/gpu/cl/kernels/ClPermuteKernel.h b/src/core/gpu/cl/kernels/ClPermuteKernel.h
new file mode 100644
index 0000000000..4cc72491bd
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClPermuteKernel.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_PERMUTE_KERNEL_H
+#define ARM_COMPUTE_CL_PERMUTE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** OpenCL kernel to perform tensor permutation.
+ *
+ * Permutes given a permutation vector
+ */
+class ClPermuteKernel : public ICLKernel
+{
+public:
+ ClPermuteKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPermuteKernel);
+ /** Set the src and dst of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[out] dst The dst tensor info. Data types supported: Same as @p src
+ * @param[in] perm Permutation vector
+ */
+ void configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
+ /** Set the src and dst of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[out] dst The dst tensor info. Data types supported: Same as @p src
+ * @param[in] perm Permutation vector
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClPermuteKernel
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] src The src tensor info. Data types supported: All.
+ * @param[in] dst The dst tensor info. Data types supported: same as @p src.
+ * @param[in] perm Permutation vector
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ PermutationVector _perm{};
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_PERMUTE_KERNEL_H */
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
index 58d7843624..4da3fa0e03 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -38,73 +38,77 @@
#include <string>
-/** [CLReshapeLayerKernel Kernel] **/
+/** [ClReshapeKernel Kernel] **/
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != output->tensor_shape().total_size());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() != dst->tensor_shape().total_size());
return Status{};
}
} // namespace
-void CLReshapeLayerKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output)
+void ClReshapeKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({ src, dst });
// Create kernel
- std::set<std::string> build_opts = { "-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size()) };
+ std::set<std::string> build_opts = { "-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size()) };
_kernel = create_kernel(compile_context, "reshape_layer", build_opts);
// Add static arguments
- const cl_int2 input_shape =
+ const cl_int2 src_shape =
{
{
- static_cast<cl_int>(input->tensor_shape()[0]),
- static_cast<cl_int>(input->tensor_shape()[1])
+ static_cast<cl_int>(src->tensor_shape()[0]),
+ static_cast<cl_int>(src->tensor_shape()[1])
}
};
- const cl_int2 output_shape =
+ const cl_int2 dst_shape =
{
{
- static_cast<cl_int>(output->tensor_shape()[0]),
- static_cast<cl_int>(output->tensor_shape()[1])
+ static_cast<cl_int>(dst->tensor_shape()[0]),
+ static_cast<cl_int>(dst->tensor_shape()[1])
}
};
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
- _kernel.setArg<cl_int2>(idx++, input_shape);
- _kernel.setArg<cl_int2>(idx++, output_shape);
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
+ _kernel.setArg<cl_int2>(idx++, src_shape);
+ _kernel.setArg<cl_int2>(idx++, dst_shape);
// Configure kernel window
- Window win = calculate_max_window(*input);
+ Window win = calculate_max_window(*src);
- // Set the output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set the dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ICLKernel::configure_internal(win);
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLReshapeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+Status ClReshapeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
return Status{};
}
-void CLReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClReshapeKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
@@ -115,11 +119,13 @@ void CLReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl
const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
- // Set inputs
+ // Set src and dst tensor arguments
unsigned int idx = 0;
add_3D_tensor_argument(idx, src, window_collapsed);
add_3D_tensor_argument(idx, dst, window_collapsed);
enqueue(queue, *this, slice, lws_hint());
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
-/** [CLReshapeLayerKernel Kernel] **/
+/** [ClReshapeKernel Kernel] **/
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.h b/src/core/gpu/cl/kernels/ClReshapeKernel.h
index 902c44649b..ee835c0fd3 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.h
+++ b/src/core/gpu/cl/kernels/ClReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,39 +21,46 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
+#ifndef ARM_COMPUTE_CL_RESHAPE_KERNEL_H
+#define ARM_COMPUTE_CL_RESHAPE_KERNEL_H
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
namespace arm_compute
{
-class ICLTensor;
-
+namespace opencl
+{
+namespace kernels
+{
/** Interface for the kernel to perform tensor reshaping */
-class CLReshapeLayerKernel : public ICLKernel
+class ClReshapeKernel : public ICLKernel
{
public:
- /** Set the input and output of the kernel
+ ClReshapeKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClReshapeKernel);
+ /** Set the src and dst of the kernel
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] src Source tensor info. Data type supported: All.
+ * @param[out] dst Destination tensor info. Data type supported: Same as @p src
*/
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output);
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref ClReshapeKernel
*
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] src Source tensor info. Data type supported: All
+ * @param[in] dst Destination tensor info. Data type supported: Same as @p src
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
};
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */
+#endif /*ARM_COMPUTE_CL_RESHAPE_KERNEL_H */