Port NECol2ImKernel

Resolves: COMPMID-4511 Change-Id: Id6335cb23ef22bba02083498025da0ecb1647714 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5898 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Manuel Bottini <manuel.bottini@arm.com> 2021-07-01 18:13:33 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-09 09:42:34 +0000
commit: 24b892072a2bd8190ba63d09fb0082113d7d032a (patch)
tree: 6376c4449f0d2d7f146496ab32604b3835acf976
parent: da816752cad76c8e1b367e8e9c648994a1af599a (diff)
download: ComputeLibrary-24b892072a2bd8190ba63d09fb0082113d7d032a.tar.gz
9 files changed, 232 insertions, 167 deletions
diff --git a/Android.bp b/Android.bp
index 6a76e4c92a..8b6dd8392c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -145,7 +145,6 @@ cc_library_static {
         "src/core/NEON/kernels/NEBitwiseXorKernel.cpp",
         "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp",
         "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp",
-        "src/core/NEON/kernels/NECol2ImKernel.cpp",
         "src/core/NEON/kernels/NECropKernel.cpp",
         "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp",
         "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
@@ -248,6 +247,7 @@ cc_library_static {
         "src/core/cpu/kernels/CpuActivationKernel.cpp",
         "src/core/cpu/kernels/CpuAddKernel.cpp",
         "src/core/cpu/kernels/CpuCastKernel.cpp",
+        "src/core/cpu/kernels/CpuCol2ImKernel.cpp",
         "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp",
         "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp",
         "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp",
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 655d733bd1..e3b7d91187 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -40,13 +40,13 @@
 namespace arm_compute
 {
 class ITensor;
-class NECol2ImKernel;
 class NEWeightsReshapeKernel;
 namespace cpu
 {
 namespace kernels
 {
 class CpuIm2ColKernel;
+class CpuCol2ImKernel;
 } // namespace kernels
 } // namespace cpu
 
@@ -163,7 +163,7 @@ private:
  * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
  * -# @ref NEGEMMLowpOutputStage (if the data type is QASYMM8/QASYMM8_SIGNED)
  * -# @ref NEArithmeticAddition (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
- * -# @ref NECol2ImKernel (if NCHW data layout)
+ * -# @ref cpu::kernels::CpuCol2ImKernel (if NCHW data layout)
  *
  */
 class NEGEMMConvolutionLayer : public IFunction
@@ -292,12 +292,12 @@ private:
     std::unique_ptr<cpu::kernels::CpuIm2ColKernel>                     _im2col_kernel;
     NEGEMM                                                             _mm_gemm;
     NEGEMMLowpMatrixMultiplyCore                                       _mm_gemmlowp;
-    std::unique_ptr<NECol2ImKernel>                                    _col2im_kernel;
+    std::unique_ptr<cpu::kernels::CpuCol2ImKernel>                     _col2im_kernel;
     NEReshapeLayer                                                     _reshape_layer;
 
     const ITensor *_input;
     const ITensor *_original_weights;
-    const ITensor *_original_output;
+    ITensor       *_original_output;
 
     Tensor _im2col_output;
     Tensor _weights_reshaped;
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index e948c2f062..78b60976d7 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -1362,7 +1362,7 @@ v17.03.1 First Major public release of the sources
    - @ref NENormalizationLayerKernel / @ref NENormalizationLayer
    - NETransposeKernel / @ref NETranspose
    - NELogits1DMaxKernel, NELogits1DShiftExpSumKernel, NELogits1DNormKernel / @ref NESoftmaxLayer
-   - NEIm2ColKernel, @ref NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer
+   - NEIm2ColKernel, NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer
    - NEGEMMMatrixAccumulateBiasesKernel / @ref NEFullyConnectedLayer
    - NEGEMMLowpMatrixMultiplyKernel / NEGEMMLowp
 
diff --git a/filelist.json b/filelist.json
index 9d07492e6a..b8c1203374 100644
--- a/filelist.json
+++ b/filelist.json
@@ -798,7 +798,7 @@
       "Col2Im": {
         "files": {
           "kernel": [
-            "src/core/NEON/kernels/NECol2ImKernel.cpp"
+            "src/core/cpu/kernels/CpuCol2ImKernel.cpp"
           ]
         }
       },
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
deleted file mode 100644
index 4ba02f1542..0000000000
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <arm_neon.h>
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-using namespace misc::shape_calculator;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
-{
-    //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
-
-    // Validate configured output
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims, false));
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
-    }
-
-    return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &convolved_dims)
-{
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, false)));
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input, Steps());
-
-    // The NECol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
-
-    return std::make_pair(Status{}, win);
-}
-} // namespace
-
-template <typename T>
-void NECol2ImKernel::run_col2im(const Window &window)
-{
-    const int output_stride_x = _output->info()->strides_in_bytes().x();
-    const int output_stride_y = _output->info()->strides_in_bytes().y();
-    const int output_stride_z = _output->info()->strides_in_bytes().z();
-
-    Window window_out(window);
-    window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
-    window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
-    window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
-    // Create iterators
-    Iterator in(_input, window);
-    Iterator out(_output, window_out);
-
-    execute_window_loop(window, [&](const Coordinates & id)
-    {
-        const int hidx = id.y();
-        const int idx  = id.x() * output_stride_z + (hidx / _convolved_dims.width) * output_stride_y + (hidx % _convolved_dims.width) * output_stride_x;
-
-        *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
-    },
-    in, out);
-}
-
-NECol2ImKernel::NECol2ImKernel()
-    : _func(), _input(nullptr), _output(nullptr), _convolved_dims()
-{
-}
-
-void NECol2ImKernel::configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), convolved_dims));
-
-    _input          = input;
-    _output         = output;
-    _convolved_dims = convolved_dims;
-
-    switch(input->info()->element_size())
-    {
-        case 1:
-            _func = &NECol2ImKernel::run_col2im<uint8_t>;
-            break;
-        case 2:
-            _func = &NECol2ImKernel::run_col2im<uint16_t>;
-            break;
-        case 4:
-            _func = &NECol2ImKernel::run_col2im<uint32_t>;
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Element size not supported");
-            break;
-    }
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), output->info(), convolved_dims);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    INEKernel::configure(win_config.second);
-}
-
-Status NECol2ImKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, convolved_dims));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), convolved_dims).first);
-    return Status{};
-}
-
-void NECol2ImKernel::run(const Window &window, const ThreadInfo &info)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
-    (this->*_func)(window);
-}
diff --git a/src/core/cpu/kernels/CpuCol2ImKernel.cpp b/src/core/cpu/kernels/CpuCol2ImKernel.cpp
new file mode 100644
index 0000000000..f860825de6
--- /dev/null
+++ b/src/core/cpu/kernels/CpuCol2ImKernel.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+using namespace misc::shape_calculator;
+namespace cpu
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims)
+{
+    //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
+    ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+
+    // Validate configured output
+    if(dst->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), compute_col2im_shape(*src, convolved_dims, false));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+    }
+
+    return Status{};
+}
+} // namespace
+
+void CpuCol2ImKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const Size2D &convolved_dims)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, convolved_dims));
+
+    _convolved_dims = convolved_dims;
+
+    // Configure kernel window
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_col2im_shape(*src, convolved_dims, false)));
+
+    // Configure kernel window
+    Window win = calculate_max_window(*src, Steps());
+
+    ICpuKernel::configure(win);
+}
+
+Status CpuCol2ImKernel::validate(const ITensorInfo *src, const ITensorInfo *output, const Size2D &convolved_dims)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, output, convolved_dims));
+    return Status{};
+}
+
+void CpuCol2ImKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+
+    auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
+    auto dst = tensors.get_tensor(TensorType::ACL_DST);
+
+    const uint8_t el_size         = src->info()->element_size();
+    const int     output_stride_x = dst->info()->strides_in_bytes().x();
+    const int     output_stride_y = dst->info()->strides_in_bytes().y();
+    const int     output_stride_z = dst->info()->strides_in_bytes().z();
+
+    Window window_out(window);
+    window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+    window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+    window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
+
+    // Create iterators
+    Iterator in(src, window);
+    Iterator out(dst, window_out);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const int hidx = id.y();
+        const int idx  = id.x() * output_stride_z + (hidx / _convolved_dims.width) * output_stride_y + (hidx % _convolved_dims.width) * output_stride_x;
+        std::memcpy(out.ptr() + idx, in.ptr(), el_size);
+    },
+    in, out);
+}
+
+const char *CpuCol2ImKernel::name() const
+{
+    return "CpuCol2ImKernel";
+}
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+\ No newline at end of file
diff --git a/src/core/cpu/kernels/CpuCol2ImKernel.h b/src/core/cpu/kernels/CpuCol2ImKernel.h
new file mode 100644
index 0000000000..3c1802230b
--- /dev/null
+++ b/src/core/cpu/kernels/CpuCol2ImKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_COL2IM_KERNEL_H
+#define ARM_COMPUTE_CPU_COL2IM_KERNEL_H
+
+#include "arm_compute/core/Size2D.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Kernel to perform col2im reshaping.
+ *
+ * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CpuIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CpuCol2ImKernel : public ICpuKernel
+{
+public:
+    /** Default constructor */
+    CpuCol2ImKernel() = default;
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuCol2ImKernel);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  src            The input tensor info to convert. Data types supported: All
+     * @param[out] dst            The output tensor info. 3 lower dimensions represent a single output [width, height, OFM],
+     *                            while the rest represent batch of outputs. Data types supported: Same as @p input
+     * @param[in]  convolved_dims Output convolved dimensions.
+     */
+    void configure(const ITensorInfo *src, ITensorInfo *dst, const Size2D &convolved_dims);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * Similar to CpuCol2ImKernel::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims);
+
+    // Inherited methods overridden:
+    void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+    const char *name() const override;
+
+private:
+    Size2D _convolved_dims{};
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CPU_COL2IM_KERNEL_H */
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index f333364289..7c06b0adf5 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -30,8 +30,8 @@
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
 #include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
 #include "src/core/cpu/kernels/CpuIm2ColKernel.h"
 
 #include <set>
@@ -388,8 +388,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
         if(_data_layout == DataLayout::NCHW)
         {
             // Configure col2im
-            _col2im_kernel = std::make_unique<NECol2ImKernel>();
-            _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
+            _col2im_kernel = std::make_unique<cpu::kernels::CpuCol2ImKernel>();
+            _col2im_kernel->configure(gemm_output_to_use->info(), output->info(), Size2D(conv_w, conv_h));
         }
         else
         {
@@ -546,7 +546,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     // Validate Col2Im/ReshapeLayer
     if(!skip_col2im && (data_layout == DataLayout::NCHW))
     {
-        ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
+        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
     }
 
     return Status{};
@@ -594,7 +594,12 @@ void NEGEMMConvolutionLayer::run()
     {
         if(_data_layout == DataLayout::NCHW)
         {
-            NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY);
+            ITensorPack pack =
+            {
+                { TensorType::ACL_SRC, &_gemm_output },
+                { TensorType::ACL_DST, _original_output }
+            };
+            NEScheduler::get().schedule_op(_col2im_kernel.get(), Window::DimY, _col2im_kernel->window(), pack);
         }
         else
         {
diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp
index 9139f0cca8..3ae784bd04 100644
--- a/tests/validation/NEON/Col2Im.cpp
+++ b/tests/validation/NEON/Col2Im.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
 #include "tests/NEON/Helper.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
@@ -39,7 +39,7 @@ namespace validation
 TEST_SUITE(NEON)
 TEST_SUITE(Col2Im)
 
-using NECol2Im = NESynthetizeFunction<NECol2ImKernel>;
+using CpuCol2Im = NESynthetizeFunction<cpu::kernels::CpuCol2ImKernel>;
 
 // *INDENT-OFF*
 // clang-format off
@@ -59,7 +59,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("Expected", { false, false, false, true })),
                input_info, output_info, convolved_width, convolved_height, expected)
 {
-    bool status = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
+    bool status = bool(CpuCol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
     ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
author	Manuel Bottini <manuel.bottini@arm.com>	2021-07-01 18:13:33 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2021-07-09 09:42:34 +0000
commit	24b892072a2bd8190ba63d09fb0082113d7d032a (patch)
tree	6376c4449f0d2d7f146496ab32604b3835acf976
parent	da816752cad76c8e1b367e8e9c648994a1af599a (diff)
download	ComputeLibrary-24b892072a2bd8190ba63d09fb0082113d7d032a.tar.gz