aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Manuel Bottini <manuel.bottini@arm.com> 2021-07-01 18:13:33 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-09 09:42:34 +0000
commit 24b892072a2bd8190ba63d09fb0082113d7d032a (patch)
tree 6376c4449f0d2d7f146496ab32604b3835acf976
parent da816752cad76c8e1b367e8e9c648994a1af599a (diff)
download ComputeLibrary-24b892072a2bd8190ba63d09fb0082113d7d032a.tar.gz
Port NECol2ImKernel
Resolves: COMPMID-4511
Change-Id: Id6335cb23ef22bba02083498025da0ecb1647714
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5898
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- Android.bp | 2
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 8
-rw-r--r-- docs/user_guide/release_version_and_change_log.dox | 2
-rw-r--r-- filelist.json | 2
-rw-r--r-- src/core/NEON/kernels/NECol2ImKernel.cpp | 151
-rw-r--r-- src/core/cpu/kernels/CpuCol2ImKernel.cpp | 124
-rw-r--r-- src/core/cpu/kernels/CpuCol2ImKernel.h | 87
-rw-r--r-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 15
-rw-r--r-- tests/validation/NEON/Col2Im.cpp | 8
9 files changed, 232 insertions, 167 deletions
diff --git a/Android.bp b/Android.bp
index 6a76e4c92a..8b6dd8392c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -145,7 +145,6 @@ cc_library_static {
"src/core/NEON/kernels/NEBitwiseXorKernel.cpp",
"src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp",
"src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp",
- "src/core/NEON/kernels/NECol2ImKernel.cpp",
"src/core/NEON/kernels/NECropKernel.cpp",
"src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp",
"src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
@@ -248,6 +247,7 @@ cc_library_static {
"src/core/cpu/kernels/CpuActivationKernel.cpp",
"src/core/cpu/kernels/CpuAddKernel.cpp",
"src/core/cpu/kernels/CpuCastKernel.cpp",
+ "src/core/cpu/kernels/CpuCol2ImKernel.cpp",
"src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp",
"src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp",
"src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp",
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 655d733bd1..e3b7d91187 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -40,13 +40,13 @@
namespace arm_compute
{
class ITensor;
-class NECol2ImKernel;
class NEWeightsReshapeKernel;
namespace cpu
{
namespace kernels
{
class CpuIm2ColKernel;
+class CpuCol2ImKernel;
} // namespace kernels
} // namespace cpu
@@ -163,7 +163,7 @@ private:
* -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
* -# @ref NEGEMMLowpOutputStage (if the data type is QASYMM8/QASYMM8_SIGNED)
* -# @ref NEArithmeticAddition (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
- * -# @ref NECol2ImKernel (if NCHW data layout)
+ * -# @ref cpu::kernels::CpuCol2ImKernel (if NCHW data layout)
*
*/
class NEGEMMConvolutionLayer : public IFunction
@@ -292,12 +292,12 @@ private:
std::unique_ptr<cpu::kernels::CpuIm2ColKernel> _im2col_kernel;
NEGEMM _mm_gemm;
NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- std::unique_ptr<NECol2ImKernel> _col2im_kernel;
+ std::unique_ptr<cpu::kernels::CpuCol2ImKernel> _col2im_kernel;
NEReshapeLayer _reshape_layer;
const ITensor *_input;
const ITensor *_original_weights;
- const ITensor *_original_output;
+ ITensor *_original_output;
Tensor _im2col_output;
Tensor _weights_reshaped;
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index e948c2f062..78b60976d7 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -1362,7 +1362,7 @@ v17.03.1 First Major public release of the sources
- @ref NENormalizationLayerKernel / @ref NENormalizationLayer
- NETransposeKernel / @ref NETranspose
- NELogits1DMaxKernel, NELogits1DShiftExpSumKernel, NELogits1DNormKernel / @ref NESoftmaxLayer
- - NEIm2ColKernel, @ref NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer
+ - NEIm2ColKernel, NECol2ImKernel, NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer
- NEGEMMMatrixAccumulateBiasesKernel / @ref NEFullyConnectedLayer
- NEGEMMLowpMatrixMultiplyKernel / NEGEMMLowp
diff --git a/filelist.json b/filelist.json
index 9d07492e6a..b8c1203374 100644
--- a/filelist.json
+++ b/filelist.json
@@ -798,7 +798,7 @@
"Col2Im": {
"files": {
"kernel": [
- "src/core/NEON/kernels/NECol2ImKernel.cpp"
+ "src/core/cpu/kernels/CpuCol2ImKernel.cpp"
]
}
},
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
deleted file mode 100644
index 4ba02f1542..0000000000
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <arm_neon.h>
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-using namespace misc::shape_calculator;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
-{
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
-
- // Validate configured output
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims, false));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &convolved_dims)
-{
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, false)));
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps());
-
- // The NECol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
-
- return std::make_pair(Status{}, win);
-}
-} // namespace
-
-template <typename T>
-void NECol2ImKernel::run_col2im(const Window &window)
-{
- const int output_stride_x = _output->info()->strides_in_bytes().x();
- const int output_stride_y = _output->info()->strides_in_bytes().y();
- const int output_stride_z = _output->info()->strides_in_bytes().z();
-
- Window window_out(window);
- window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
- // Create iterators
- Iterator in(_input, window);
- Iterator out(_output, window_out);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const int hidx = id.y();
- const int idx = id.x() * output_stride_z + (hidx / _convolved_dims.width) * output_stride_y + (hidx % _convolved_dims.width) * output_stride_x;
-
- *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
- },
- in, out);
-}
-
-NECol2ImKernel::NECol2ImKernel()
- : _func(), _input(nullptr), _output(nullptr), _convolved_dims()
-{
-}
-
-void NECol2ImKernel::configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), convolved_dims));
-
- _input = input;
- _output = output;
- _convolved_dims = convolved_dims;
-
- switch(input->info()->element_size())
- {
- case 1:
- _func = &NECol2ImKernel::run_col2im<uint8_t>;
- break;
- case 2:
- _func = &NECol2ImKernel::run_col2im<uint16_t>;
- break;
- case 4:
- _func = &NECol2ImKernel::run_col2im<uint32_t>;
- break;
- default:
- ARM_COMPUTE_ERROR("Element size not supported");
- break;
- }
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), convolved_dims);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- INEKernel::configure(win_config.second);
-}
-
-Status NECol2ImKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, convolved_dims));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), convolved_dims).first);
- return Status{};
-}
-
-void NECol2ImKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- (this->*_func)(window);
-}
diff --git a/src/core/cpu/kernels/CpuCol2ImKernel.cpp b/src/core/cpu/kernels/CpuCol2ImKernel.cpp
new file mode 100644
index 0000000000..f860825de6
--- /dev/null
+++ b/src/core/cpu/kernels/CpuCol2ImKernel.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+namespace arm_compute
+{
+using namespace misc::shape_calculator;
+namespace cpu
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims)
+{
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+
+ // Validate configured output
+ if(dst->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), compute_col2im_shape(*src, convolved_dims, false));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(src, dst);
+ }
+
+ return Status{};
+}
+} // namespace
+
+void CpuCol2ImKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const Size2D &convolved_dims)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, convolved_dims));
+
+ _convolved_dims = convolved_dims;
+
+ // Configure kernel window
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_col2im_shape(*src, convolved_dims, false)));
+
+ // Configure kernel window
+ Window win = calculate_max_window(*src, Steps());
+
+ ICpuKernel::configure(win);
+}
+
+Status CpuCol2ImKernel::validate(const ITensorInfo *src, const ITensorInfo *output, const Size2D &convolved_dims)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, output, convolved_dims));
+ return Status{};
+}
+
+void CpuCol2ImKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+
+ auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
+
+ const uint8_t el_size = src->info()->element_size();
+ const int output_stride_x = dst->info()->strides_in_bytes().x();
+ const int output_stride_y = dst->info()->strides_in_bytes().y();
+ const int output_stride_z = dst->info()->strides_in_bytes().z();
+
+ Window window_out(window);
+ window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+ window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+ window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
+
+ // Create iterators
+ Iterator in(src, window);
+ Iterator out(dst, window_out);
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int hidx = id.y();
+ const int idx = id.x() * output_stride_z + (hidx / _convolved_dims.width) * output_stride_y + (hidx % _convolved_dims.width) * output_stride_x;
+ std::memcpy(out.ptr() + idx, in.ptr(), el_size);
+ },
+ in, out);
+}
+
+const char *CpuCol2ImKernel::name() const
+{
+ return "CpuCol2ImKernel";
+}
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/cpu/kernels/CpuCol2ImKernel.h b/src/core/cpu/kernels/CpuCol2ImKernel.h
new file mode 100644
index 0000000000..3c1802230b
--- /dev/null
+++ b/src/core/cpu/kernels/CpuCol2ImKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_COL2IM_KERNEL_H
+#define ARM_COMPUTE_CPU_COL2IM_KERNEL_H
+
+#include "arm_compute/core/Size2D.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Kernel to perform col2im reshaping.
+ *
+ * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CpuIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CpuCol2ImKernel : public ICpuKernel
+{
+public:
+ /** Default constructor */
+ CpuCol2ImKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuCol2ImKernel);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] src The input tensor info to convert. Data types supported: All
+ * @param[out] dst The output tensor info. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p src
+ * @param[in] convolved_dims Output convolved dimensions.
+ */
+ void configure(const ITensorInfo *src, ITensorInfo *dst, const Size2D &convolved_dims);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to CpuCol2ImKernel::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
+
+private:
+ Size2D _convolved_dims{};
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CPU_COL2IM_KERNEL_H */
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index f333364289..7c06b0adf5 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -30,8 +30,8 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
#include "src/core/cpu/kernels/CpuIm2ColKernel.h"
#include <set>
@@ -388,8 +388,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
if(_data_layout == DataLayout::NCHW)
{
// Configure col2im
- _col2im_kernel = std::make_unique<NECol2ImKernel>();
- _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
+ _col2im_kernel = std::make_unique<cpu::kernels::CpuCol2ImKernel>();
+ _col2im_kernel->configure(gemm_output_to_use->info(), output->info(), Size2D(conv_w, conv_h));
}
else
{
@@ -546,7 +546,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
// Validate Col2Im/ReshapeLayer
if(!skip_col2im && (data_layout == DataLayout::NCHW))
{
- ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
}
return Status{};
@@ -594,7 +594,12 @@ void NEGEMMConvolutionLayer::run()
{
if(_data_layout == DataLayout::NCHW)
{
- NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY);
+ ITensorPack pack =
+ {
+ { TensorType::ACL_SRC, &_gemm_output },
+ { TensorType::ACL_DST, _original_output }
+ };
+ NEScheduler::get().schedule_op(_col2im_kernel.get(), Window::DimY, _col2im_kernel->window(), pack);
}
else
{
diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp
index 9139f0cca8..3ae784bd04 100644
--- a/tests/validation/NEON/Col2Im.cpp
+++ b/tests/validation/NEON/Col2Im.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/cpu/kernels/CpuCol2ImKernel.h"
#include "tests/NEON/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
@@ -39,7 +39,7 @@ namespace validation
TEST_SUITE(NEON)
TEST_SUITE(Col2Im)
-using NECol2Im = NESynthetizeFunction<NECol2ImKernel>;
+using CpuCol2Im = NESynthetizeFunction<cpu::kernels::CpuCol2ImKernel>;
// *INDENT-OFF*
// clang-format off
@@ -59,7 +59,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
framework::dataset::make("Expected", { false, false, false, true })),
input_info, output_info, convolved_width, convolved_height, expected)
{
- bool status = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
+ bool status = bool(CpuCol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
}
// clang-format on