author     Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-10 04:07:39 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>    2021-01-18 16:48:25 +0000
commit     61ba0697756af0e4dc191af158669af0cee2be79 (patch)
tree       1f04c06a759bfaa691728ea92d03375c08f5435f
parent     b5a450a1acc1149f99f7bb06b10694fba554f4e3 (diff)
download   ComputeLibrary-61ba0697756af0e4dc191af158669af0cee2be79.tar.gz
Make Concatenate kernels and operator stateless
- Rename all concatenate kernels to use the Cpu prefix and move appropriately

Change-Id: If647173e84969936ebd211d4d5ae6d1e73150bdc
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4799
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
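
At the kernel level, "stateless" means configure() consumes only ITensorInfo metadata while the tensor memory is bound per invocation through an ITensorPack, so one configured kernel can be shared across tensor instances and threads. Below is a minimal single-threaded sketch of that contract; the class name, the configure/validate/run_op signatures, and the ACL_SRC/ACL_DST pack keys are taken from the diff that follows, while the include paths and the direct run_op dispatch (in place of the scheduler) are assumptions for illustration.

#include "arm_compute/core/CPP/CPPTypes.h" // ThreadInfo (include paths assumed)
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "src/core/cpu/kernels/CpuConcatenateBatchKernel.h"

using namespace arm_compute;

// Concatenate src into dst at batch_offset along axis 3. The kernel stores no
// tensor pointers: configure() sees only metadata, buffers arrive per run.
void batch_concat_once(const ITensor *src, ITensor *dst, unsigned int batch_offset)
{
    cpu::kernels::CpuConcatenateBatchKernel k;
    ARM_COMPUTE_ERROR_THROW_ON(
        cpu::kernels::CpuConcatenateBatchKernel::validate(src->info(), batch_offset, dst->info()));
    k.configure(src->info(), batch_offset, dst->info()); // metadata only

    ITensorPack pack;
    pack.add_const_tensor(TensorType::ACL_SRC, src); // memory bound at run time
    pack.add_tensor(TensorType::ACL_DST, dst);

    ThreadInfo info{}; // single-threaded run over the full window
    k.run_op(pack, k.window(), info);
}

In the production path the scheduler performs this dispatch on per-thread sub-windows, which is exactly what the absence of per-instance tensor state makes safe.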
-rw-r--r--  Android.bp | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h | 70
-rw-r--r--  src/core/NEON/NEKernels.h | 4
-rw-r--r--  src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h | 89
-rw-r--r--  src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h | 83
-rw-r--r--  src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h | 82
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp (renamed from src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp) | 120
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateBatchKernel.h | 78
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp (renamed from src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp) | 99
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateDepthKernel.h (renamed from src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h) | 56
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp (renamed from src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp) | 95
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateHeightKernel.h | 72
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp (renamed from src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp) | 89
-rw-r--r--  src/core/cpu/kernels/CpuConcatenateWidthKernel.h | 72
-rw-r--r--  src/runtime/NEON/functions/NEConcatenateLayer.cpp | 157
-rw-r--r--  src/runtime/cpu/operators/CpuConcatenate.cpp | 173
-rw-r--r--  src/runtime/cpu/operators/CpuConcatenate.h | 81
17 files changed, 742 insertions(+), 687 deletions(-)
diff --git a/Android.bp b/Android.bp
index 2675068e91..5ebcb30b37 100644
--- a/Android.bp
+++ b/Android.bp
@@ -228,7 +228,6 @@ cc_library_static {
"src/core/NEON/kernels/NEAccumulateKernel.cpp",
"src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp",
"src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp",
- "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp",
"src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp",
"src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp",
"src/core/NEON/kernels/NEBitwiseAndKernel.cpp",
@@ -249,7 +248,6 @@ cc_library_static {
"src/core/NEON/kernels/NECopyKernel.cpp",
"src/core/NEON/kernels/NECropKernel.cpp",
"src/core/NEON/kernels/NECumulativeDistributionKernel.cpp",
- "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp",
"src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp",
"src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp",
"src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp",
@@ -288,7 +286,6 @@ cc_library_static {
"src/core/NEON/kernels/NEHOGDescriptorKernel.cpp",
"src/core/NEON/kernels/NEHOGDetectorKernel.cpp",
"src/core/NEON/kernels/NEHarrisCornersKernel.cpp",
- "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp",
"src/core/NEON/kernels/NEHistogramKernel.cpp",
"src/core/NEON/kernels/NEIm2ColKernel.cpp",
"src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp",
@@ -339,7 +336,6 @@ cc_library_static {
"src/core/NEON/kernels/NETransposeKernel.cpp",
"src/core/NEON/kernels/NEWarpKernel.cpp",
"src/core/NEON/kernels/NEWeightsReshapeKernel.cpp",
- "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp",
"src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
"src/core/NEON/kernels/arithmetic_addition/impl/NEON/integer.cpp",
"src/core/NEON/kernels/arithmetic_addition/impl/NEON/qasymm8.cpp",
@@ -424,6 +420,10 @@ cc_library_static {
"src/core/Validate.cpp",
"src/core/Version.cpp",
"src/core/cpu/kernels/CpuActivationKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp",
"src/core/cpu/kernels/CpuFloorKernel.cpp",
"src/core/cpu/kernels/activation/NEON/fp16.cpp",
"src/core/cpu/kernels/activation/NEON/fp32.cpp",
@@ -774,6 +774,7 @@ cc_library_static {
"src/runtime/TensorAllocator.cpp",
"src/runtime/Utils.cpp",
"src/runtime/cpu/operators/CpuActivation.cpp",
+ "src/runtime/cpu/operators/CpuConcatenate.cpp",
"src/runtime/cpu/operators/CpuFloor.cpp",
"utils/CommonGraphOptions.cpp",
"utils/GraphUtils.cpp",
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index fd35d0bc46..d16ab130a3 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,11 +27,8 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
-#include "support/Requires.h"
#include <memory>
-#include <vector>
namespace arm_compute
{
@@ -40,13 +37,7 @@ class ITensor;
class ITensorInfo;
class Status;
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
+/** Basic function to execute concatenate tensors along a given axis */
class NEConcatenateLayer : public IFunction
{
public:
@@ -92,62 +83,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
-class NEConcatenation : public INEOperator
-{
-public:
- /** Constructor */
- NEConcatenation();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConcatenation(const NEConcatenation &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConcatenation &operator=(const NEConcatenation &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConcatenation(NEConcatenation &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConcatenation &operator=(NEConcatenation &&) = delete;
- /** Default destructor */
- ~NEConcatenation() = default;
- /** Initialise the kernel's inputs vector and output.
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
- *
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- */
- void configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
- *
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- *
- * @return a status
- */
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::vector<std::unique_ptr<ICPPKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
-};
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NECONCATENATELAYER_H */
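
The experimental::NEConcatenation operator deleted above is superseded by cpu::CpuConcatenate (see src/runtime/cpu/operators/CpuConcatenate.h in the diffstat). A usage sketch follows, assuming the new operator keeps the configure/validate/run signatures of the operator removed here; only the class name and file location are confirmed by this patch, and the pack keying is left to the caller.

#include "arm_compute/core/Error.h" // include paths assumed
#include "arm_compute/core/ITensorPack.h"
#include "src/runtime/cpu/operators/CpuConcatenate.h"

#include <vector>

using namespace arm_compute;

// Width-concatenate (axis 0) tensors whose metadata is known up front. Like
// the kernels, the operator is stateless: tensor memory is supplied per run()
// through the pack, keyed with whatever TensorType ids the operator expects.
Status concat_width(const std::vector<const ITensorInfo *> &src_infos,
                    ITensorInfo *dst_info, ITensorPack &tensors)
{
    ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuConcatenate::validate(src_infos, dst_info, 0));

    cpu::CpuConcatenate op;
    op.configure(src_infos, dst_info, 0); // axis 0: width concatenation
    op.run(tensors);                      // buffers live in the caller's pack
    return Status{};
}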
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 0d447de44c..64c1c8f79b 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -29,7 +29,6 @@
#include "src/core/NEON/kernels/NEAccumulateKernel.h"
#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
@@ -50,7 +49,6 @@
#include "src/core/NEON/kernels/NECopyKernel.h"
#include "src/core/NEON/kernels/NECropKernel.h"
#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
@@ -89,7 +87,6 @@
#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
#include "src/core/NEON/kernels/NEHOGDetectorKernel.h"
#include "src/core/NEON/kernels/NEHarrisCornersKernel.h"
-#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
#include "src/core/NEON/kernels/NEHistogramKernel.h"
#include "src/core/NEON/kernels/NEIm2ColKernel.h"
#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
@@ -140,7 +137,6 @@
#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
-#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
#endif /* ARM_COMPUTE_NEKERNELS_H */
diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h
deleted file mode 100644
index b74a94805d..0000000000
--- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEBatchConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchConcatenateLayerKernel";
- }
- /** Default constructor */
- NEBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window);
-
-private:
- BatchConcatFunction *_func;
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h
deleted file mode 100644
index 9d100ebff1..0000000000
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEHeightConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHeightConcatenateLayerKernel";
- }
- /** Default constructor */
- NEHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h
deleted file mode 100644
index 81b4cbed9e..0000000000
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEWidthConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEWidthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- */
- void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- unsigned int _width_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */
diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
index ddf69710f9..e51c341851 100644
--- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,11 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateBatchKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
@@ -37,77 +38,81 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
template <typename T>
-void batch_concat(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window)
+void batch_concat(const ITensor *src, ITensor *dst, unsigned int batch_offset, const Window &window)
{
- // Offset input
- uint8_t *input_ptr = in->buffer() + in->info()->offset_first_element_in_bytes();
+ // Offset src
+ uint8_t *src_ptr = src->buffer() + src->info()->offset_first_element_in_bytes();
- // Offset output
- uint8_t *output_ptr = out->buffer() + out->info()->offset_first_element_in_bytes() + batch_offset * out->info()->strides_in_bytes()[3];
+ // Offset dst
+ uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + batch_offset * dst->info()->strides_in_bytes()[3];
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const int window_step_x = 16 / out->info()->element_size();
+ const int window_step_x = 16 / dst->info()->element_size();
Window win{ window };
win.set(Window::DimX, Window::Dimension(0, 1, 1));
- win.set(3, Window::Dimension(0, in->info()->tensor_shape()[3], 1));
+ win.set(3, Window::Dimension(0, src->info()->tensor_shape()[3], 1));
- Iterator input(in, win);
- Iterator output(out, win);
+ Iterator src_it(src, win);
+ Iterator dst_it(dst, win);
- const DataType dt = in->info()->data_type();
- const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform();
- const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
- if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo dst_qinfo = dst->info()->quantization_info().uniform();
+ if(dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const uint8_t *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<uint8_t *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const uint8_t *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<uint8_t *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- wrapper::vstore(out_ptr, vquantize(vdequantize(wrapper::vloadq(in_ptr), input_qinfo), output_qinfo));
+ wrapper::vstore(out_ptr, vquantize(vdequantize(wrapper::vloadq(in_ptr), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(out_ptr + x) = quantize_qasymm8(dequantize_qasymm8(*(in_ptr + x), input_qinfo), output_qinfo);
+ *(out_ptr + x) = quantize_qasymm8(dequantize_qasymm8(*(in_ptr + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
- else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+ else if(dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const int8_t *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<int8_t *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const int8_t *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- wrapper::vstore(out_ptr, vquantize_signed(vdequantize(wrapper::vloadq(in_ptr), input_qinfo), output_qinfo));
+ wrapper::vstore(out_ptr, vquantize_signed(vdequantize(wrapper::vloadq(in_ptr), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(out_ptr + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(in_ptr + x), input_qinfo), output_qinfo);
+ *(out_ptr + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(in_ptr + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
else
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const T *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<T *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
@@ -121,41 +126,41 @@ void batch_concat(const ITensor *in, ITensor *out, unsigned int batch_offset, co
*(out_ptr + x) = *(in_ptr + x);
}
},
- input, output);
+ src_it, dst_it);
}
}
-Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimZ) != dst->dimension(Window::DimZ));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(3) + batch_offset > dst->dimension(3));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, src, dst);
return Status{};
}
} // namespace
-NEBatchConcatenateLayerKernel::NEBatchConcatenateLayerKernel()
+CpuConcatenateBatchKernel::CpuConcatenateBatchKernel()
: _func(nullptr), _batch_offset(0)
{
}
-void NEBatchConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output)
+void CpuConcatenateBatchKernel::configure(const ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, batch_offset, dst));
_func = nullptr;
_batch_offset = batch_offset;
- switch(input->data_type())
+ switch(src->data_type())
{
case DataType::S8:
case DataType::U8:
@@ -178,26 +183,26 @@ void NEBatchConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned
}
// Configure kernel window
- Window win = calculate_max_window(*output, Steps());
+ Window win = calculate_max_window(*dst, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
- INEKernel::configure(win);
+ coord.set_num_dimensions(dst->num_dimensions());
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
+ ICpuKernel::configure(win);
}
-Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int batch_offset,
- const arm_compute::ITensorInfo *output)
+Status CpuConcatenateBatchKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int batch_offset,
+ const arm_compute::ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, batch_offset, dst));
return Status{};
}
-void NEBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuConcatenateBatchKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
(*_func)(tensors.get_const_tensor(TensorType::ACL_SRC),
@@ -205,4 +210,11 @@ void NEBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
_batch_offset,
window);
}
+
+const char *CpuConcatenateBatchKernel::name() const
+{
+ return "CpuConcatenateBatchKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuConcatenateBatchKernel.h b/src/core/cpu/kernels/CpuConcatenateBatchKernel.h
new file mode 100644
index 0000000000..99e8d84d99
--- /dev/null
+++ b/src/core/cpu/kernels/CpuConcatenateBatchKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the batch concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CpuConcatenateBatchKernel : public ICpuKernel
+{
+public:
+ CpuConcatenateBatchKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateBatchKernel);
+ /** Configure kernel for a given list of arguments
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] dst Destination tensor info. Data types supported: Same as @p src.
+ */
+ void configure(const ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateBatchKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
+
+private:
+ using BatchConcatFunction = void(const ITensor *, ITensor *, unsigned int, const Window &);
+
+private:
+ BatchConcatFunction *_func;
+ unsigned int _batch_offset;
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H */
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
index ba90bfcd4f..dee0283a2c 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateDepthKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -41,77 +41,81 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
template <typename T>
-void depth_concat(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window)
+void depth_concat(const ITensor *src, ITensor *dst, unsigned int depth_offset, const Window &window)
{
- // Offset input
- uint8_t *input_ptr = in->buffer() + in->info()->offset_first_element_in_bytes();
+ // Offset source
+ uint8_t *src_ptr = src->buffer() + src->info()->offset_first_element_in_bytes();
- // Offset output
- uint8_t *output_ptr = out->buffer() + out->info()->offset_first_element_in_bytes() + depth_offset * out->info()->strides_in_bytes()[2];
+ // Offset destination
+ uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + depth_offset * dst->info()->strides_in_bytes()[2];
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const int window_step_x = 16 / out->info()->element_size();
+ const int window_step_x = 16 / dst->info()->element_size();
Window win{ window };
win.set(Window::DimX, Window::Dimension(0, 1, 1));
- win.set(Window::DimZ, Window::Dimension(0, in->info()->tensor_shape().z(), 1));
+ win.set(Window::DimZ, Window::Dimension(0, src->info()->tensor_shape().z(), 1));
- Iterator input(in, win);
- Iterator output(out, win);
+ Iterator src_it(src, win);
+ Iterator dst_it(dst, win);
- const DataType dt = in->info()->data_type();
- const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform();
- const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
- if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo dst_qinfo = dst->info()->quantization_info().uniform();
+ if(dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const uint8_t *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<uint8_t *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const uint8_t *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<uint8_t *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- wrapper::vstore(out_ptr + x, vquantize(vdequantize(wrapper::vloadq(in_ptr + x), input_qinfo), output_qinfo));
+ wrapper::vstore(out_ptr + x, vquantize(vdequantize(wrapper::vloadq(in_ptr + x), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(out_ptr + x) = quantize_qasymm8(dequantize_qasymm8(*(in_ptr + x), input_qinfo), output_qinfo);
+ *(out_ptr + x) = quantize_qasymm8(dequantize_qasymm8(*(in_ptr + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
- else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+ else if(dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const int8_t *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<int8_t *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const int8_t *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- wrapper::vstore(out_ptr + x, vquantize_signed(vdequantize(wrapper::vloadq(in_ptr + x), input_qinfo), output_qinfo));
+ wrapper::vstore(out_ptr + x, vquantize_signed(vdequantize(wrapper::vloadq(in_ptr + x), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(out_ptr + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(in_ptr + x), input_qinfo), output_qinfo);
+ *(out_ptr + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(in_ptr + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
else
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset());
+ const auto in_ptr = reinterpret_cast<const T *>(src_ptr + src_it.offset());
+ const auto out_ptr = reinterpret_cast<T *>(dst_ptr + dst_it.offset());
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
@@ -123,7 +127,7 @@ void depth_concat(const ITensor *in, ITensor *out, unsigned int depth_offset, co
*(out_ptr + x) = *(in_ptr + x);
}
},
- input, output);
+ src_it, dst_it);
}
}
@@ -143,20 +147,20 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c
}
} // namespace
-NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel()
+CpuConcatenateDepthKernel::CpuConcatenateDepthKernel()
: _func(nullptr), _depth_offset(0)
{
}
-void NEDepthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
+void CpuConcatenateDepthKernel::configure(const ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst));
_func = nullptr;
_depth_offset = depth_offset;
- switch(input->data_type())
+ switch(src->data_type())
{
case DataType::QASYMM8:
_func = &depth_concat<uint8_t>;
@@ -175,27 +179,27 @@ void NEDepthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned
}
// Configure kernel window
- Window win = calculate_max_window(*output, Steps());
+ Window win = calculate_max_window(*dst, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
+ coord.set_num_dimensions(dst->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
- INEKernel::configure(win);
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
+ ICpuKernel::configure(win);
}
-Status NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int depth_offset,
- const arm_compute::ITensorInfo *output)
+Status CpuConcatenateDepthKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int depth_offset,
+ const arm_compute::ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst));
return Status{};
}
-void NEDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuConcatenateDepthKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
(*_func)(tensors.get_const_tensor(TensorType::ACL_SRC),
@@ -203,4 +207,11 @@ void NEDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
_depth_offset,
window);
}
+
+const char *CpuConcatenateDepthKernel::name() const
+{
+ return "CpuConcatenateDepthKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/src/core/cpu/kernels/CpuConcatenateDepthKernel.h
index 02c5479f93..af89c2464f 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
+++ b/src/core/cpu/kernels/CpuConcatenateDepthKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,68 +22,62 @@
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H
-#include "src/core/NEON/INEKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+namespace cpu
+{
+namespace kernels
+{
/** Interface for the depth concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
-class NEDepthConcatenateLayerKernel : public INEKernel
+class CpuConcatenateDepthKernel : public ICpuKernel
{
public:
- const char *name() const override
- {
- return "NEDepthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
+ CpuConcatenateDepthKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateDepthKernel);
+ /** Configure kernel for a given list of arguments
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in,out] dst Destination tensor info. Data types supported: Same as @p src.
*
* @note: The output tensor's low two dimensions can't be smaller than the input one's.
* @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
*
*/
- void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel
+ void configure(const ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateDepthKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
private:
- using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window);
+ using DepthConcatFunction = void(const ITensor *, ITensor *, unsigned int, const Window &);
private:
DepthConcatFunction *_func;
unsigned int _depth_offset;
};
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H */
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
index 227013a014..8522c93340 100644
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateHeightKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -40,63 +40,67 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) + height_offset > dst->dimension(Window::DimY));
for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
}
return Status{};
}
} // namespace
-NEHeightConcatenateLayerKernel::NEHeightConcatenateLayerKernel()
+CpuConcatenateHeightKernel::CpuConcatenateHeightKernel()
: _height_offset(0)
{
}
-void NEHeightConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output)
+void CpuConcatenateHeightKernel::configure(const ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_UNUSED(input);
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output));
+ ARM_COMPUTE_UNUSED(src);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, height_offset, dst));
_height_offset = height_offset;
// Configure kernel window
- Window win = calculate_max_window(*output, Steps());
+ Window win = calculate_max_window(*dst, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
- INEKernel::configure(win);
+ coord.set_num_dimensions(dst->num_dimensions());
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
+ ICpuKernel::configure(win);
}
-Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
+Status CpuConcatenateHeightKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, height_offset, dst));
return Status{};
}
-void NEHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuConcatenateHeightKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
- // Offset output pointer to the correct position
- uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY];
+ // Offset destination pointer to the correct position
+ uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY];
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size());
@@ -107,56 +111,56 @@ void NEHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &
win.set(Window::DimY, Window::Dimension(0, src->info()->tensor_shape().y(), 1));
// Create iterators
- Iterator input(src, win);
- Iterator output(dst, win);
+ Iterator src_it(src, win);
+ Iterator dst_it(dst, win);
- const DataType dt = src->info()->data_type();
- const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform();
- const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform();
- if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &dst_qinfo = dst->info()->quantization_info().uniform();
+ if(dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- vst1q_u8(output_ptr + output.offset() + x, vquantize(vdequantize(vld1q_u8(input.ptr() + x), input_qinfo), output_qinfo));
+ vst1q_u8(dst_ptr + dst_it.offset() + x, vquantize(vdequantize(vld1q_u8(src_it.ptr() + x), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + output.offset() + x) = quantize_qasymm8(dequantize_qasymm8(*(input.ptr() + x), input_qinfo), output_qinfo);
+ *(dst_ptr + dst_it.offset() + x) = quantize_qasymm8(dequantize_qasymm8(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
- else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+ else if(dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- vst1q_s8(reinterpret_cast<int8_t *>(output_ptr + output.offset() + x),
- vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<int8_t *>(input.ptr()) + x), input_qinfo), output_qinfo));
+ vst1q_s8(reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset() + x),
+ vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<int8_t *>(src_it.ptr()) + x), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + output.offset() + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(input.ptr() + x), input_qinfo), output_qinfo);
+ *(dst_ptr + dst_it.offset() + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
else
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = input.ptr();
- const auto out_ptr = output_ptr + output.offset();
+ const auto in_ptr = src_it.ptr();
+ const auto out_ptr = dst_ptr + dst_it.offset();
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
@@ -170,7 +174,14 @@ void NEHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &
*(out_ptr + x) = *(in_ptr + x);
}
},
- input, output);
+ src_it, dst_it);
}
}
+
+const char *CpuConcatenateHeightKernel::name() const
+{
+ return "CpuConcatenateHeightKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
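
Both quantized branches above requantize element-wise whenever the source and destination UniformQuantizationInfo differ. A minimal standalone sketch of that dequantize-then-quantize step, with hypothetical helper names and assuming simple round-to-nearest (not necessarily the library's exact rounding policy):

#include <algorithm>
#include <cmath>
#include <cstdint>

struct UniformQInfo { float scale; int32_t offset; }; // stands in for UniformQuantizationInfo

// Map a QASYMM8 value back to a real value using the source quantization.
static inline float dequantize_u8(uint8_t v, UniformQInfo qi)
{
    return (static_cast<int32_t>(v) - qi.offset) * qi.scale;
}

// Quantize a real value with the destination quantization, clamped to [0, 255].
static inline uint8_t quantize_u8(float v, UniformQInfo qi)
{
    const int32_t q = static_cast<int32_t>(std::lround(v / qi.scale)) + qi.offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
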
diff --git a/src/core/cpu/kernels/CpuConcatenateHeightKernel.h b/src/core/cpu/kernels/CpuConcatenateHeightKernel.h
new file mode 100644
index 0000000000..609bb21da7
--- /dev/null
+++ b/src/core/cpu/kernels/CpuConcatenateHeightKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the height concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class CpuConcatenateHeightKernel : public ICpuKernel
+{
+public:
+ CpuConcatenateHeightKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateHeightKernel);
+ /** Configure kernel for a given list of arguments
+ *
+ * @param[in] src Source tensor info. Data types supported: All
+ * @param[in]     height_offset The starting offset on the Y axis for the destination tensor.
+ * @param[in,out] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ */
+ void configure(const ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateHeightKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All
+ * @param[in] height_offset The starting offset on the Y axis for the destination tensor.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
+
+private:
+ unsigned int _height_offset;
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H */
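
With the kernel stateless, configure() above captures only tensor metadata and the height offset; buffers are bound at run time through an ITensorPack. A minimal usage sketch, assuming src and dst are already allocated arm_compute::Tensor objects consistent with the configuration:

arm_compute::cpu::kernels::CpuConcatenateHeightKernel kernel;
kernel.configure(src.info(), /* height_offset */ 0, dst.info()); // metadata only

arm_compute::ITensorPack pack;
pack.add_tensor(arm_compute::TensorType::ACL_SRC, &src); // buffers provided per run
pack.add_tensor(arm_compute::TensorType::ACL_DST, &dst);
arm_compute::NEScheduler::get().schedule_op(&kernel, arm_compute::Window::DimY, pack);
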
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
index b5afeed1f6..27ded05aff 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateWidthKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -40,63 +40,67 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0));
for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
}
return Status{};
}
} // namespace
-NEWidthConcatenateLayerKernel::NEWidthConcatenateLayerKernel()
+CpuConcatenateWidthKernel::CpuConcatenateWidthKernel()
: _width_offset(0)
{
}
-void NEWidthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
+void CpuConcatenateWidthKernel::configure(const ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst));
_width_offset = width_offset;
// Configure kernel window
- Window win = calculate_max_window(*input, Steps());
+ Window win = calculate_max_window(*src, Steps());
Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+ coord.set_num_dimensions(dst->num_dimensions());
+ dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
- INEKernel::configure(win);
+ ICpuKernel::configure(win);
}
-Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
+Status CpuConcatenateWidthKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst));
return Status{};
}
-void NEWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuConcatenateWidthKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
-    // Offset output pointer to the correct position
+    // Offset destination pointer to the correct position
- uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0];
+ uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0];
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size());
@@ -106,54 +110,54 @@ void NEWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
win.set(Window::DimX, Window::Dimension(0, 1, 1));
// Create iterators
- Iterator input(src, win);
- Iterator output(dst, win);
- const DataType dt = src->info()->data_type();
- const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform();
- const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform();
- if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
+ Iterator src_it(src, win);
+ Iterator dst_it(dst, win);
+ const DataType dt = src->info()->data_type();
+ const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo &dst_qinfo = dst->info()->quantization_info().uniform();
+ if(dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- vst1q_u8(output_ptr + output.offset() + x, vquantize(vdequantize(vld1q_u8(input.ptr() + x), input_qinfo), output_qinfo));
+ vst1q_u8(dst_ptr + dst_it.offset() + x, vquantize(vdequantize(vld1q_u8(src_it.ptr() + x), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + output.offset() + x) = quantize_qasymm8(dequantize_qasymm8(*(input.ptr() + x), input_qinfo), output_qinfo);
+ *(dst_ptr + dst_it.offset() + x) = quantize_qasymm8(dequantize_qasymm8(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
- else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+ else if(dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
{
execute_window_loop(win, [&](const Coordinates &)
{
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- vst1q_s8(reinterpret_cast<int8_t *>(output_ptr + output.offset() + x),
- vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<int8_t *>(input.ptr() + x)), input_qinfo), output_qinfo));
+ vst1q_s8(reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset() + x),
+ vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<int8_t *>(src_it.ptr() + x)), src_qinfo), dst_qinfo));
}
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + output.offset() + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(input.ptr() + x), input_qinfo), output_qinfo);
+ *(dst_ptr + dst_it.offset() + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
}
},
- input, output);
+ src_it, dst_it);
}
else
{
execute_window_loop(win, [&](const Coordinates &)
{
- const auto in_ptr = input.ptr();
- const auto out_ptr = output_ptr + output.offset();
+ const auto in_ptr = src_it.ptr();
+ const auto out_ptr = dst_ptr + dst_it.offset();
int x = window_start_x;
for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
@@ -166,7 +170,14 @@ void NEWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
*(out_ptr + x) = *(in_ptr + x);
}
},
- input, output);
+ src_it, dst_it);
}
}
+
+const char *CpuConcatenateWidthKernel::name() const
+{
+ return "CpuConcatenateWidthKernel";
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuConcatenateWidthKernel.h b/src/core/cpu/kernels/CpuConcatenateWidthKernel.h
new file mode 100644
index 0000000000..afdc3ccddd
--- /dev/null
+++ b/src/core/cpu/kernels/CpuConcatenateWidthKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class CpuConcatenateWidthKernel : public ICpuKernel
+{
+public:
+ CpuConcatenateWidthKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateWidthKernel);
+ /** Configure kernel for a given list of arguments
+ *
+ * @param[in] src Source tensor info. Data types supported: All
+ * @param[in]     width_offset The starting offset on the X axis for the destination tensor.
+ * @param[in,out] dst Destination tensor info. Data types supported: Same as @p src.
+ */
+ void configure(const ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateWidthKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All
+ * @param[in] width_offset The starting offset on the X axis for the destination tensor.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+ const char *name() const override;
+
+private:
+ unsigned int _width_offset;
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H */
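
As with the height variant, validate() here operates purely on tensor metadata, so configurations can be checked before any allocation. A hedged sketch with illustrative shapes (validate_arguments enforces src->dimension(0) + width_offset <= dst->dimension(0) and equality on all remaining dimensions):

using namespace arm_compute;
TensorInfo src_info(TensorShape(8U, 4U), 1, DataType::F32);
TensorInfo dst_info(TensorShape(16U, 4U), 1, DataType::F32);
// Source width 8 at offset 8 fits the destination width of 16, so this passes.
Status s = cpu::kernels::CpuConcatenateWidthKernel::validate(&src_info, 8U, &dst_info);
ARM_COMPUTE_ERROR_THROW_ON(s);
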
diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
index 782f8f1ff7..dcc5cd3a64 100644
--- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,7 @@
*/
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
-#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
-#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
-#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/runtime/cpu/operators/CpuConcatenate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -39,156 +36,22 @@
namespace arm_compute
{
-namespace experimental
-{
-NEConcatenation::NEConcatenation()
- : _concat_kernels(), _num_inputs(0), _axis(0)
-{
-}
-
-void NEConcatenation::configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- _axis = axis;
- _num_inputs = inputs_vector.size();
-
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(NEConcatenateLayer::validate(inputs_vector, output, axis));
-
- unsigned int offset = 0;
-
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- switch(axis)
- {
- case Window::DimX:
- {
- auto kernel = std::make_unique<NEWidthConcatenateLayerKernel>();
- kernel->configure(inputs_vector.at(i), offset, output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case Window::DimY:
- {
- auto kernel = std::make_unique<NEHeightConcatenateLayerKernel>();
- kernel->configure(inputs_vector.at(i), offset, output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case Window::DimZ:
- {
- auto kernel = std::make_unique<NEDepthConcatenateLayerKernel>();
- kernel->configure(inputs_vector.at(i), offset, output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case 3:
- {
- auto kernel = std::make_unique<NEBatchConcatenateLayerKernel>();
- kernel->configure(inputs_vector.at(i), offset, output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
- offset += inputs_vector.at(i)->dimension(axis);
- }
-}
-
-Status NEConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
-
- unsigned int offset = 0;
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- switch(axis)
- {
- case Window::DimX:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayerKernel::validate(input, offset, output));
- break;
- }
- case Window::DimY:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEHeightConcatenateLayerKernel::validate(input, offset, output));
- break;
- }
- case Window::DimZ:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayerKernel::validate(input, offset, output));
- break;
- }
- case 3:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEBatchConcatenateLayerKernel::validate(input, offset, output));
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
- offset += input->dimension(axis);
- }
-
- if(output->total_size() != 0)
- {
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
- ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
- }
-
- return Status{};
-}
-
-void NEConcatenation::run(ITensorPack &tensors)
-{
- if(tensors.empty())
- {
- ARM_COMPUTE_ERROR("No inputs provided");
- }
-
- if(static_cast<int>(tensors.size() - 1) != static_cast<int>(_num_inputs))
- {
- ARM_COMPUTE_ERROR("Configured with different number of inputs");
- }
-
- int i = 0;
- for(auto &k : _concat_kernels)
- {
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
- pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
- NEScheduler::get().schedule_op(k.get(), Window::DimY, pack);
- ++i;
- }
-}
-} // namespace experimental
-
struct NEConcatenateLayer::Impl
{
- std::vector<const ITensor *> srcs{};
- ITensor *dst{ nullptr };
- unsigned int num_inputs{ 0 };
- unsigned int axis{ 0 };
- std::unique_ptr<experimental::NEConcatenation> op{ nullptr };
+ std::vector<const ITensor *> srcs{};
+ ITensor *dst{ nullptr };
+ unsigned int num_inputs{ 0 };
+ unsigned int axis{ 0 };
+ std::unique_ptr<cpu::CpuConcatenate> op{ nullptr };
};
NEConcatenateLayer::NEConcatenateLayer()
: _impl(std::make_unique<Impl>())
{
}
-
NEConcatenateLayer::NEConcatenateLayer(NEConcatenateLayer &&) = default;
-
NEConcatenateLayer &NEConcatenateLayer::operator=(NEConcatenateLayer &&) = default;
-
-NEConcatenateLayer::~NEConcatenateLayer() = default;
+NEConcatenateLayer::~NEConcatenateLayer() = default;
void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis)
{
@@ -198,7 +61,7 @@ void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, I
_impl->dst = output;
_impl->axis = axis;
_impl->num_inputs = inputs_vector.size();
- _impl->op = std::make_unique<experimental::NEConcatenation>();
+ _impl->op = std::make_unique<cpu::CpuConcatenate>();
std::vector<const ITensorInfo *> inputs_vector_info;
for(unsigned int i = 0; i < inputs_vector.size(); ++i)
@@ -211,7 +74,7 @@ void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, I
Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
- return experimental::NEConcatenation::validate(inputs_vector, output, axis);
+ return cpu::CpuConcatenate::validate(inputs_vector, output, axis);
}
void NEConcatenateLayer::run()
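
At the public API level nothing changes for callers: NEConcatenateLayer keeps its ITensor-based interface and forwards to the stateless cpu::CpuConcatenate held in its Impl. A hedged end-to-end sketch with illustrative names and shapes:

using namespace arm_compute;
Tensor a, b, out;
a.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
b.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

NEConcatenateLayer concat;
concat.configure({ &a, &b }, &out, /* axis */ 0); // out's info is auto-initialized to 16x4
a.allocator()->allocate();
b.allocator()->allocate();
out.allocator()->allocate();
concat.run();
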
diff --git a/src/runtime/cpu/operators/CpuConcatenate.cpp b/src/runtime/cpu/operators/CpuConcatenate.cpp
new file mode 100644
index 0000000000..2094e65034
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuConcatenate.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuConcatenate.h"
+
+#include "src/core/cpu/kernels/CpuConcatenateBatchKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateDepthKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateHeightKernel.h"
+#include "src/core/cpu/kernels/CpuConcatenateWidthKernel.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+CpuConcatenate::CpuConcatenate()
+ : _concat_kernels(), _num_srcs(0), _axis(0)
+{
+}
+
+void CpuConcatenate::configure(const std::vector<const ITensorInfo *> &srcs_vector, ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+
+ _axis = axis;
+ _num_srcs = srcs_vector.size();
+
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(srcs_vector, axis);
+
+    // Output auto initialization if not yet initialized
+ auto_init_if_empty(*dst, dst_shape, 1, srcs_vector[0]->data_type());
+ ARM_COMPUTE_ERROR_THROW_ON(CpuConcatenate::validate(srcs_vector, dst, axis));
+
+ unsigned int offset = 0;
+
+ for(unsigned int i = 0; i < _num_srcs; ++i)
+ {
+ switch(axis)
+ {
+ case Window::DimX:
+ {
+ auto kernel = std::make_unique<kernels::CpuConcatenateWidthKernel>();
+ kernel->configure(srcs_vector.at(i), offset, dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ case Window::DimY:
+ {
+ auto kernel = std::make_unique<kernels::CpuConcatenateHeightKernel>();
+ kernel->configure(srcs_vector.at(i), offset, dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ case Window::DimZ:
+ {
+ auto kernel = std::make_unique<kernels::CpuConcatenateDepthKernel>();
+ kernel->configure(srcs_vector.at(i), offset, dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ case 3:
+ {
+ auto kernel = std::make_unique<kernels::CpuConcatenateBatchKernel>();
+ kernel->configure(srcs_vector.at(i), offset, dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+ offset += srcs_vector.at(i)->dimension(axis);
+ }
+}
+
+Status CpuConcatenate::validate(const std::vector<const ITensorInfo *> &srcs_vector, const ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(srcs_vector.size() < 2);
+
+ unsigned int offset = 0;
+ for(const auto &src : srcs_vector)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
+ switch(axis)
+ {
+ case Window::DimX:
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuConcatenateWidthKernel::validate(src, offset, dst));
+ break;
+ }
+ case Window::DimY:
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuConcatenateHeightKernel::validate(src, offset, dst));
+ break;
+ }
+ case Window::DimZ:
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuConcatenateDepthKernel::validate(src, offset, dst));
+ break;
+ }
+ case 3:
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuConcatenateBatchKernel::validate(src, offset, dst));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+ offset += src->dimension(axis);
+ }
+
+ if(dst->total_size() != 0)
+ {
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(srcs_vector, axis);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
+ }
+
+ return Status{};
+}
+
+void CpuConcatenate::run(ITensorPack &tensors)
+{
+ if(tensors.empty())
+ {
+ ARM_COMPUTE_ERROR("No inputs provided");
+ }
+
+ if(static_cast<int>(tensors.size() - 1) != static_cast<int>(_num_srcs))
+ {
+ ARM_COMPUTE_ERROR("Configured with different number of inputs");
+ }
+
+ int i = 0;
+ for(auto &k : _concat_kernels)
+ {
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
+ pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
+ NEScheduler::get().schedule_op(k.get(), Window::DimY, pack);
+ ++i;
+ }
+}
+} // namespace cpu
+} // namespace arm_compute
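
run() above expects a fixed pack layout: source i sits at slot ACL_SRC_VEC + i and the single destination at ACL_DST. A sketch of how a caller such as NEConcatenateLayer::run might assemble it, assuming srcs and dst were captured at configure time:

ITensorPack pack;
for(unsigned int i = 0; i < num_srcs; ++i)
{
    pack.add_tensor(TensorType::ACL_SRC_VEC + i, srcs.at(i)); // i-th source slot
}
pack.add_tensor(TensorType::ACL_DST, dst); // single destination
op->run(pack);
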
diff --git a/src/runtime/cpu/operators/CpuConcatenate.h b/src/runtime/cpu/operators/CpuConcatenate.h
new file mode 100644
index 0000000000..376534275f
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuConcatenate.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_CONCATENATE_H
+#define ARM_COMPUTE_CPU_CONCATENATE_H
+
+#include "src/core/cpu/ICpuKernel.h"
+#include "src/runtime/cpu/ICpuOperator.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Basic function to concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref CpuConcatenateWidthKernel (if underlying concatenation axis is 0).
+ * -# @ref CpuConcatenateHeightKernel (if underlying concatenation axis is 1).
+ * -# @ref CpuConcatenateDepthKernel (if underlying concatenation axis is 2).
+ * -# @ref CpuConcatenateBatchKernel (if underlying concatenation axis is 3).
+ */
+class CpuConcatenate : public ICpuOperator
+{
+public:
+ /** Constructor */
+ CpuConcatenate();
+ /** Configure operator for a given list of arguments
+ *
+ * @note Input and output tensor dimension preconditions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref CpuConcatenateWidthKernel, @ref CpuConcatenateHeightKernel, @ref CpuConcatenateDepthKernel and @ref CpuConcatenateBatchKernel.
+ *
+ * @param[in]  srcs_vector The vector containing all the source tensor infos to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] dst         Destination tensor info. Data types supported: Same as @p srcs_vector.
+ * @param[in]  axis        Concatenation axis. Supported underlying concatenation axes are 0, 1, 2 and 3.
+ */
+ void configure(const std::vector<const ITensorInfo *> &srcs_vector, ITensorInfo *dst, size_t axis);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuConcatenate
+ *
+ * @note Input and output tensor dimension preconditions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref CpuConcatenateWidthKernel, @ref CpuConcatenateHeightKernel, @ref CpuConcatenateDepthKernel and @ref CpuConcatenateBatchKernel.
+ *
+ * @param[in] srcs_vector The vector containing all the source tensor infos to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] dst         Destination tensor info. Data types supported: Same as @p srcs_vector.
+ * @param[in] axis        Concatenation axis. Supported underlying concatenation axes are 0, 1, 2 and 3.
+ *
+ * @return a status
+ */
+ static Status validate(const std::vector<const ITensorInfo *> &srcs_vector, const ITensorInfo *dst, size_t axis);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+
+private:
+ std::vector<std::unique_ptr<ICpuKernel>> _concat_kernels;
+ unsigned int _num_srcs;
+ unsigned int _axis;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CONCATENATE_H */
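
Because the operator holds no tensor memory, a single CpuConcatenate can be validated up front on metadata and then re-run against different packs. A brief hedged sketch of the metadata-only entry points, with illustrative tensor infos:

std::vector<const arm_compute::ITensorInfo *> infos{ a.info(), b.info() };
ARM_COMPUTE_ERROR_THROW_ON(arm_compute::cpu::CpuConcatenate::validate(infos, out.info(), 0));
arm_compute::cpu::CpuConcatenate op;
op.configure(infos, out.info(), 0); // kernel list and offsets fixed here; buffers arrive via run()
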