author     Georgios Pinitas <georgios.pinitas@arm.com>    2019-05-17 18:14:40 +0100
committer  Georgios Pinitas <georgios.pinitas@arm.com>    2019-05-21 11:28:01 +0000
commit     09f24975437e2e141ba51a07055a9372b0d173a2 (patch)
tree       fe565e4b9abd379cb1f467e5d9e36d68fcfbacef
parent     f24411ffc842970609a1fb6ba2f9527cfb681dbd (diff)
download   ComputeLibrary-09f24975437e2e141ba51a07055a9372b0d173a2.tar.gz
COMPMID-2109: Remove CL/NE Width/Depth ConcatenateLayer functions.
Change-Id: Icbda771abffbb45d4ed0958933c60ff9ace01314
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1178
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/runtime/CL/CLFunctions.h                                    2
-rw-r--r--  arm_compute/runtime/CL/functions/CLConcatenateLayer.h                  10
-rw-r--r--  arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h             95
-rw-r--r--  arm_compute/runtime/CL/functions/CLLSTMLayer.h                           4
-rw-r--r--  arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h             88
-rw-r--r--  arm_compute/runtime/GLES_COMPUTE/GCFunctions.h                           1
-rw-r--r--  arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h   68
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h                                   2
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h                22
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h           93
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayer.h                        12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h           90
-rw-r--r--  docs/00_introduction.dox                                                22
-rw-r--r--  docs/05_functions_list.dox                                               5
-rw-r--r--  src/graph/backends/GLES/GCFunctionsFactory.cpp                          41
-rw-r--r--  src/runtime/CL/functions/CLConcatenateLayer.cpp                          6
-rw-r--r--  src/runtime/CL/functions/CLDepthConcatenateLayer.cpp                   107
-rw-r--r--  src/runtime/CL/functions/CLLSTMLayer.cpp                                 4
-rw-r--r--  src/runtime/CL/functions/CLWidthConcatenateLayer.cpp                   143
-rwxr-xr-x  src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp         75
-rw-r--r--  src/runtime/NEON/functions/NEConcatenateLayer.cpp                       31
-rw-r--r--  src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp                 108
-rw-r--r--  src/runtime/NEON/functions/NELSTMLayer.cpp                              27
-rw-r--r--  src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp                 117
-rw-r--r--  tests/benchmark/CL/DepthConcatenateLayer.cpp                             6
-rw-r--r--  tests/benchmark/NEON/DepthConcatenateLayer.cpp                           4
-rw-r--r--  tests/benchmark/fixtures/DepthConcatenateLayerFixture.h                  2
27 files changed, 96 insertions, 1089 deletions
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index e314f44370..fbaab35414 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -53,7 +53,6 @@
#include "arm_compute/runtime/CL/functions/CLCropResize.h"
#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h"
@@ -143,7 +142,6 @@
#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h"
#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
-#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
#include "arm_compute/runtime/CL/functions/CLYOLOLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index d85a4453d8..c56fc117b9 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -26,7 +26,7 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
#include <memory>
@@ -41,9 +41,9 @@ class Status;
/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
*
- * -# @ref CLWidthConcatenateLayer (if underlying concatenation axis is 0).
+ * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
* -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref CLDepthConcatenateLayer (if underlying concatenation axis is 2).
+ * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
*/
class CLConcatenateLayer : public IFunction
{
@@ -53,7 +53,7 @@ public:
/** Initialise the kernel's inputs vector and output.
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayer.
+ * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
*
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
@@ -63,7 +63,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayer.
+ * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
*
* @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
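With the dedicated width/depth functions removed, callers go through CLConcatenateLayer and pass the concatenation axis explicitly. The following is a minimal usage sketch against the unified API shown in the header above; the tensor shapes and data type are illustrative assumptions, not taken from this patch.

// Sketch: concatenating two CL tensors along the width (axis 0) with the
// unified CLConcatenateLayer, where CLWidthConcatenateLayer was used before.
// Shapes and data type are illustrative assumptions.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

#include <vector>

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // create the CL context and command queue

    CLTensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(6U, 8U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(10U, 8U), 1, DataType::F32));

    std::vector<ICLTensor *> inputs = { &in0, &in1 };

    CLConcatenateLayer concat;
    concat.configure(inputs, &out, 0); // axis 0 == width (Window::DimX)

    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate();

    concat.run();
    CLScheduler::get().sync();
    return 0;
}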
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
deleted file mode 100644
index 9ef21f32d7..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
- *
- * @deprecated This function is deprecated and will be removed in release 19.08
- *
- * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
- * -# @ref CLDepthConcatenateLayerKernel
- *
- */
-class CLDepthConcatenateLayer : public IFunction
-{
-public:
- /** Default constructor */
- CLDepthConcatenateLayer();
- /** Initialise the kernel's inputs vector and output.
- *
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Input dimensions might differ for each input for the first three dimensions (width, height, depth)
- * and must match for the rest.
- * Note that the difference between the minimum and maximum width and height among the input tensors
- * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
- * height when they are less than the maximum input sizes.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions match the inputs' ones from the fourth dimension and above,
- * while width and height are the maximum width and height of the input tensors.
- * Finally, depth is the sum of the input depths.
- */
- void configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayer
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Input dimensions might differ for each input for the first three dimensions (width, height, depth)
- * and must match for the rest.
- * Note that the difference between the minimum and maximum width and height among the input tensors
- * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
- * height when they are less than the maximum input sizes.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions match the inputs' ones from the fourth dimension and above,
- * while width and height are the maximum width and height of the input tensors.
- * Finally, depth is the sum of the input depths.
- *
- * @return a status
- */
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::vector<CLDepthConcatenateLayerKernel> _concat_kernels_vector;
- std::vector<CLFillBorderKernel> _border_handlers_vector;
- unsigned int _num_inputs;
-};
-}
-#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 8bd47cbf8e..3add152878 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -35,10 +35,10 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/common/LSTMParams.h"
@@ -184,7 +184,7 @@ private:
CLActivationLayerKernel _projection_clip;
CLCopyKernel _copy_cell_state;
CLCopyKernel _copy_output;
- CLWidthConcatenateLayer _concat_scratch_buffer;
+ CLConcatenateLayer _concat_scratch_buffer;
CLWidthConcatenate2TensorsKernel _concat_inputs_forget_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_input_gate;
diff --git a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
deleted file mode 100644
index 6a30fcfa92..0000000000
--- a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__
-#define __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute concatenate tensors along x axis. This function calls the following kernel:
- *
- * @deprecated This function is deprecated and will be removed in release 19.08
- *
- * -# @ref CLWidthConcatenateLayerKernel
- * -# @ref CLWidthConcatenate2TensorsKernel (if there are exactly 2 input tensors)
- * -# @ref CLWidthConcatenate4TensorsKernel (if there are exactly 4 input tensors)
- *
- */
-class CLWidthConcatenateLayer : public IFunction
-{
-public:
- /** Default constructor */
- CLWidthConcatenateLayer();
- /** Initialise the kernel's inputs vector and output.
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Dimensions of all the inputs should match apart for the width which can differ.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions are the same with the inputs from the second dimension and above.
- * The first dimension (width) is the sum of the input tensors' widths.
- */
- void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Dimensions of all the inputs should match apart for the width which can differ.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions are the same with the inputs from the second dimension and above.
- * The first dimension (width) is the sum of the input tensors' widths.
- *
- * @return a status
- */
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::vector<CLWidthConcatenateLayerKernel> _concat_kernels_vector;
- CLWidthConcatenate2TensorsKernel _concat_x2_kernel;
- CLWidthConcatenate4TensorsKernel _concat_x4_kernel;
- unsigned int _num_inputs;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
index 7e01480801..67275303c9 100644
--- a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
+++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
@@ -31,7 +31,6 @@
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h"
-#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h"
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h
deleted file mode 100644
index da00f387e9..0000000000
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__
-#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__
-
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
- *
- * @deprecated This function is deprecated and will be removed in release 19.08
- * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
- * -# @ref GCDepthConcatenateLayerKernel
- *
- */
-class GCDepthConcatenateLayer : public IFunction
-{
-public:
- /** Default constructor */
- GCDepthConcatenateLayer();
- /** Initialise the kernel's inputs vector and output.
- *
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::vector<std::unique_ptr<GCDepthConcatenateLayerKernel>> _concat_kernels_vector;
- std::vector<std::unique_ptr<GCFillBorderKernel>> _border_handlers_vector;
- unsigned int _num_inputs;
-};
-}
-#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index d84422f882..0d94ea78fc 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -51,7 +51,6 @@
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h"
@@ -142,7 +141,6 @@
#include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h"
#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h"
#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h"
-#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index f8cda326d2..8c97efc4f0 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -26,8 +26,9 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/Requires.h"
#include <memory>
#include <vector>
@@ -41,9 +42,9 @@ class Status;
/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
*
- * -# @ref NEWidthConcatenateLayer (if underlying concatenation axis is 0).
+ * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
* -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref NEDepthConcatenateLayer (if underlying concatenation axis is 2).
+ * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
*/
class NEConcatenateLayer : public IFunction
{
@@ -53,17 +54,18 @@ public:
/** Initialise the kernel's inputs vector and output.
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer.
+ * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
*
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2.
*/
- void configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, size_t axis);
+ void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis);
+ void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis);
/** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer.
+ * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
*
* @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
@@ -72,11 +74,19 @@ public:
* @return a status
*/
static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
private:
+ template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)>
+ void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis);
+
+ template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)>
+ static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
+
+private:
std::vector<std::unique_ptr<INEKernel>> _concat_kernels;
unsigned int _num_inputs;
unsigned int _axis;
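The NEON front end mirrors this: NEConcatenateLayer now exposes both non-const and const input overloads (dispatched through the templated configure_internal/validate_internal helpers added above) and takes the axis as a parameter. A minimal sketch follows, with illustrative shapes and data type.

// Sketch: width concatenation on NEON through the unified NEConcatenateLayer.
// Shapes and data type are illustrative assumptions.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    Tensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(6U, 8U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(10U, 8U), 1, DataType::F32));

    // Non-const overload; a std::vector<const ITensor *> goes through the
    // second configure() overload introduced in this patch.
    std::vector<ITensor *> inputs = { &in0, &in1 };

    NEConcatenateLayer concat;
    concat.configure(inputs, &out, 0); // axis 0 == width

    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate();

    concat.run();
    return 0;
}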
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
deleted file mode 100644
index b3bf752b40..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
-#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
- *
- * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
- * -# @ref NEDepthConcatenateLayerKernel
- *
- * @deprecated This function is deprecated and will be removed in release 19.08
- *
- */
-class NEDepthConcatenateLayer : public IFunction
-{
-public:
- /** Default constructor */
- NEDepthConcatenateLayer();
- /** Initialise the kernel's inputs vector and output.
- *
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Input dimensions might differ for each input for the first three dimensions (width, height, depth)
- * and must match for the rest.
- * Note that the difference between the minimum and maximum width and height among the input tensors
- * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
- * height when they are less than the maximum input sizes.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions match the inputs' ones from the fourth dimension and above,
- * while width and height are the maximum width and height of the input tensors.
- * Finally, depth is the sum of the input depths.
- */
- void configure(const std::vector<ITensor *> &inputs_vector, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayer
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
- * Input dimensions might differ for each input for the first three dimensions (width, height, depth)
- * and must match for the rest.
- * Note that the difference between the minimum and maximum width and height among the input tensors
- * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
- * height when they are less than the maximum input sizes.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions match the inputs' ones from the fourth dimension and above,
- * while width and height are the maximum width and height of the input tensors.
- * Finally, depth is the sum of the input depths.
- *
- * @return a status
- */
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::vector<ITensor *> _inputs_vector;
- std::vector<std::unique_ptr<NEDepthConcatenateLayerKernel>> _concat_kernels_vector;
- std::vector<std::unique_ptr<NEFillBorderKernel>> _border_handlers_vector;
- unsigned int _num_inputs;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index f3a1aa7c75..cf0f06c215 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -32,9 +32,9 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h"
#include "arm_compute/runtime/common/LSTMParams.h"
namespace arm_compute
@@ -176,11 +176,11 @@ private:
NEActivationLayerKernel _projection_clip;
NECopyKernel _copy_cell_state;
NECopyKernel _copy_output;
- NEWidthConcatenateLayer _concat_scratch_buffer;
- NEWidthConcatenateLayer _concat_inputs_forget_gate;
- NEWidthConcatenateLayer _concat_weights_forget_gate;
- NEWidthConcatenateLayer _concat_weights_input_gate;
- NEWidthConcatenateLayer _concat_weights_output;
+ NEConcatenateLayer _concat_scratch_buffer;
+ NEConcatenateLayer _concat_inputs_forget_gate;
+ NEConcatenateLayer _concat_weights_forget_gate;
+ NEConcatenateLayer _concat_weights_input_gate;
+ NEConcatenateLayer _concat_weights_output;
Tensor _input_gate_out1;
Tensor _input_gate_out2;
Tensor _input_gate_out3;
diff --git a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
deleted file mode 100644
index 8d221766cd..0000000000
--- a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__
-#define __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
-
-#include "arm_compute/core/utils/misc/Requires.h"
-
-#include <memory>
-#include <type_traits>
-#include <vector>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to execute concatenate tensors along x axis. This function calls the following kernel:
- *
- * -# @ref NEWidthConcatenateLayerKernel
- *
- * @deprecated This function is deprecated and will be removed in release 19.08
- */
-class NEWidthConcatenateLayer : public IFunction
-{
-public:
- /** Default constructor */
- NEWidthConcatenateLayer();
- /** Initialise the kernel's inputs vector and output.
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * Dimensions of all the inputs should match apart for the width which can differ.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions are the same with the inputs from the second dimension and above.
- * The first dimension (width) is the sum of the input tensors' widths.
- */
- void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
- void configure(std::vector<const ITensor *> inputs_vector, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayer
- *
- * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * Dimensions of all the inputs should match apart for the width which can differ.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- * Output tensor dimensions are the same with the inputs from the second dimension and above.
- * The first dimension (width) is the sum of the input tensors' widths.
- *
- * @return a status
- */
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::vector<NEWidthConcatenateLayerKernel> _concat_kernels_vector;
- unsigned int _num_inputs;
- template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)>
- void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output);
- template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)>
- static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 03e889d14a..caf7ee77bc 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -267,11 +267,11 @@ v19.05 Public major release
- New OpenGLES kernels / functions:
- @ref GCConcatenateLayer
- Deprecated functions/interfaces
- - @ref GCDepthConcatenateLayer
- - @ref NEWidthConcatenateLayer
- - @ref NEDepthConcatenateLayer
- - @ref CLWidthConcatenateLayer
- - @ref CLDepthConcatenateLayer
+ - GCDepthConcatenateLayer
+ - NEWidthConcatenateLayer
+ - NEDepthConcatenateLayer
+ - CLWidthConcatenateLayer
+ - CLDepthConcatenateLayer
- CLGEMMInterleave4x4
- CLGEMMTranspose1xW
- Support different quantization info in CLConcatLayer.
@@ -424,7 +424,7 @@ v18.11 Public major release
- Added documentation for add a new function or kernel.
- Improved doxygen documentation adding a list of the existing functions.
- Add 4D tensors support to
- - @ref CLWidthConcatenateLayer
+ - CLWidthConcatenateLayer
- @ref CLFlattenLayer
- @ref CLSoftmaxLayer
- Add dot product support for @ref CLDepthwiseConvolutionLayer3x3NHWCKernel non-unit stride
@@ -453,7 +453,7 @@ v18.08 Public major release
- Removed support for QS8/QS16 data types.
- Added support for grouped convolution in @ref CLConvolutionLayer.
- Added NHWC data layout support to:
- - @ref NEDepthConcatenateLayer / @ref CLDepthConcatenateLayer
+ - NEDepthConcatenateLayer / CLDepthConcatenateLayer
- @ref NEWinogradConvolutionLayer / @ref CLWinogradConvolutionLayer
- @ref CLDepthwiseConvolutionLayer
- @ref CLDirectConvolutionLayer
@@ -496,7 +496,7 @@ v18.05 Public major release
- @ref CLCopy / @ref CLCopyKernel
- @ref CLLSTMLayer
- @ref CLRNNLayer
- - @ref CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel
+ - CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel
- @ref CLWinogradFilterTransformKernel / @ref CLWinogradInputTransformKernel / @ref CLWinogradConvolutionLayer
- @ref CLWinogradInputTransformKernel / @ref CLWinogradInputTransform
- New Neon kernels / functions:
@@ -619,7 +619,7 @@ v17.12 Public major release
- @ref GCActivationLayerKernel / @ref GCActivationLayer
- @ref GCBatchNormalizationLayerKernel / @ref GCBatchNormalizationLayer
- @ref GCCol2ImKernel
- - @ref GCDepthConcatenateLayerKernel / @ref GCDepthConcatenateLayer
+ - @ref GCDepthConcatenateLayerKernel / GCDepthConcatenateLayer
- @ref GCDirectConvolutionLayerKernel / @ref GCDirectConvolutionLayer
- @ref GCDropoutLayerKernel / @ref GCDropoutLayer
- @ref GCFillBorderKernel / @ref GCFillBorder
@@ -707,7 +707,7 @@ v17.06 Public major release
- User can specify his own scheduler by implementing the @ref IScheduler interface.
- New OpenCL kernels / functions:
- @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer
- - @ref CLDepthConcatenateLayerKernel / @ref CLDepthConcatenateLayer
+ - @ref CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
- @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection
- @ref CLLocallyConnectedMatrixMultiplyKernel / @ref CLLocallyConnectedLayer
- @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights
@@ -715,7 +715,7 @@ v17.06 Public major release
- @ref CPPDetectionWindowNonMaximaSuppressionKernel
- New NEON kernels / functions:
- @ref NEBatchNormalizationLayerKernel / @ref NEBatchNormalizationLayer
- - @ref NEDepthConcatenateLayerKernel / @ref NEDepthConcatenateLayer
+ - @ref NEDepthConcatenateLayerKernel / NEDepthConcatenateLayer
- @ref NEDirectConvolutionLayerKernel / @ref NEDirectConvolutionLayer
- @ref NELocallyConnectedMatrixMultiplyKernel / @ref NELocallyConnectedLayer
- @ref NEWeightsReshapeKernel / @ref NEConvolutionLayerReshapeWeights
diff --git a/docs/05_functions_list.dox b/docs/05_functions_list.dox
index 9a5c8c0027..999b573674 100644
--- a/docs/05_functions_list.dox
+++ b/docs/05_functions_list.dox
@@ -112,7 +112,6 @@ namespace arm_compute
- @ref NEConvolutionSquare &lt;matrix_size&gt;
- @ref NECropResize
- @ref NEDeconvolutionLayer
- - @ref NEDepthConcatenateLayer
- @ref NEDepthwiseConvolutionAssemblyDispatch
- @ref NEDepthwiseConvolutionLayer
- @ref NEDepthwiseConvolutionLayer3x3
@@ -171,7 +170,6 @@ namespace arm_compute
- @ref NEStackLayer
- @ref NEUnstack
- @ref NEUpsampleLayer
- - @ref NEWidthConcatenateLayer
- @ref NEWinogradConvolutionLayer
@section S5_2 OpenCL functions
@@ -188,7 +186,6 @@ namespace arm_compute
- @ref CLCropResize
- @ref CLDeconvolutionLayer
- @ref CLDeconvolutionLayerUpsample
- - @ref CLDepthConcatenateLayer
- @ref CLDepthwiseConvolutionLayer
- @ref CLDepthwiseConvolutionLayer3x3
- @ref CLDepthwiseSeparableConvolutionLayer
@@ -241,7 +238,6 @@ namespace arm_compute
- @ref CLStackLayer
- @ref CLUnstack
- @ref CLUpsampleLayer
- - @ref CLWidthConcatenateLayer
- @ref CLWinogradConvolutionLayer
- @ref ICLSimpleFunction
- @ref CLAbsoluteDifference
@@ -327,7 +323,6 @@ namespace arm_compute
- @ref GCConcatenateLayer
- @ref GCConvolutionLayer
- @ref GCConvolutionLayerReshapeWeights
- - @ref GCDepthConcatenateLayer
- @ref GCDepthwiseConvolutionLayer3x3
- @ref GCDirectConvolutionLayer
- @ref GCDropoutLayer
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 0de58f5c28..13543dbf15 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,43 +68,6 @@ struct GCEltwiseFunctions
namespace detail
{
-// Specialize functions
-template <>
-std::unique_ptr<IFunction> create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(ConcatenateLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Return nullptr if depth concatenate is switched off
- if(!node.is_enabled())
- {
- return nullptr;
- }
-
- // Extract IO and info
- std::vector<GCTargetInfo::TensorType *> inputs;
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
- {
- inputs.push_back(get_backing_tensor<GCTargetInfo>(node.input(i)));
- }
- typename GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
- func->configure(inputs, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Target " << GCTargetInfo::TargetType
- << " Data Type: " << output->info()->data_type()
- << " Shape: " << output->info()->tensor_shape()
- << " Num Inputs: " << inputs.size()
- << std::endl);
-
- return std::move(func);
-}
-
template <>
std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx)
{
@@ -282,7 +245,7 @@ std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext &
case NodeType::ConvolutionLayer:
return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::ConcatenateLayer:
- return detail::create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+ return detail::create_concatenate_layer<GCConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index b8224d2cce..0594a17a7a 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -23,11 +23,13 @@
*/
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
-#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
deleted file mode 100644
index f687e54552..0000000000
--- a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLDepthConcatenateLayer::CLDepthConcatenateLayer() // NOLINT
- : _concat_kernels_vector(),
- _border_handlers_vector(),
- _num_inputs(0)
-{
-}
-
-void CLDepthConcatenateLayer::configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output) // NOLINT
-{
- _num_inputs = inputs_vector.size();
-
- std::vector<ITensorInfo *> inputs_vector_info;
- for(unsigned int i = 0; i < _num_inputs; i++)
- {
- inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
- }
-
- _concat_kernels_vector.resize(_num_inputs);
- _border_handlers_vector.resize(_num_inputs);
-
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ);
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(CLDepthConcatenateLayer::validate(inputs_vector_info, output->info()));
-
- unsigned int depth_offset = 0;
- for(unsigned int i = 0; i < _num_inputs; i++)
- {
- _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
- _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue());
-
- depth_offset += inputs_vector.at(i)->info()->dimension(2);
- }
-
- // Set valid region from shape
- output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
-}
-
-Status CLDepthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
-
- // Output auto inizialitation if not yet initialized
- TensorInfo tmp_output_info = *output->clone();
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ);
- auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
-
- unsigned int depth_offset = 0;
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info));
- depth_offset += input->dimension(2);
- }
-
- return Status{};
-}
-
-void CLDepthConcatenateLayer::run()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- for(unsigned i = 0; i < _num_inputs; i++)
- {
- CLScheduler::get().enqueue(_border_handlers_vector[i], false);
- CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
- }
-}
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 4606a66bf2..85a81a8cd4 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -316,7 +316,7 @@ void CLLSTMLayer::configure(const ICLTensor *input,
scratch_inputs.emplace_back(&_cell_state_out1);
scratch_inputs.emplace_back(forget_gate_out);
scratch_inputs.emplace_back(output_gate_out);
- _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer);
+ _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX);
input_gate_out->allocator()->allocate();
_cell_state_out1.allocator()->allocate();
forget_gate_out->allocator()->allocate();
@@ -497,7 +497,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
inputs_vector_info_raw.push_back(&forget_gate);
inputs_vector_info_raw.push_back(&output_gate_tmp);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
return Status{};
}
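The same pattern applies on the validation path: where CLLSTMLayer previously validated against CLWidthConcatenateLayer, it now calls the unified CLConcatenateLayer::validate with Window::DimX. Below is a standalone sketch of that check; the tensor infos are illustrative assumptions.

// Sketch: up-front validation of a width concatenation via the unified
// CLConcatenateLayer::validate, as CLLSTMLayer::validate now does.
// Tensor infos are illustrative assumptions.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

#include <vector>

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // some CL kernel validators query the device

    TensorInfo a(TensorShape(4U, 8U), 1, DataType::F32);
    TensorInfo b(TensorShape(6U, 8U), 1, DataType::F32);
    TensorInfo out(TensorShape(10U, 8U), 1, DataType::F32);

    std::vector<ITensorInfo *> infos = { &a, &b };
    const Status status = CLConcatenateLayer::validate(infos, &out, Window::DimX);

    return (status.error_code() == ErrorCode::OK) ? 0 : 1;
}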
diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
deleted file mode 100644
index a8667c3138..0000000000
--- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLWidthConcatenateLayer::CLWidthConcatenateLayer() // NOLINT
- : _concat_kernels_vector(),
- _concat_x2_kernel(),
- _concat_x4_kernel(),
- _num_inputs(0)
-{
-}
-
-Status CLWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) // NOLINT
-{
- const unsigned int num_inputs = inputs_vector.size();
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
-
- // Output auto inizialitation if not yet initialized
- TensorInfo tmp_output_info = *output->clone();
- const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX);
- auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
-
- switch(num_inputs)
- {
- case 2:
- // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], &tmp_output_info));
- break;
- case 4:
- // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], &tmp_output_info));
- break;
- default:
- unsigned int width_offset = 0;
- // Validate generic case of WidthConcatenate kernel
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info));
- width_offset += input->dimension(0);
- }
- break;
- }
-
- return Status{};
-}
-
-void CLWidthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT
-{
- _num_inputs = inputs_vector.size();
-
- std::vector<ITensorInfo *> inputs_vector_info;
- for(unsigned int i = 0; i < _num_inputs; i++)
- {
- inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
- }
- const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX);
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
-
- ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info()));
-
- switch(_num_inputs)
- {
- case 2:
- // Configure WidthConcatenate2Tensors kernel
- _concat_x2_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), output);
- break;
- case 4:
- // Configure WidthConcatenate4Tensors kernel
- _concat_x4_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
- break;
- default:
- // Configure generic case WidthConcatenate kernels
- _concat_kernels_vector.resize(_num_inputs);
-
- unsigned int width_offset = 0;
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output);
- width_offset += inputs_vector.at(i)->info()->dimension(0);
- }
- break;
- }
-}
-
-void CLWidthConcatenateLayer::run()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- switch(_num_inputs)
- {
- case 2:
- CLScheduler::get().enqueue(_concat_x2_kernel, true);
- break;
- case 4:
- CLScheduler::get().enqueue(_concat_x4_kernel, true);
- break;
- default:
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
- }
- break;
- }
-}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
deleted file mode 100755
index b89aafa2e5..0000000000
--- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-GCDepthConcatenateLayer::GCDepthConcatenateLayer() //NOLINT
- : _concat_kernels_vector(),
- _border_handlers_vector(),
- _num_inputs(0)
-{
-}
-
-void GCDepthConcatenateLayer::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
-{
- ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
- _num_inputs = inputs_vector.size();
-
- unsigned int depth_offset = 0;
-
- _concat_kernels_vector.reserve(_num_inputs);
- _border_handlers_vector.reserve(_num_inputs);
-
- for(unsigned int i = 0; i < _num_inputs; i++)
- {
- auto concat_kernel = support::cpp14::make_unique<GCDepthConcatenateLayerKernel>();
- auto border_kernel = support::cpp14::make_unique<GCFillBorderKernel>();
-
- concat_kernel->configure(inputs_vector.at(i), depth_offset, output);
- border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue());
- _concat_kernels_vector.emplace_back(std::move(concat_kernel));
- _border_handlers_vector.emplace_back(std::move(border_kernel));
-
- depth_offset += inputs_vector.at(i)->info()->dimension(2);
- }
-}
-
-void GCDepthConcatenateLayer::run()
-{
- for(unsigned i = 0; i < _num_inputs; i++)
- {
- GCScheduler::get().dispatch(*_border_handlers_vector[i].get(), false);
- GCScheduler::get().memory_barrier();
- GCScheduler::get().dispatch(*_concat_kernels_vector[i].get(), true);
- }
-}
diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
index 71af560fb0..d338493e51 100644
--- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp
+++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp
@@ -23,8 +23,9 @@
*/
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -44,7 +45,28 @@ NEConcatenateLayer::NEConcatenateLayer()
{
}
-void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, size_t axis)
+void NEConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis)
+{
+ configure_internal(std::move(inputs_vector), output, axis);
+}
+
+void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis)
+{
+ configure_internal(std::move(inputs_vector), output, axis);
+}
+
+Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+{
+ return validate_internal(inputs_vector, output, axis);
+}
+
+Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+{
+ return validate_internal(inputs_vector, output, axis);
+}
+
+template <typename TensorType, typename>
+void NEConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis)
{
ARM_COMPUTE_ERROR_ON(output == nullptr);
_axis = axis;
@@ -97,7 +119,8 @@ void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector,
}
}
-Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
+template <typename TensorInfoType, typename>
+Status NEConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
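
The new overload pair above means concatenation of read-only inputs no longer needs a width- or depth-specific function: vectors of ITensor * and const ITensor * both funnel into the same templated configure_internal. A short usage sketch with hypothetical tensor names, assuming the tensors are already allocated:

    #include <vector>

    #include "arm_compute/core/Window.h"
    #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Hypothetical helper: const-qualified inputs are accepted directly by the new
    // NEConcatenateLayer::configure(std::vector<const ITensor *>, ...) overload.
    void configure_width_concat(const Tensor &a, const Tensor &b, Tensor &dst, NEConcatenateLayer &concat)
    {
        std::vector<const ITensor *> inputs{ &a, &b };
        concat.configure(inputs, &dst, Window::DimX); // concatenate along the width (axis 0)
    }
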
diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
deleted file mode 100644
index 8f070a2d7d..0000000000
--- a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-NEDepthConcatenateLayer::NEDepthConcatenateLayer() // NOLINT
- : _inputs_vector(),
- _concat_kernels_vector(),
- _border_handlers_vector(),
- _num_inputs(0)
-{
-}
-
-void NEDepthConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, ITensor *output) // NOLINT
-{
- _num_inputs = inputs_vector.size();
-
- std::vector<ITensorInfo *> inputs_vector_info;
- for(unsigned int i = 0; i < _num_inputs; i++)
- {
- inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
- }
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ);
-
-    // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(NEDepthConcatenateLayer::validate(inputs_vector_info, output->info()));
-
- unsigned int depth_offset = 0;
- _concat_kernels_vector.reserve(_num_inputs);
- _border_handlers_vector.reserve(_num_inputs);
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto concat_kernel = support::cpp14::make_unique<NEDepthConcatenateLayerKernel>();
- auto border_kernel = support::cpp14::make_unique<NEFillBorderKernel>();
- concat_kernel->configure(inputs_vector.at(i), depth_offset, output);
- border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
- _border_handlers_vector.emplace_back(std::move(border_kernel));
- _concat_kernels_vector.emplace_back(std::move(concat_kernel));
-
- depth_offset += inputs_vector.at(i)->info()->dimension(2);
- }
-
- // Set valid region from shape
- output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
-}
-
-Status NEDepthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
-
-    // Output auto initialization if not yet initialized
- TensorInfo tmp_output_info = *output->clone();
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ);
- auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
-
- unsigned int depth_offset = 0;
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info));
- depth_offset += input->dimension(2);
- }
-
- return Status{};
-}
-
-void NEDepthConcatenateLayer::run()
-{
- for(unsigned i = 0; i < _num_inputs; ++i)
- {
- NEScheduler::get().schedule(_border_handlers_vector[i].get(), Window::DimX);
- NEScheduler::get().schedule(_concat_kernels_vector[i].get(), Window::DimX);
- }
-}
diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp
index 3d3c6a12fa..42b805794b 100644
--- a/src/runtime/NEON/functions/NELSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayer.cpp
@@ -107,14 +107,14 @@ void NELSTMLayer::configure(const ITensor *input,
inputs_vector.emplace_back(output_state_in);
_memory_group.manage(&_forget_gate_out2);
- _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2);
+ _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2, Window::DimX);
std::vector<const ITensor *> weights_vector;
weights_vector.emplace_back(input_to_forget_weights);
weights_vector.emplace_back(recurrent_to_forget_weights);
- _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6);
+ _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6, Window::DimX);
_memory_group.manage(&_forget_gate_out5);
_fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, forget_gate_bias, &_forget_gate_out5);
@@ -165,7 +165,7 @@ void NELSTMLayer::configure(const ITensor *input,
lstm_weights.emplace_back(lstm_params.input_to_input_weights());
lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
- _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2);
+ _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2, Window::DimX);
_memory_group.manage(&_input_gate_out1);
_memory_group.manage(&_input_gate_out4);
@@ -234,7 +234,7 @@ void NELSTMLayer::configure(const ITensor *input,
in_out_weights.emplace_back(input_to_output_weights);
in_out_weights.emplace_back(recurrent_to_output_weights);
- _concat_weights_output.configure(in_out_weights, &_output2);
+ _concat_weights_output.configure(in_out_weights, &_output2, Window::DimX);
_memory_group.manage(&_output1);
_memory_group.manage(&_output4);
@@ -308,7 +308,7 @@ void NELSTMLayer::configure(const ITensor *input,
scratch_inputs.emplace_back(&_cell_state_out1);
scratch_inputs.emplace_back(forget_gate_out);
scratch_inputs.emplace_back(output_gate_out);
- _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer);
+ _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX);
input_gate_out->allocator()->allocate();
_cell_state_out1.allocator()->allocate();
forget_gate_out->allocator()->allocate();
@@ -383,8 +383,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
std::vector<const ITensorInfo *> inputs_vector;
inputs_vector.emplace_back(input);
inputs_vector.emplace_back(output_state_in);
- TensorInfo forget_gate_concat;
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector, &forget_gate_concat));
+ const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
+ TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type());
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX));
// Validate forget gate
ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_forget_weights, forget_gate_bias, &forget_gate));
@@ -409,8 +410,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
std::vector<const ITensorInfo *> lstm_weights;
lstm_weights.emplace_back(lstm_params.input_to_input_weights());
lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
- TensorInfo lstm_gate_concat;
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(lstm_weights, &lstm_gate_concat));
+ TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
+ TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type());
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX));
ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), &input_gate));
if(lstm_params.has_peephole_opt())
@@ -445,8 +447,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
std::vector<const ITensorInfo *> in_out_weights;
in_out_weights.emplace_back(input_to_output_weights);
in_out_weights.emplace_back(recurrent_to_output_weights);
- TensorInfo in_out_gate_concat;
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(in_out_weights, &in_out_gate_concat));
+ TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
+ TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type());
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX));
ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_output_weights, output_gate_bias, &output_gate_tmp));
@@ -485,7 +488,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input,
inputs_vector_info_raw.push_back(&forget_gate);
inputs_vector_info_raw.push_back(&output_gate_tmp);
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
return Status{};
}
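
The recurring pattern in the validate() changes above: rather than passing an empty TensorInfo and relying on auto-initialization, the destination is sized up front with the shape calculator and handed to NEConcatenateLayer::validate together with the axis. A condensed sketch of that pattern (the helper name is hypothetical):

    #include <vector>

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Window.h"
    #include "arm_compute/core/utils/misc/ShapeCalculator.h"
    #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"

    using namespace arm_compute;

    // Hypothetical helper mirroring the NELSTMLayer::validate changes: pre-size the
    // destination info, then validate a width (axis 0) concatenation.
    // Assumes at least two non-null inputs, as the real validate enforces.
    Status validate_width_concat(const std::vector<const ITensorInfo *> &inputs)
    {
        const TensorShape out_shape = misc::shape_calculator::calculate_concatenate_shape(inputs, 0);
        const TensorInfo  dst_info(out_shape, 1, inputs[0]->data_type());
        return NEConcatenateLayer::validate(inputs, &dst_info, Window::DimX);
    }
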
diff --git a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp
deleted file mode 100644
index 25b5216305..0000000000
--- a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-NEWidthConcatenateLayer::NEWidthConcatenateLayer()
- : _concat_kernels_vector(),
- _num_inputs(0)
-{
-}
-
-template <typename TensorInfoType, typename>
-inline Status NEWidthConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2);
-
-    // Output auto initialization if not yet initialized
- TensorInfo tmp_output_info = *output->clone();
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX);
- auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type());
-
- unsigned int width_offset = 0;
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info));
- width_offset += input->dimension(0);
- }
-
- return Status{};
-}
-template <typename TensorType, typename>
-inline void NEWidthConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output)
-{
- _num_inputs = inputs_vector.size();
-
- std::vector<ITensorInfo *> inputs_vector_info;
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
- }
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX);
-
-    // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(NEWidthConcatenateLayer::validate(inputs_vector_info, output->info()));
-
- unsigned int width_offset = 0;
-
- _concat_kernels_vector.resize(_num_inputs);
-
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output);
- width_offset += inputs_vector.at(i)->info()->dimension(0);
- }
-}
-
-void NEWidthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output)
-{
- configure_internal(std::move(inputs_vector), output);
-}
-
-void NEWidthConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output)
-{
- configure_internal(std::move(inputs_vector), output);
-}
-
-Status NEWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output)
-{
- return validate_internal(inputs_vector, output);
-}
-
-Status NEWidthConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output)
-{
- return validate_internal(inputs_vector, output);
-}
-
-void NEWidthConcatenateLayer::run()
-{
- for(unsigned i = 0; i < _num_inputs; ++i)
- {
- NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimY);
- }
-}
diff --git a/tests/benchmark/CL/DepthConcatenateLayer.cpp b/tests/benchmark/CL/DepthConcatenateLayer.cpp
index 3a5c457135..9b101d84ed 100644
--- a/tests/benchmark/CL/DepthConcatenateLayer.cpp
+++ b/tests/benchmark/CL/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
#include "tests/CL/CLAccessor.h"
#include "tests/benchmark/fixtures/DepthConcatenateLayerFixture.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -44,7 +44,7 @@ namespace
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
} // namespace
-using CLDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<CLTensor, ICLTensor, CLDepthConcatenateLayer, CLAccessor>;
+using CLDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<CLTensor, ICLTensor, CLConcatenateLayer, CLAccessor>;
TEST_SUITE(CL)
TEST_SUITE(DepthConcatenateLayer)
diff --git a/tests/benchmark/NEON/DepthConcatenateLayer.cpp b/tests/benchmark/NEON/DepthConcatenateLayer.cpp
index b82da24999..1d8b18c2bb 100644
--- a/tests/benchmark/NEON/DepthConcatenateLayer.cpp
+++ b/tests/benchmark/NEON/DepthConcatenateLayer.cpp
@@ -23,7 +23,7 @@
*/
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "tests/NEON/Accessor.h"
@@ -44,7 +44,7 @@ namespace
const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
} // namespace
-using NEDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<Tensor, ITensor, NEDepthConcatenateLayer, Accessor>;
+using NEDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<Tensor, ITensor, NEConcatenateLayer, Accessor>;
TEST_SUITE(NEON)
TEST_SUITE(DepthConcatenateLayer)
diff --git a/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h
index 541dfb285c..272da38952 100644
--- a/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h
+++ b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h
@@ -86,7 +86,7 @@ public:
TensorShape dst_shape = misc::shape_calculator::calculate_concatenate_shape(src_ptrs, Window::DimZ);
_dst = create_tensor<TensorType>(dst_shape, data_type, 1);
- _depth_concat.configure(src_ptrs, &_dst);
+ _depth_concat.configure(src_ptrs, &_dst, 2);
for(auto &src : _srcs)
{