diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-05-17 18:14:40 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-05-21 11:28:01 +0000 |
commit | 09f24975437e2e141ba51a07055a9372b0d173a2 (patch) | |
tree | fe565e4b9abd379cb1f467e5d9e36d68fcfbacef | |
parent | f24411ffc842970609a1fb6ba2f9527cfb681dbd (diff) | |
download | ComputeLibrary-09f24975437e2e141ba51a07055a9372b0d173a2.tar.gz |
COMPMID-2109: Remove CL/NE Width/Depth ConcatenateLayer functions.
Change-Id: Icbda771abffbb45d4ed0958933c60ff9ace01314
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1178
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
27 files changed, 96 insertions, 1089 deletions
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index e314f44370..fbaab35414 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -53,7 +53,6 @@ #include "arm_compute/runtime/CL/functions/CLCropResize.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h" #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h" @@ -143,7 +142,6 @@ #include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h" #include "arm_compute/runtime/CL/functions/CLWarpAffine.h" #include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" #include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" #include "arm_compute/runtime/CL/functions/CLYOLOLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index d85a4453d8..c56fc117b9 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Types.h" #include <memory> @@ -41,9 +41,9 @@ class Status; /** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: * - * -# @ref CLWidthConcatenateLayer (if underlying concatenation axis is 0). + * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0). * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref CLDepthConcatenateLayer (if underlying concatenation axis is 2). + * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2). */ class CLConcatenateLayer : public IFunction { @@ -53,7 +53,7 @@ public: /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. @@ -63,7 +63,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h deleted file mode 100644 index 9ef21f32d7..0000000000 --- a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ - -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * @deprecated This function is deprecated and will be removed in release 19.08 - * - * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref CLDepthConcatenateLayerKernel - * - */ -class CLDepthConcatenateLayer : public IFunction -{ -public: - /** Default constructor */ - CLDepthConcatenateLayer(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Input dimensions might differ for each input for the first three dimensions (width, height, depth) - * and must match for the rest. - * Note that the difference between the minimum and maximum width and height among the input tensors - * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and - * height when they are less than the maximum input sizes. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions match the inputs' ones from the fourth dimension and above, - * while width and height are the maximum width and height of the input tensors. - * Finally, depth is the sum of the input depths. - */ - void configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayer - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Input dimensions might differ for each input for the first three dimensions (width, height, depth) - * and must match for the rest. - * Note that the difference between the minimum and maximum width and height among the input tensors - * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and - * height when they are less than the maximum input sizes. - * @param[in] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions match the inputs' ones from the fourth dimension and above, - * while width and height are the maximum width and height of the input tensors. - * Finally, depth is the sum of the input depths. - * - * @return a status - */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<CLDepthConcatenateLayerKernel> _concat_kernels_vector; - std::vector<CLFillBorderKernel> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 8bd47cbf8e..3add152878 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -35,10 +35,10 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/common/LSTMParams.h" @@ -184,7 +184,7 @@ private: CLActivationLayerKernel _projection_clip; CLCopyKernel _copy_cell_state; CLCopyKernel _copy_output; - CLWidthConcatenateLayer _concat_scratch_buffer; + CLConcatenateLayer _concat_scratch_buffer; CLWidthConcatenate2TensorsKernel _concat_inputs_forget_gate; CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate; CLWidthConcatenate2TensorsKernel _concat_weights_input_gate; diff --git a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h deleted file mode 100644 index 6a30fcfa92..0000000000 --- a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__ -#define __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__ - -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to execute concatenate tensors along x axis. This function calls the following kernel: - * - * @deprecated This function is deprecated and will be removed in release 19.08 - * - * -# @ref CLWidthConcatenateLayerKernel - * -# @ref CLWidthConcatenate2TensorsKernel (if there are exactly 2 input tensors) - * -# @ref CLWidthConcatenate4TensorsKernel (if there are exactly 4 input tensors) - * - */ -class CLWidthConcatenateLayer : public IFunction -{ -public: - /** Default constructor */ - CLWidthConcatenateLayer(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Dimensions of all the inputs should match apart for the width which can differ. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions are the same with the inputs from the second dimension and above. - * The first dimension (width) is the sum of the input tensors' widths. - */ - void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Dimensions of all the inputs should match apart for the width which can differ. - * @param[in] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions are the same with the inputs from the second dimension and above. - * The first dimension (width) is the sum of the input tensors' widths. - * - * @return a status - */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<CLWidthConcatenateLayerKernel> _concat_kernels_vector; - CLWidthConcatenate2TensorsKernel _concat_x2_kernel; - CLWidthConcatenate4TensorsKernel _concat_x4_kernel; - unsigned int _num_inputs; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLWIDTHCONCATENATELAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h index 7e01480801..67275303c9 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -31,7 +31,6 @@ #include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h deleted file mode 100644 index da00f387e9..0000000000 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class IGCTensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * @deprecated This function is deprecated and will be removed in release 19.08 - * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref GCDepthConcatenateLayerKernel - * - */ -class GCDepthConcatenateLayer : public IFunction -{ -public: - /** Default constructor */ - GCDepthConcatenateLayer(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<std::unique_ptr<GCDepthConcatenateLayerKernel>> _concat_kernels_vector; - std::vector<std::unique_ptr<GCFillBorderKernel>> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index d84422f882..0d94ea78fc 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -51,7 +51,6 @@ #include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NECropResize.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h" @@ -142,7 +141,6 @@ #include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h" #include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" #include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index f8cda326d2..8c97efc4f0 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -26,8 +26,9 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/Requires.h" #include <memory> #include <vector> @@ -41,9 +42,9 @@ class Status; /** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: * - * -# @ref NEWidthConcatenateLayer (if underlying concatenation axis is 0). + * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref NEDepthConcatenateLayer (if underlying concatenation axis is 2). + * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). */ class NEConcatenateLayer : public IFunction { @@ -53,17 +54,18 @@ public: /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. */ - void configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, size_t axis); + void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis); + void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. @@ -72,11 +74,19 @@ public: * @return a status */ static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); + static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; private: + template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)> + void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis); + + template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)> + static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis); + +private: std::vector<std::unique_ptr<INEKernel>> _concat_kernels; unsigned int _num_inputs; unsigned int _axis; diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h deleted file mode 100644 index b3bf752b40..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref NEDepthConcatenateLayerKernel - * - * @deprecated This function is deprecated and will be removed in release 19.08 - * - */ -class NEDepthConcatenateLayer : public IFunction -{ -public: - /** Default constructor */ - NEDepthConcatenateLayer(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Input dimensions might differ for each input for the first three dimensions (width, height, depth) - * and must match for the rest. - * Note that the difference between the minimum and maximum width and height among the input tensors - * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and - * height when they are less than the maximum input sizes. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions match the inputs' ones from the fourth dimension and above, - * while width and height are the maximum width and height of the input tensors. - * Finally, depth is the sum of the input depths. - */ - void configure(const std::vector<ITensor *> &inputs_vector, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayer - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. - * Input dimensions might differ for each input for the first three dimensions (width, height, depth) - * and must match for the rest. - * Note that the difference between the minimum and maximum width and height among the input tensors - * must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and - * height when they are less than the maximum input sizes. - * @param[in] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions match the inputs' ones from the fourth dimension and above, - * while width and height are the maximum width and height of the input tensors. - * Finally, depth is the sum of the input depths. - * - * @return a status - */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<ITensor *> _inputs_vector; - std::vector<std::unique_ptr<NEDepthConcatenateLayerKernel>> _concat_kernels_vector; - std::vector<std::unique_ptr<NEFillBorderKernel>> _border_handlers_vector; - unsigned int _num_inputs; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index f3a1aa7c75..cf0f06c215 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -32,9 +32,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -176,11 +176,11 @@ private: NEActivationLayerKernel _projection_clip; NECopyKernel _copy_cell_state; NECopyKernel _copy_output; - NEWidthConcatenateLayer _concat_scratch_buffer; - NEWidthConcatenateLayer _concat_inputs_forget_gate; - NEWidthConcatenateLayer _concat_weights_forget_gate; - NEWidthConcatenateLayer _concat_weights_input_gate; - NEWidthConcatenateLayer _concat_weights_output; + NEConcatenateLayer _concat_scratch_buffer; + NEConcatenateLayer _concat_inputs_forget_gate; + NEConcatenateLayer _concat_weights_forget_gate; + NEConcatenateLayer _concat_weights_input_gate; + NEConcatenateLayer _concat_weights_output; Tensor _input_gate_out1; Tensor _input_gate_out2; Tensor _input_gate_out3; diff --git a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h deleted file mode 100644 index 8d221766cd..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ -#define __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" - -#include "arm_compute/core/utils/misc/Requires.h" - -#include <memory> -#include <type_traits> -#include <vector> - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to execute concatenate tensors along x axis. This function calls the following kernel: - * - * -# @ref NEWidthConcatenateLayerKernel - * - * @deprecated This function is deprecated and will be removed in release 19.08 - */ -class NEWidthConcatenateLayer : public IFunction -{ -public: - /** Default constructor */ - NEWidthConcatenateLayer(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * Dimensions of all the inputs should match apart for the width which can differ. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions are the same with the inputs from the second dimension and above. - * The first dimension (width) is the sum of the input tensors' widths. - */ - void configure(std::vector<ITensor *> inputs_vector, ITensor *output); - void configure(std::vector<const ITensor *> inputs_vector, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayer - * - * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * Dimensions of all the inputs should match apart for the width which can differ. - * @param[in] output Output tensor. Data types supported: Same as @p input. - * Output tensor dimensions are the same with the inputs from the second dimension and above. - * The first dimension (width) is the sum of the input tensors' widths. - * - * @return a status - */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output); - static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<NEWidthConcatenateLayerKernel> _concat_kernels_vector; - unsigned int _num_inputs; - template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)> - void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output); - template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)> - static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 03e889d14a..caf7ee77bc 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -267,11 +267,11 @@ v19.05 Public major release - New OpenGLES kernels / functions: - @ref GCConcatenateLayer - Deprecated functions/interfaces - - @ref GCDepthConcatenateLayer - - @ref NEWidthConcatenateLayer - - @ref NEDepthConcatenateLayer - - @ref CLWidthConcatenateLayer - - @ref CLDepthConcatenateLayer + - GCDepthConcatenateLayer + - NEWidthConcatenateLayer + - NEDepthConcatenateLayer + - CLWidthConcatenateLayer + - CLDepthConcatenateLayer - CLGEMMInterleave4x4 - CLGEMMTranspose1xW - Support different quantization info in CLConcatLayer. @@ -424,7 +424,7 @@ v18.11 Public major release - Added documentation for add a new function or kernel. - Improved doxygen documentation adding a list of the existing functions. - Add 4D tensors support to - - @ref CLWidthConcatenateLayer + - CLWidthConcatenateLayer - @ref CLFlattenLayer - @ref CLSoftmaxLayer - Add dot product support for @ref CLDepthwiseConvolutionLayer3x3NHWCKernel non-unit stride @@ -453,7 +453,7 @@ v18.08 Public major release - Removed support for QS8/QS16 data types. - Added support for grouped convolution in @ref CLConvolutionLayer. - Added NHWC data layout support to: - - @ref NEDepthConcatenateLayer / @ref CLDepthConcatenateLayer + - NEDepthConcatenateLayer / CLDepthConcatenateLayer - @ref NEWinogradConvolutionLayer / @ref CLWinogradConvolutionLayer - @ref CLDepthwiseConvolutionLayer - @ref CLDirectConvolutionLayer @@ -496,7 +496,7 @@ v18.05 Public major release - @ref CLCopy / @ref CLCopyKernel - @ref CLLSTMLayer - @ref CLRNNLayer - - @ref CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel + - CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel - @ref CLWinogradFilterTransformKernel / @ref CLWinogradInputTransformKernel / @ref CLWinogradConvolutionLayer - @ref CLWinogradInputTransformKernel / @ref CLWinogradInputTransform - New Neon kernels / functions: @@ -619,7 +619,7 @@ v17.12 Public major release - @ref GCActivationLayerKernel / @ref GCActivationLayer - @ref GCBatchNormalizationLayerKernel / @ref GCBatchNormalizationLayer - @ref GCCol2ImKernel - - @ref GCDepthConcatenateLayerKernel / @ref GCDepthConcatenateLayer + - @ref GCDepthConcatenateLayerKernel / GCDepthConcatenateLayer - @ref GCDirectConvolutionLayerKernel / @ref GCDirectConvolutionLayer - @ref GCDropoutLayerKernel / @ref GCDropoutLayer - @ref GCFillBorderKernel / @ref GCFillBorder @@ -707,7 +707,7 @@ v17.06 Public major release - User can specify his own scheduler by implementing the @ref IScheduler interface. - New OpenCL kernels / functions: - @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer - - @ref CLDepthConcatenateLayerKernel / @ref CLDepthConcatenateLayer + - @ref CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer - @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection - @ref CLLocallyConnectedMatrixMultiplyKernel / @ref CLLocallyConnectedLayer - @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights @@ -715,7 +715,7 @@ v17.06 Public major release - @ref CPPDetectionWindowNonMaximaSuppressionKernel - New NEON kernels / functions: - @ref NEBatchNormalizationLayerKernel / @ref NEBatchNormalizationLayer - - @ref NEDepthConcatenateLayerKernel / @ref NEDepthConcatenateLayer + - @ref NEDepthConcatenateLayerKernel / NEDepthConcatenateLayer - @ref NEDirectConvolutionLayerKernel / @ref NEDirectConvolutionLayer - @ref NELocallyConnectedMatrixMultiplyKernel / @ref NELocallyConnectedLayer - @ref NEWeightsReshapeKernel / @ref NEConvolutionLayerReshapeWeights diff --git a/docs/05_functions_list.dox b/docs/05_functions_list.dox index 9a5c8c0027..999b573674 100644 --- a/docs/05_functions_list.dox +++ b/docs/05_functions_list.dox @@ -112,7 +112,6 @@ namespace arm_compute - @ref NEConvolutionSquare <matrix_size> - @ref NECropResize - @ref NEDeconvolutionLayer - - @ref NEDepthConcatenateLayer - @ref NEDepthwiseConvolutionAssemblyDispatch - @ref NEDepthwiseConvolutionLayer - @ref NEDepthwiseConvolutionLayer3x3 @@ -171,7 +170,6 @@ namespace arm_compute - @ref NEStackLayer - @ref NEUnstack - @ref NEUpsampleLayer - - @ref NEWidthConcatenateLayer - @ref NEWinogradConvolutionLayer @section S5_2 OpenCL functions @@ -188,7 +186,6 @@ namespace arm_compute - @ref CLCropResize - @ref CLDeconvolutionLayer - @ref CLDeconvolutionLayerUpsample - - @ref CLDepthConcatenateLayer - @ref CLDepthwiseConvolutionLayer - @ref CLDepthwiseConvolutionLayer3x3 - @ref CLDepthwiseSeparableConvolutionLayer @@ -241,7 +238,6 @@ namespace arm_compute - @ref CLStackLayer - @ref CLUnstack - @ref CLUpsampleLayer - - @ref CLWidthConcatenateLayer - @ref CLWinogradConvolutionLayer - @ref ICLSimpleFunction - @ref CLAbsoluteDifference @@ -327,7 +323,6 @@ namespace arm_compute - @ref GCConcatenateLayer - @ref GCConvolutionLayer - @ref GCConvolutionLayerReshapeWeights - - @ref GCDepthConcatenateLayer - @ref GCDepthwiseConvolutionLayer3x3 - @ref GCDirectConvolutionLayer - @ref GCDropoutLayer diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 0de58f5c28..13543dbf15 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,43 +68,6 @@ struct GCEltwiseFunctions namespace detail { -// Specialize functions -template <> -std::unique_ptr<IFunction> create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(ConcatenateLayerNode &node) -{ - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl); - ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); - - // Return nullptr if depth concatenate is switched off - if(!node.is_enabled()) - { - return nullptr; - } - - // Extract IO and info - std::vector<GCTargetInfo::TensorType *> inputs; - for(unsigned int i = 0; i < node.num_inputs(); ++i) - { - inputs.push_back(get_backing_tensor<GCTargetInfo>(node.input(i))); - } - typename GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0)); - - // Create and configure function - auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>(); - func->configure(inputs, output); - - // Log info - ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " - << node.name() - << " Target " << GCTargetInfo::TargetType - << " Data Type: " << output->info()->data_type() - << " Shape: " << output->info()->tensor_shape() - << " Num Inputs: " << inputs.size() - << std::endl); - - return std::move(func); -} - template <> std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx) { @@ -282,7 +245,7 @@ std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext & case NodeType::ConvolutionLayer: return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx); case NodeType::ConcatenateLayer: - return detail::create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node)); + return detail::create_concatenate_layer<GCConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node)); case NodeType::DepthwiseConvolutionLayer: return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node)); case NodeType::EltwiseLayer: diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index b8224d2cce..0594a17a7a 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" +#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp deleted file mode 100644 index f687e54552..0000000000 --- a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLDepthConcatenateLayer::CLDepthConcatenateLayer() // NOLINT - : _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void CLDepthConcatenateLayer::configure(const std::vector<ICLTensor *> &inputs_vector, ICLTensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector<ITensorInfo *> inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - - _concat_kernels_vector.resize(_num_inputs); - _border_handlers_vector.resize(_num_inputs); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLDepthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int depth_offset = 0; - for(unsigned int i = 0; i < _num_inputs; i++) - { - _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); - _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue()); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } - - // Set valid region from shape - output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape)); -} - -Status CLDepthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - unsigned int depth_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info)); - depth_offset += input->dimension(2); - } - - return Status{}; -} - -void CLDepthConcatenateLayer::run() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - for(unsigned i = 0; i < _num_inputs; i++) - { - CLScheduler::get().enqueue(_border_handlers_vector[i], false); - CLScheduler::get().enqueue(_concat_kernels_vector[i], true); - } -} diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 4606a66bf2..85a81a8cd4 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -316,7 +316,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, scratch_inputs.emplace_back(&_cell_state_out1); scratch_inputs.emplace_back(forget_gate_out); scratch_inputs.emplace_back(output_gate_out); - _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer); + _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX); input_gate_out->allocator()->allocate(); _cell_state_out1.allocator()->allocate(); forget_gate_out->allocator()->allocate(); @@ -497,7 +497,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, inputs_vector_info_raw.push_back(&forget_gate); inputs_vector_info_raw.push_back(&output_gate_tmp); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer)); + ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX)); return Status{}; } diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp deleted file mode 100644 index a8667c3138..0000000000 --- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLWidthConcatenateLayer::CLWidthConcatenateLayer() // NOLINT - : _concat_kernels_vector(), - _concat_x2_kernel(), - _concat_x4_kernel(), - _num_inputs(0) -{ -} - -Status CLWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) // NOLINT -{ - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], &tmp_output_info)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], &tmp_output_info)); - break; - default: - unsigned int width_offset = 0; - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info)); - width_offset += input->dimension(0); - } - break; - } - - return Status{}; -} - -void CLWidthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector<ITensorInfo *> inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - - ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info())); - - switch(_num_inputs) - { - case 2: - // Configure WidthConcatenate2Tensors kernel - _concat_x2_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), output); - break; - case 4: - // Configure WidthConcatenate4Tensors kernel - _concat_x4_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - break; - default: - // Configure generic case WidthConcatenate kernels - _concat_kernels_vector.resize(_num_inputs); - - unsigned int width_offset = 0; - for(unsigned int i = 0; i < _num_inputs; ++i) - { - _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output); - width_offset += inputs_vector.at(i)->info()->dimension(0); - } - break; - } -} - -void CLWidthConcatenateLayer::run() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - switch(_num_inputs) - { - case 2: - CLScheduler::get().enqueue(_concat_x2_kernel, true); - break; - case 4: - CLScheduler::get().enqueue(_concat_x4_kernel, true); - break; - default: - for(unsigned int i = 0; i < _num_inputs; ++i) - { - CLScheduler::get().enqueue(_concat_kernels_vector[i], true); - } - break; - } -} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp deleted file mode 100755 index b89aafa2e5..0000000000 --- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -GCDepthConcatenateLayer::GCDepthConcatenateLayer() //NOLINT - : _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void GCDepthConcatenateLayer::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT -{ - ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); - - _num_inputs = inputs_vector.size(); - - unsigned int depth_offset = 0; - - _concat_kernels_vector.reserve(_num_inputs); - _border_handlers_vector.reserve(_num_inputs); - - for(unsigned int i = 0; i < _num_inputs; i++) - { - auto concat_kernel = support::cpp14::make_unique<GCDepthConcatenateLayerKernel>(); - auto border_kernel = support::cpp14::make_unique<GCFillBorderKernel>(); - - concat_kernel->configure(inputs_vector.at(i), depth_offset, output); - border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue()); - _concat_kernels_vector.emplace_back(std::move(concat_kernel)); - _border_handlers_vector.emplace_back(std::move(border_kernel)); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } -} - -void GCDepthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; i++) - { - GCScheduler::get().dispatch(*_border_handlers_vector[i].get(), false); - GCScheduler::get().memory_barrier(); - GCScheduler::get().dispatch(*_concat_kernels_vector[i].get(), true); - } -} diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 71af560fb0..d338493e51 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -44,7 +45,28 @@ NEConcatenateLayer::NEConcatenateLayer() { } -void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, size_t axis) +void NEConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis) +{ + configure_internal(std::move(inputs_vector), output, axis); +} + +void NEConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis) +{ + configure_internal(std::move(inputs_vector), output, axis); +} + +Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) +{ + return validate_internal(inputs_vector, output, axis); +} + +Status NEConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) +{ + return validate_internal(inputs_vector, output, axis); +} + +template <typename TensorType, typename> +void NEConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis) { ARM_COMPUTE_ERROR_ON(output == nullptr); _axis = axis; @@ -97,7 +119,8 @@ void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, } } -Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) +template <typename TensorInfoType, typename> +Status NEConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp deleted file mode 100644 index 8f070a2d7d..0000000000 --- a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEDepthConcatenateLayer::NEDepthConcatenateLayer() // NOLINT - : _inputs_vector(), - _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void NEDepthConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, ITensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector<ITensorInfo *> inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(NEDepthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int depth_offset = 0; - _concat_kernels_vector.reserve(_num_inputs); - _border_handlers_vector.reserve(_num_inputs); - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto concat_kernel = support::cpp14::make_unique<NEDepthConcatenateLayerKernel>(); - auto border_kernel = support::cpp14::make_unique<NEFillBorderKernel>(); - concat_kernel->configure(inputs_vector.at(i), depth_offset, output); - border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f))); - _border_handlers_vector.emplace_back(std::move(border_kernel)); - _concat_kernels_vector.emplace_back(std::move(concat_kernel)); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } - - // Set valid region from shape - output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape)); -} - -Status NEDepthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - unsigned int depth_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info)); - depth_offset += input->dimension(2); - } - - return Status{}; -} - -void NEDepthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; ++i) - { - NEScheduler::get().schedule(_border_handlers_vector[i].get(), Window::DimX); - NEScheduler::get().schedule(_concat_kernels_vector[i].get(), Window::DimX); - } -} diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index 3d3c6a12fa..42b805794b 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -107,14 +107,14 @@ void NELSTMLayer::configure(const ITensor *input, inputs_vector.emplace_back(output_state_in); _memory_group.manage(&_forget_gate_out2); - _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2); + _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2, Window::DimX); std::vector<const ITensor *> weights_vector; weights_vector.emplace_back(input_to_forget_weights); weights_vector.emplace_back(recurrent_to_forget_weights); - _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6); + _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6, Window::DimX); _memory_group.manage(&_forget_gate_out5); _fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, forget_gate_bias, &_forget_gate_out5); @@ -165,7 +165,7 @@ void NELSTMLayer::configure(const ITensor *input, lstm_weights.emplace_back(lstm_params.input_to_input_weights()); lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights()); - _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2); + _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2, Window::DimX); _memory_group.manage(&_input_gate_out1); _memory_group.manage(&_input_gate_out4); @@ -234,7 +234,7 @@ void NELSTMLayer::configure(const ITensor *input, in_out_weights.emplace_back(input_to_output_weights); in_out_weights.emplace_back(recurrent_to_output_weights); - _concat_weights_output.configure(in_out_weights, &_output2); + _concat_weights_output.configure(in_out_weights, &_output2, Window::DimX); _memory_group.manage(&_output1); _memory_group.manage(&_output4); @@ -308,7 +308,7 @@ void NELSTMLayer::configure(const ITensor *input, scratch_inputs.emplace_back(&_cell_state_out1); scratch_inputs.emplace_back(forget_gate_out); scratch_inputs.emplace_back(output_gate_out); - _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer); + _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX); input_gate_out->allocator()->allocate(); _cell_state_out1.allocator()->allocate(); forget_gate_out->allocator()->allocate(); @@ -383,8 +383,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector<const ITensorInfo *> inputs_vector; inputs_vector.emplace_back(input); inputs_vector.emplace_back(output_state_in); - TensorInfo forget_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector, &forget_gate_concat)); + const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0); + TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX)); // Validate forget gate ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_forget_weights, forget_gate_bias, &forget_gate)); @@ -409,8 +410,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector<const ITensorInfo *> lstm_weights; lstm_weights.emplace_back(lstm_params.input_to_input_weights()); lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights()); - TensorInfo lstm_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(lstm_weights, &lstm_gate_concat)); + TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0); + TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX)); ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), &input_gate)); if(lstm_params.has_peephole_opt()) @@ -445,8 +447,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector<const ITensorInfo *> in_out_weights; in_out_weights.emplace_back(input_to_output_weights); in_out_weights.emplace_back(recurrent_to_output_weights); - TensorInfo in_out_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(in_out_weights, &in_out_gate_concat)); + TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0); + TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX)); ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_output_weights, output_gate_bias, &output_gate_tmp)); @@ -485,7 +488,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input, inputs_vector_info_raw.push_back(&forget_gate); inputs_vector_info_raw.push_back(&output_gate_tmp); - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer)); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX)); return Status{}; } diff --git a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp deleted file mode 100644 index 25b5216305..0000000000 --- a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEWidthConcatenateLayer::NEWidthConcatenateLayer() - : _concat_kernels_vector(), - _num_inputs(0) -{ -} - -template <typename TensorInfoType, typename> -inline Status NEWidthConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - unsigned int width_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info)); - width_offset += input->dimension(0); - } - - return Status{}; -} -template <typename TensorType, typename> -inline void NEWidthConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output) -{ - _num_inputs = inputs_vector.size(); - - std::vector<ITensorInfo *> inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; ++i) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(NEWidthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int width_offset = 0; - - _concat_kernels_vector.resize(_num_inputs); - - for(unsigned int i = 0; i < _num_inputs; ++i) - { - _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output); - width_offset += inputs_vector.at(i)->info()->dimension(0); - } -} - -void NEWidthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output) -{ - configure_internal(std::move(inputs_vector), output); -} - -void NEWidthConcatenateLayer::configure(std::vector<const ITensor *> inputs_vector, ITensor *output) -{ - configure_internal(std::move(inputs_vector), output); -} - -Status NEWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) -{ - return validate_internal(inputs_vector, output); -} - -Status NEWidthConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output) -{ - return validate_internal(inputs_vector, output); -} - -void NEWidthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; ++i) - { - NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimY); - } -} diff --git a/tests/benchmark/CL/DepthConcatenateLayer.cpp b/tests/benchmark/CL/DepthConcatenateLayer.cpp index 3a5c457135..9b101d84ed 100644 --- a/tests/benchmark/CL/DepthConcatenateLayer.cpp +++ b/tests/benchmark/CL/DepthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" +#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" #include "tests/CL/CLAccessor.h" #include "tests/benchmark/fixtures/DepthConcatenateLayerFixture.h" #include "tests/datasets/ShapeDatasets.h" @@ -44,7 +44,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); } // namespace -using CLDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<CLTensor, ICLTensor, CLDepthConcatenateLayer, CLAccessor>; +using CLDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<CLTensor, ICLTensor, CLConcatenateLayer, CLAccessor>; TEST_SUITE(CL) TEST_SUITE(DepthConcatenateLayer) diff --git a/tests/benchmark/NEON/DepthConcatenateLayer.cpp b/tests/benchmark/NEON/DepthConcatenateLayer.cpp index b82da24999..1d8b18c2bb 100644 --- a/tests/benchmark/NEON/DepthConcatenateLayer.cpp +++ b/tests/benchmark/NEON/DepthConcatenateLayer.cpp @@ -23,7 +23,7 @@ */ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" @@ -44,7 +44,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); } // namespace -using NEDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<Tensor, ITensor, NEDepthConcatenateLayer, Accessor>; +using NEDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<Tensor, ITensor, NEConcatenateLayer, Accessor>; TEST_SUITE(NEON) TEST_SUITE(DepthConcatenateLayer) diff --git a/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h index 541dfb285c..272da38952 100644 --- a/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h +++ b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h @@ -86,7 +86,7 @@ public: TensorShape dst_shape = misc::shape_calculator::calculate_concatenate_shape(src_ptrs, Window::DimZ); _dst = create_tensor<TensorType>(dst_shape, data_type, 1); - _depth_concat.configure(src_ptrs, &_dst); + _depth_concat.configure(src_ptrs, &_dst, 2); for(auto &src : _srcs) { |