From e29acf14f5c3f2d2c20799a1ea3e4aad50dff834 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 16 Jul 2018 14:40:09 +0100
Subject: COMPMID-1365: Add support for NHWC in CLDepthConcatenateLayer

Change-Id: I3ed55bdb95d888aff0b0b76fb841bf1669659308
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139963
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 .../CL/kernels/CLDepthConcatenateLayerKernel.h     | 11 ++-
 .../CL/kernels/CLWidthConcatenateLayerKernel.h     |  2 +-
 arm_compute/core/Utils.h                           | 31 ---------
 arm_compute/core/utils/misc/ShapeCalculator.h      | 25 +++++++
 arm_compute/runtime/CL/CLFunctions.h               |  1 +
 .../runtime/CL/functions/CLConcatenateLayer.h      | 81 ++++++++++++++++++++++
 .../runtime/CL/functions/CLDepthConcatenateLayer.h | 26 ++++++-
 .../runtime/CL/functions/CLWidthConcatenateLayer.h | 14 ++--
 8 files changed, 153 insertions(+), 38 deletions(-)
 create mode 100644 arm_compute/runtime/CL/functions/CLConcatenateLayer.h

diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index cbcab8f554..ff8009085f 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -52,7 +52,7 @@ public:
     ~CLDepthConcatenateLayerKernel() = default;
     /** Initialise the kernel's inputs and output
      *
-     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     input        Input tensor. Data types supported: QASYMM8/F16/F32.
      * @param[in]     depth_offset The offset on the Z axis.
      * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
      *
      *
      */
     void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
+     *
+     * @param[in] input        Input tensor info. Data types supported: QASYMM8/F16/F32
+     * @param[in] depth_offset The offset on the Z axis.
+     * @param[in] output       Output tensor info. Data types supported: Same as @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index d206eb0da7..7ecd9276aa 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -58,7 +58,7 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
      *
      * @param[in] input        Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
      * @param[in] width_offset The offset on the X axis.
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 729a46fe3f..1cdfd389db 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -630,37 +630,6 @@ inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matri
     return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0)));
 }
 
-/** Calculate the output shapes of the depth concatenate function.
- *
- * @param[in] inputs_vector The vector that stores all the pointers to input.
- *
- * @return the output shape
- */
-template <typename T>
-TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector)
-{
-    TensorShape out_shape = inputs_vector[0]->info()->tensor_shape();
-
-    size_t max_x = 0;
-    size_t max_y = 0;
-    size_t depth = 0;
-
-    for(const auto &tensor : inputs_vector)
-    {
-        ARM_COMPUTE_ERROR_ON(tensor == nullptr);
-        const TensorShape shape = tensor->info()->tensor_shape();
-        max_x = std::max(shape.x(), max_x);
-        max_y = std::max(shape.y(), max_y);
-        depth += shape.z();
-    }
-
-    out_shape.set(0, max_x);
-    out_shape.set(1, max_y);
-    out_shape.set(2, depth);
-
-    return out_shape;
-}
-
 /** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
  *
  * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 9bf6b046b4..e5516ba154 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -413,6 +413,31 @@ inline TensorShape get_shape_from_info(ITensorInfo *info)
     return info->tensor_shape();
 }
 
+template <typename T>
+inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector)
+{
+    TensorShape out_shape = get_shape_from_info(inputs_vector[0]);
+
+    size_t max_x = 0;
+    size_t max_y = 0;
+    size_t depth = 0;
+
+    for(const auto &tensor : inputs_vector)
+    {
+        ARM_COMPUTE_ERROR_ON(tensor == nullptr);
+        const TensorShape shape = get_shape_from_info(tensor);
+        max_x = std::max(shape.x(), max_x);
+        max_y = std::max(shape.y(), max_y);
+        depth += shape.z();
+    }
+
+    out_shape.set(0, max_x);
+    out_shape.set(1, max_y);
+    out_shape.set(2, depth);
+
+    return out_shape;
+}
+
 template <typename T>
 inline TensorShape calculate_width_concatenate_shape(const std::vector<T *> &inputs_vector)
 {
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 0b69c96673..5e42715c2f 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -42,6 +42,7 @@
 #include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
 #include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
 #include "arm_compute/runtime/CL/functions/CLColorConvert.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
 #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
 #include "arm_compute/runtime/CL/functions/CLConvolution.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
new file mode 100644
index 0000000000..018c58942f
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCONCATENATELAYER_H__
+#define __ARM_COMPUTE_CLCONCATENATELAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+class ITensorInfo;
+class Status;
+
+/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref CLWidthConcatenateLayer (if underlying concatenation axis is 0).
+ * -# @ref CLDepthConcatenateLayer (if underlying concatenation axis is 2).
+ */
+class CLConcatenateLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLConcatenateLayer();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer and @ref CLDepthConcatenateLayer.
+     *
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
+     * @param[in]     axis          Concatenation axis. Supported underlying concatenation axis are 0 and 2.
+     */
+    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output, DataLayoutDimension axis);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayer and @ref CLDepthConcatenateLayer.
+     *
+     * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
+     * @param[in] output        Output tensor info. Data types supported: Same as @p input.
+     * @param[in] axis          Concatenation axis. Supported underlying concatenation axis are 0 and 2.
+     *
+     * @return a status
+     */
+    static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, DataLayoutDimension axis);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<IFunction> _concat_function;
+};
+}
+#endif /* __ARM_COMPUTE_CLCONCATENATELAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
index d505814e73..bafce1c66f 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
@@ -52,10 +52,34 @@ public:
     CLDepthConcatenateLayer();
     /** Initialise the kernel's inputs vector and output.
      *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32.
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     *                              Input dimensions might differ for each input for the first three dimensions (width, height, depth)
+     *                              and must match for the rest.
+     *                              Note that the difference between the minimum and maximum width and height among the input tensors
+     *                              must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
+     *                              height when they are less than the maximum input sizes.
      * @param[out]    output        Output tensor. Data types supported: Same as @p input.
+     *                              Output tensor dimensions match the inputs' ones from the fourth dimension and above,
+     *                              while width and height are the maximum width and height of the input tensors.
+     *                              Finally, depth is the sum of the input depths.
      */
     void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayer
+     *
+     * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     *                          Input dimensions might differ for each input for the first three dimensions (width, height, depth)
+     *                          and must match for the rest.
+     *                          Note that the difference between the minimum and maximum width and height among the input tensors
+     *                          must be divisible by 2 otherwise it is not clear how padding should be added on the inputs' width and
+     *                          height when they are less than the maximum input sizes.
+     * @param[in] output        Output tensor. Data types supported: Same as @p input.
+     *                          Output tensor dimensions match the inputs' ones from the fourth dimension and above,
+     *                          while width and height are the maximum width and height of the input tensors.
+     *                          Finally, depth is the sum of the input depths.
+     *
+     * @return a status
+     */
+    static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);

     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
index 289191e030..44462b02b2 100644
--- a/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h
@@ -50,14 +50,20 @@ public:
     CLWidthConcatenateLayer();
     /** Initialise the kernel's inputs vector and output.
      *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
-     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
+     * @param[in]  inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     *                           Dimensions of all the inputs should match apart for the width which can differ.
+     * @param[out] output        Output tensor. Data types supported: Same as @p input.
+     *                           Output tensor dimensions are the same with the inputs from the second dimension and above.
+     *                           The first dimension (width) is the sum of the input tensors' widths.
      */
     void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
      *
-     * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
-     * @param[in] output        Output tensor info. Data types supported: Same as @p input.
+     * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     *                          Dimensions of all the inputs should match apart for the width which can differ.
+     * @param[in] output        Output tensor. Data types supported: Same as @p input.
+     *                          Output tensor dimensions are the same with the inputs from the second dimension and above.
+     *                          The first dimension (width) is the sum of the input tensors' widths.
      *
      * @return a status
      */
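
For context, a minimal usage sketch of the CLConcatenateLayer interface introduced by this patch, concatenating two tensors along the channel/depth axis. This is illustrative only and not part of the change: the tensor names, shapes, F32 data type and the explicit output initialisation are assumptions, and in a real program the inputs would be filled (e.g. via map()/unmap()) before running.

#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    // Create a default OpenCL context and queue for the CL backend.
    CLScheduler::get().default_init();

    // Two hypothetical inputs of shape (W, H, C) = (16, 16, 3) and (16, 16, 5).
    CLTensor src0{}, src1{}, dst{};
    src0.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
    src1.allocator()->init(TensorInfo(TensorShape(16U, 16U, 5U), 1, DataType::F32));
    // Depth concatenation: output depth is the sum of the input depths (3 + 5 = 8).
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));

    // Optionally check the configuration up front via the new validate() entry point.
    ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate({ src0.info(), src1.info() }, dst.info(), DataLayoutDimension::CHANNEL));

    // Axis CHANNEL selects the depth concatenation path (underlying axis 2 for the default NCHW layout).
    CLConcatenateLayer concat{};
    concat.configure({ &src0, &src1 }, &dst, DataLayoutDimension::CHANNEL);

    // Allocate the CL buffers, then run and wait for the queue to finish.
    src0.allocator()->allocate();
    src1.allocator()->allocate();
    dst.allocator()->allocate();
    concat.run();
    CLScheduler::get().sync();

    return 0;
}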