From 68a3f56627b04acdefebe67d645727dd83889766 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 26 Jul 2018 11:44:03 +0100 Subject: COMPMID-1276 - Allow GEMM to work with 3D input tensor Skipped im2col in CLGEMMConvolutionLayer for 1x1 convolutions with NHWC data layout Change-Id: I894e6b952ed8605e8f3ffc0ffc25c24730d4664c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141909 Tested-by: Jenkins Reviewed-by: Anthony Barbier Reviewed-by: Georgios Pinitas --- .../core/CL/kernels/CLGEMMInterleave4x4Kernel.h | 7 +++- .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 3 +- arm_compute/core/Types.h | 48 ++++++++++++++++------ arm_compute/core/utils/misc/ShapeCalculator.h | 31 ++++++++++---- 4 files changed, 65 insertions(+), 24 deletions(-) (limited to 'arm_compute/core') diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h index 7f8e766f1a..4592fc2921 100644 --- a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h @@ -67,17 +67,19 @@ public: * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleave block + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor */ - void configure(const ICLTensor *input, ICLTensor *output, int mult_interleave4x4_height = 1); + void configure(const ICLTensor *input, ICLTensor *output, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMInterleave4x4Kernel * * @param[in] input Input tensor info. 
Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. * @param[in] mult_interleave4x4_height Multiplication factor for the height of the 4x4 interleave block + * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int mult_interleave4x4_height); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int mult_interleave4x4_height, bool reinterpret_input_as_3d); // Inherited methods overridden void run(const Window &window, cl::CommandQueue &queue) override; @@ -85,6 +87,7 @@ public: private: const ICLTensor *_input; ICLTensor *_output; + bool _reinterpret_input_as_3d; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h index 1b6a0c87a9..e030fa2d2a 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -85,7 +85,8 @@ public: const ICLTensor *_input1; ICLTensor *_output; bool _slide_matrix_b; - bool _is_gemm3d; + bool _reinterpret_input_as_3d; + bool _reinterpret_output_as_3d; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 00370918bd..81d652dd7d 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1031,7 +1031,7 @@ class GEMMReshapeInfo final public: /** Default constructor */ GEMMReshapeInfo() - : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(1) + : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(1), 
_reinterpret_input_as_3d(false) { } /** Constructor @@ -1042,9 +1042,12 @@ public: * @param[in] mult_transpose1xW_width (Optional) Multiplication factor for the width of the 1xW transposed block * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleaved block * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel + * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used + * to perform 1x1 convolutions with the NHWC data layout) */ - GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 1) - : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d) + GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 1, bool reinterpret_input_as_3d = false) + : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d), + _reinterpret_input_as_3d(reinterpret_input_as_3d) { } /** Number of matrix A rows @@ -1098,14 +1101,23 @@ public: { return _depth_output_gemm3d; } + /** Flag which specifies if the input tensor has to be reinterpreted as 3D + * + * @return True if the input tensor has to be reinterpreted as 3D tensor + */ + bool reinterpret_input_as_3d() const + { + return _reinterpret_input_as_3d; + }; private: - const int _m; - const int _n; - const int _k; - const int _mult_transpose1xW_width; - const int _mult_interleave4x4_height; - const int _depth_output_gemm3d; + const int _m; + const int _n; + const int _k; + const int _mult_transpose1xW_width; + const int _mult_interleave4x4_height; + const int 
_depth_output_gemm3d; + const bool _reinterpret_input_as_3d; }; /** GEMM information class. This class stores the necessary information to compute GEMM functions @@ -1118,7 +1130,7 @@ class GEMMInfo public: /** Default constructor */ GEMMInfo() - : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(1) + : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(1), _reinterpret_input_as_3d(false) { } /** Constructor @@ -1127,10 +1139,13 @@ public: * @param[in] is_b_reshaped True if the matrix B has been reshaped * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel + * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used + * to perform 1x1 convolutions with the NHWC data layout) * */ - GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 1) - : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), _depth_output_gemm3d(depth_output_gemm3d) + GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 1, bool reinterpret_input_as_3d = false) + : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), _depth_output_gemm3d(depth_output_gemm3d), + _reinterpret_input_as_3d(reinterpret_input_as_3d) { } /** Flag which specifies if the matrix A has been reshaped @@ -1167,12 +1182,21 @@ public: { return _depth_output_gemm3d; }; + /** Flag which specifies if the input tensor has to be reinterpreted as 3D + * + * @return True if the input tensor has to be reinterpreted as 3D tensor + */ + bool 
reinterpret_input_as_3d() const + { + return _reinterpret_input_as_3d; + }; private: const bool _is_a_reshaped; const bool _is_b_reshaped; const bool _reshape_b_only_on_first_run; const int _depth_output_gemm3d; + const bool _reinterpret_input_as_3d; }; /** Winograd information */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index dbf26a423d..bf55add1d2 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -66,14 +66,24 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo return weights_reshaped; } -inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1) +inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) { // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); const int interleave_width = 4 * mult_interleave4x4_height; TensorShape shape_interleaved_a{ a.tensor_shape() }; shape_interleaved_a.set(0, a.dimension(0) * interleave_width); - shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width))); + if(reinterpret_input_as_3d) + { + const int M = a.dimension(1) * a.dimension(2); + const int height = std::ceil(M / static_cast<float>(interleave_width)); + shape_interleaved_a.set(1, height); + shape_interleaved_a.remove_dimension(2); + } + else + { + shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width))); + } return shape_interleaved_a; } @@ -374,23 +384,26 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) {
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); + ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); - const bool is_gemm3d = reshape_info.depth_output_gemm3d() != 1; + const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); + const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 1; + const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third // dimension of the output tensor const int dim0 = is_interleaved_transposed ? reshape_info.n() : input1.dimension(0); - const int dim1 = is_interleaved_transposed ? reshape_info.m() / reshape_info.depth_output_gemm3d() : input0.dimension(1) / reshape_info.depth_output_gemm3d(); - const int dim2 = input0.tensor_shape()[2]; - const int dim3 = input0.tensor_shape()[3]; + const int dim1 = is_interleaved_transposed ? reshape_info.m() / reshape_info.depth_output_gemm3d() : m / reshape_info.depth_output_gemm3d(); + const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; + const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; TensorShape output_shape{ input0.tensor_shape() }; output_shape.set(0, dim0); output_shape.set(1, dim1); - output_shape.set(2, is_gemm3d ? reshape_info.depth_output_gemm3d() : dim2); - output_shape.set(3, is_gemm3d ? dim2 : dim3); - output_shape.set(4, is_gemm3d ? dim3 : 1); + output_shape.set(2, reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : dim2); + output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3); + output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1); return output_shape; } -- cgit v1.2.1