From c0b6f76561580414f08633a804fc548ccad65659 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 2 Nov 2020 01:37:17 +0000 Subject: COMPMID-3776: Indirect GEMM Signed-off-by: Georgios Pinitas Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- arm_compute/core/Types.h | 9 +- arm_compute/runtime/FunctionDescriptors.h | 24 +++++ arm_compute/runtime/NEON/NEFunctions.h | 2 +- .../runtime/NEON/functions/NEConvolutionLayer.h | 9 +- .../NEON/functions/NEGEMMAssemblyDispatch.h | 58 +++++++---- arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 108 +++++++++++++++++++++ .../NEGEMMLowpAssemblyMatrixMultiplyCore.h | 74 -------------- 7 files changed, 182 insertions(+), 102 deletions(-) create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMConv2d.h delete mode 100644 arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h (limited to 'arm_compute') diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 306bdc6706..2e639c4be4 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -137,10 +137,11 @@ enum class DataLayoutDimension /** Available ConvolutionMethod*/ enum class ConvolutionMethod { - GEMM, /**< Convolution using GEMM */ - DIRECT, /**< Direct convolution */ - WINOGRAD, /**< Convolution using Winograd */ - FFT /**< Convolution using FFT */ + GEMM, /**< Convolution using GEMM */ + GEMM_CONV2D, /**< Direct 2D GEMM convolution */ + DIRECT, /**< Direct convolution */ + WINOGRAD, /**< Convolution using Winograd */ + FFT /**< Convolution using FFT */ }; /** Available DepthwiseConvolutionFunction*/ diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h index 16d6c345e2..1f4216eb21 100644 --- a/arm_compute/runtime/FunctionDescriptors.h +++ b/arm_compute/runtime/FunctionDescriptors.h 
@@ -23,6 +23,9 @@ */ #ifndef ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H #define ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H + +#include "arm_compute/core/Types.h" + #include <utility> namespace arm_compute @@ -48,5 +51,26 @@ struct FFT2DInfo unsigned int axis1{ 1 }; /**< Axis to run second pass on. If same, multiple transforms are performed on single axis*/ FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */ }; + +/** Descriptor used by the Convolution function */ +struct Conv2dInfo +{ + Conv2dInfo() = default; + + Conv2dInfo(const PadStrideInfo &conv_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) + : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups) + { + } + + PadStrideInfo conv_info{}; + Size2D dilation{ 1U, 1U }; + ActivationLayerInfo act_info{}; + bool enable_fast_math{ false }; + unsigned int num_groups{ 1 }; +}; } // namespace arm_compute #endif /* ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index a97fa3b81a..e7d59e1608 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -78,9 +78,9 @@ #include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include 
"arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h" diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 54dae57752..a061dc7b04 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -26,16 +26,15 @@ #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" + #include <memory> namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to simulate a convolution layer. This function calls one of the following NEON functions: @@ -158,5 +157,5 @@ private: std::shared_ptr<IMemoryManager> _memory_manager; std::unique_ptr<IFunction> _function; /**< Function to run */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index ac77acf69d..8f9498d0f5 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -32,6 +32,28 @@ namespace arm_compute { +/* Convolution method supported by the assembly gemm interface */ +enum class AsmConvMethod +{ + Im2Col, + Indirect, + Conv +}; + +struct AsmGemmInfo +{ + AsmConvMethod method{ AsmConvMethod::Im2Col }; + PadStrideInfo ps_info{}; + ActivationLayerInfo activation_info{}; + GEMMLowpOutputStageInfo output_stage{}; + bool negated_offsets{ true }; + bool reinterpret_input_as_3d{ false }; + bool depth_output_gemm3d{ false }; 
+ int64_t padding_top{ 0 }; + int64_t padding_left{ 0 }; + float padding_value{ 0.f }; +}; + /** Assembly kernel glue */ class NEGEMMAssemblyDispatch : public IFunction { @@ -55,33 +77,28 @@ public: virtual ~IFallback() = default; }; -private: - /** Interface for the arm_gemm fallback */ - std::unique_ptr<IFallback> _arm_gemm; - MemoryGroup _memory_group; /**< Function memory group */ - IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ public: /** If supported create a Compute Library function else fallback to the arm_gemm function. * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations - * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] gemm_info GEMM meta-data + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations + * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info); + void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info); /** Indicates whether or not this function can be used to process the given parameters. * - * @param[in] a Input tensor info (Matrix A) - * @param[in] b Input tensor info (Matrix B) - * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations - * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. 
- * @param[in] gemm_info GEMM meta-data + * @param[in] a Input tensor info (Matrix A) + * @param[in] b Input tensor info (Matrix B) + * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations + * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data * * @return a status. */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info); + static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info); /** Checks if activation is supported by the gemm assembly dispatcher * * @param[in] activation Activation to check @@ -94,10 +111,15 @@ public: * @return True if the function is configured and ready to run */ bool is_configured() const; + // Inherited methods overridden: - /** Runs a preparation step, usually for pre-transposing matrix b */ void prepare() override; void run() override; + +private: + std::unique_ptr<IFallback> _arm_gemm; /**< Interface for the arm_gemm fallback */ + MemoryGroup _memory_group; /**< Function memory group */ + IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h new file mode 100644 index 0000000000..7cae39397f --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMCONV2D_H +#define ARM_COMPUTE_NEGEMMCONV2D_H + +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/Tensor.h" + +#include <memory> +namespace arm_compute +{ +// Forward declarations +class ITensor; +/** Basic function to compute the convolution layer. 
This function calls the following NEON kernels/functions: + * + * Supports only NHWC data layout + * + * -# @ref NEGEMMAssemblyDispatch + * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch + * + * Weights are transformed from OHWI to HWIO format using the following kernels: + * -# @ref NEPermute + */ +class NEGEMMConv2d : public IFunction +{ +public: + /** Constructor */ + NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConv2d(const NEGEMMConv2d &) = delete; + /** Default move constructor */ + NEGEMMConv2d(NEGEMMConv2d &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete; + /** Default move assignment operator */ + NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. 
+ * @param[in] info Convolution layer descriptor + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d + * + * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] info Convolution layer descriptor 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + NEGEMMAssemblyDispatch _gemm_asm_func; + NEActivationLayer _activation_func; + NEPermute _weights_permute_func; + const ITensor *_original_weights; + Tensor _permuted_weights; + bool _is_prepared; + bool _run_activation; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h deleted file mode 100644 index 961b1901e7..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H -#define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -// Forward declarations -class ITensor; -class NEGEMMInterleave4x4Kernel; -class NEGEMMTranspose1xWKernel; -class NEGEMMLowpMatrixMultiplyKernel; - -/** Basic function to execute matrix multiply assembly kernels. */ -class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction -{ -public: - /** Constructor */ - NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Destructor */ - ~NEGEMMLowpAssemblyMatrixMultiplyCore(); - - /** Initialise the kernel's inputs, output - * - * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). Data type supported: same as @p a - * @param[out] output Output tensor. 
Data type supported: Data type supported: U32, S32 - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */ -- cgit v1.2.1