diff options
author | Alex Gilday <alexander.gilday@arm.com> | 2018-03-21 13:54:09 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:16 +0000 |
commit | c357c47be8a3f210f9eee9a05cc13f1051b036d3 (patch) | |
tree | a88ac857150da970a0862a3479b78c616d8aa1d3 /arm_compute/core/NEON/kernels | |
parent | 724079d6fce3bf6a05cd6c7b4884b132b27e9e90 (diff) | |
download | ComputeLibrary-c357c47be8a3f210f9eee9a05cc13f1051b036d3.tar.gz |
COMPMID-1008: Fix Doxygen issues
Change-Id: Ie73d8771f85d1f5b059f3a56f1bbd73c98e94a38
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124723
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels')
15 files changed, 94 insertions, 18 deletions
diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h index ad5a16c9f3..82a4199761 100644 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -101,6 +101,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the accumulate weighted kernel using F16 */ using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index 63eb739487..2408a665e4 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -47,7 +47,7 @@ public: NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; /** Default Move Constructor. */ NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator. 
*/ + /** Default move assignment operator */ NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; /** Default destructor */ ~NEBatchNormalizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h index 9c139551cb..2f93fd2480 100644 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -51,7 +51,7 @@ public: }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform a Box 3x3 filter using F16 simd +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ class NEBox3x3FP16Kernel : public NEBox3x3Kernel { @@ -64,6 +64,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ using NEBox3x3FP16Kernel = NEBox3x3Kernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h index 401b9e47af..58ef1757fe 100644 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -86,7 +86,7 @@ protected: }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Gradient computation +/** NEON kernel to perform Gradient computation for FP16 datatype */ class NEGradientFP16Kernel : public NEGradientKernel { @@ -99,6 +99,7 @@ public: void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform Gradient computation for FP16 datatype */ using NEGradientFP16Kernel = NEGradientKernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h 
b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h index 5871cc5dcb..0c2f30a98c 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h @@ -49,7 +49,7 @@ public: NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete; /** Default Move Constructor. */ NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h index 7ee2078e9e..f48e76f340 100644 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -50,7 +50,7 @@ public: NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; /** Default Move Constructor. */ NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; /** Default destructor */ ~NEDequantizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h index 286be1acc9..a05d591850 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +/** Interface for the GEMM matrix vector multiply kernel. 
**/ class NEGEMMMatrixVectorMultiplyKernel : public INESimpleKernel { public: diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index 8037e41695..aabf8b312b 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -127,6 +127,7 @@ private: HarrisScoreFunction *_func; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the Harris score kernel using FP16 */ template <int32_t block_size> using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h index f2105582eb..9a8947f9a0 100644 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -45,6 +45,7 @@ struct NELKInternalKeypoint bool tracking_status{ false }; /**< the tracking status of the keypoint */ }; +/** Interface for NEON Array of Internal Key Points. 
*/ using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>; /** Interface for the Lucas-Kanade tracker kernel */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h index 522ed54f95..696721673d 100644 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -165,6 +165,7 @@ private: ITensor *_phase; /**< Output - Phase */ }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Template interface for the kernel to compute magnitude and phase */ template <MagnitudeType mag_type, PhaseType phase_type> using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index f122ed15fd..588de49316 100644 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -83,7 +83,7 @@ protected: }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32 +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel { @@ -101,6 +101,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git 
a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h index b835ca7c53..6ae7b73423 100644 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -47,7 +47,7 @@ public: NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; /** Default Move Constructor. */ NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; /** Default destructor */ ~NENormalizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h index e7cf0a8ca4..ca7658bb7e 100644 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -50,7 +50,7 @@ public: NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; /** Default Move Constructor. */ NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; /** Default destructor */ ~NEQuantizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h index 9169b75d19..2f44d19b4f 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h @@ -34,6 +34,7 @@ namespace arm_compute { class ITensor; +/** Interface for the NEON kernel to perform Winograd input transform. 
*/ template <typename T> class INEWinogradLayerTransformInputKernel : public INEKernel { @@ -46,6 +47,8 @@ public: * @param[in] n_rows Number of rows in each feature map. * @param[in] n_cols Number of columns in each feature map. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TIn) required. */ virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0; @@ -72,11 +75,13 @@ public: */ virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0; + /** Destructor */ virtual ~INEWinogradLayerTransformInputKernel() { } }; +/** NEON kernel to perform Winograd input transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T> { @@ -89,6 +94,8 @@ public: * @param[in] n_rows Number of rows in each feature map. * @param[in] n_cols Number of columns in each feature map. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TIn) required. 
*/ unsigned int get_input_storage_size( int n_batches, @@ -107,6 +114,7 @@ public: */ int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override; + /** Default constructor */ NEWinogradLayerTransformInputKernel(); const char *name() const override @@ -139,7 +147,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; + /** Winograd base kernel */ using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>; + /** Winograd convolution kernel */ using WinogradConv = typename WinogradBase::template Convolution<T, T>; private: @@ -147,6 +157,7 @@ private: std::unique_ptr<InputTransform> _transform; }; +/** Interface for the NEON kernel to perform Winograd output transform. */ template <typename T> class INEWinogradLayerTransformOutputKernel : public INEKernel { @@ -159,6 +170,8 @@ public: * @param[in] n_cols Number of columns in each feature map of the input tensor. * @param[in] n_output_channels Number of feature maps in the output tensor. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TOut) required. */ virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0; @@ -208,6 +221,7 @@ public: } }; +/** NEON kernel to perform Winograd output transform. 
*/ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T> { @@ -227,7 +241,7 @@ public: NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default; /** Allow instances of this class to be moved */ NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default; - + /** Default destructor */ ~NEWinogradLayerTransformOutputKernel() = default; // Inherited methods overridden: @@ -239,6 +253,8 @@ public: * @param[in] n_cols Number of columns in each feature map of the input tensor. * @param[in] n_output_channels Number of feature maps in the output tensor. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TOut) required. */ unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override; @@ -301,6 +317,7 @@ private: int _n_channels; }; +/** Interface for the NEON kernel to perform Winograd weights transform. */ template <typename T> class INEWinogradLayerTransformWeightsKernel : public INEKernel { @@ -310,6 +327,8 @@ public: * * @param[in] n_output_channels Number of output feature maps. * @param[in] n_input_channels Number of input feature maps. + * + * @return Storage size (in units of T) required. */ virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0; /** Gets the stride between matrices in the kernel worspace @@ -335,10 +354,12 @@ public: } }; +/** NEON kernel to perform Winograd weights transform. */ template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T> { public: + /** Default constructor. 
*/ NEWinogradLayerTransformWeightsKernel(); const char *name() const override { @@ -359,6 +380,7 @@ private: std::unique_ptr<WeightsTransform> _transform; }; +/** Interface for the NEON kernel to perform Winograd. */ template <typename TIn, typename TOut> class INEWinogradLayerBatchedGEMMKernel : public INEKernel { @@ -406,16 +428,17 @@ public: virtual int get_number_blocks() const = 0; }; +/** NEON kernel to perform Winograd. */ template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut> { public: + /** Winograd base kernel */ using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>; + /** Winograd convolution kernel */ using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>; - using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>; - - static const int _output_tile_rows = OutputTileRows; - static const int _output_tile_cols = OutputTileCols; + /** Winograd batched blocked GEMM operator */ + using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>; const char *name() const override { @@ -432,7 +455,7 @@ public: NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default; /** Allow instances of this class to be moved */ NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default; - + /** Default destructor. 
*/ ~NEWinogradLayerBatchedGEMMKernel() = default; // Inherited methods overridden: @@ -474,6 +497,8 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: + static const int _output_tile_rows = OutputTileRows; + static const int _output_tile_cols = OutputTileCols; std::unique_ptr<MultiGEMM> _gemms; }; diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h index e4d3f54943..71d5a9eef7 100644 --- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h +++ b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h @@ -30,17 +30,25 @@ namespace arm_compute { namespace detail { -// Dummy activation object /** Dummy activation object */ template <typename T, int S> struct dummy { + /** NEON vector type. */ using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** Construct a dummy activation object. + * + * @param[in] act_info Activation layer information. + */ explicit dummy(ActivationLayerInfo act_info) { ARM_COMPUTE_UNUSED(act_info); } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { ARM_COMPUTE_UNUSED(vval); @@ -50,62 +58,97 @@ struct dummy template <typename T, int S> struct relu { - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; + /** Construct a RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit relu(ActivationLayerInfo act_info) : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})) { ARM_COMPUTE_UNUSED(act_info); } + /** Run activation function. + * + * @param[in] vval Vector of values. 
+ */ void operator()(ExactType &vval) { vval = wrapper::vmax(vzero, vval); } + /** Vector of zeroes. */ const ExactType vzero; }; /** Bounded RELU activation object */ template <typename T, int S> struct brelu { - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; + /** Construct a bounded RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit brelu(ActivationLayerInfo act_info) : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})), valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})) { } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval)); } + /** Vector of zeroes. */ const ExactType vzero; + /** Vector of alphas. */ const ExactType valpha; }; /** Lower-Upper Bounded RELU activation object */ template <typename T, int S> struct lubrelu { - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector<T, S>::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; + /** Construct a lower-upper bounded RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit lubrelu(ActivationLayerInfo act_info) : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})), vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{})) { } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval)); } + /** Vector of alphas. */ const ExactType valpha; + /** Vector of betas. 
*/ const ExactType vbeta; }; } // namespace detail |