From c357c47be8a3f210f9eee9a05cc13f1051b036d3 Mon Sep 17 00:00:00 2001 From: Alex Gilday Date: Wed, 21 Mar 2018 13:54:09 +0000 Subject: COMPMID-1008: Fix Doxygen issues Change-Id: Ie73d8771f85d1f5b059f3a56f1bbd73c98e94a38 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124723 Reviewed-by: Michalis Spyrou Tested-by: Jenkins --- arm_compute/core/NEON/INEKernel.h | 3 +- arm_compute/core/NEON/INESimpleKernel.h | 3 +- arm_compute/core/NEON/NEColorConvertHelper.inl | 102 ++++++++++++++++++++- arm_compute/core/NEON/NEFixedPoint.inl | 4 +- arm_compute/core/NEON/NEMath.inl | 15 ++- arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 1 + .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 2 +- arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 3 +- arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 3 +- .../kernels/NEDepthwiseConvolutionLayer3x3Kernel.h | 2 +- .../NEON/kernels/NEDequantizationLayerKernel.h | 2 +- .../kernels/NEGEMMMatrixVectorMultiplyKernel.h | 1 + .../core/NEON/kernels/NEHarrisCornersKernel.h | 1 + arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 1 + .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 1 + .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 3 +- .../core/NEON/kernels/NENormalizationLayerKernel.h | 2 +- .../core/NEON/kernels/NEQuantizationLayerKernel.h | 2 +- .../core/NEON/kernels/NEWinogradLayerKernel.h | 37 ++++++-- .../kernels/detail/NEActivationFunctionDetail.h | 51 ++++++++++- arm_compute/core/NEON/wrapper/traits.h | 4 +- 21 files changed, 215 insertions(+), 28 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h index 529606a709..32d7ab6338 100644 --- a/arm_compute/core/NEON/INEKernel.h +++ b/arm_compute/core/NEON/INEKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ namespace arm_compute { +/** Common interface for all kernels implemented in NEON. 
*/ using INEKernel = ICPPKernel; } // namespace arm_compute #endif /*__ARM_COMPUTE_INEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h index 0d2211ac32..15fc3be5ed 100644 --- a/arm_compute/core/NEON/INESimpleKernel.h +++ b/arm_compute/core/NEON/INESimpleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ namespace arm_compute { +/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ using INESimpleKernel = ICPPSimpleKernel; } // namespace arm_compute #endif /*__ARM_COMPUTE_INESIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl index 9a9caefaab..0da5affe18 100644 --- a/arm_compute/core/NEON/NEColorConvertHelper.inl +++ b/arm_compute/core/NEON/NEColorConvertHelper.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace { +#ifndef DOXYGEN_SKIP_THIS constexpr float red_coef_bt709 = 1.5748F; constexpr float green_coef_bt709 = -0.1873f; constexpr float green_coef2_bt709 = -0.4681f; @@ -296,10 +297,18 @@ inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, co vst1q_u8(out_u, uvec); vst1q_u8(out_v, vvec); } +#endif /* DOXYGEN_SKIP_THIS */ } namespace arm_compute { +/** Convert RGB to RGBX. + * + * @param[in] input Input RGB data buffer. + * @param[out] output Output RGBX buffer. + * @param[in] win Window for iterating the buffers. + * + */ void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) { ARM_COMPUTE_ERROR_ON(nullptr == input); @@ -324,6 +333,13 @@ void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict out in, out); } +/** Convert RGBX to RGB. 
+ * + * @param[in] input Input RGBX data buffer. + * @param[out] output Output RGB buffer. + * @param[in] win Window for iterating the buffers. + * + */ void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) { ARM_COMPUTE_ERROR_ON(nullptr == input); @@ -347,6 +363,13 @@ void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win in, out); } +/** Convert YUYV to RGB. + * + * @param[in] input Input YUYV data buffer. + * @param[out] output Output RGB buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) { @@ -385,6 +408,13 @@ void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict out in, out); } +/** Convert NV12 to RGB. + * + * @param[in] input Input NV12 data buffer. + * @param[out] output Output RGB buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) { @@ -441,6 +471,13 @@ void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict out in_y, in_uv, out); } +/** Convert IYUV to RGB. + * + * @param[in] input Input IYUV data buffer. + * @param[out] output Output RGB buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) { @@ -498,6 +535,13 @@ void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict out in_y, in_u, in_v, out); } +/** Convert YUYV to NV12. + * + * @param[in] input Input YUYV data buffer. + * @param[out] output Output NV12 buffer. + * @param[in] win Window for iterating the buffers. 
+ * + */ template void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) { @@ -547,6 +591,13 @@ void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict ou in, out_y, out_uv); } +/** Convert IYUV to NV12. + * + * @param[in] input Input IYUV data buffer. + * @param[out] output Output NV12 buffer. + * @param[in] win Window for iterating the buffers. + * + */ void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) { ARM_COMPUTE_ERROR_ON(nullptr == input); @@ -587,6 +638,13 @@ void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict ou in_y, in_u, in_v, out_y, out_uv); } +/** Convert NV12 to IYUV. + * + * @param[in] input Input NV12 data buffer. + * @param[out] output Output IYUV buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) { @@ -629,6 +687,13 @@ void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict ou in_y, in_uv, out_y, out_u, out_v); } +/** Convert YUYV to IYUV. + * + * @param[in] input Input YUYV data buffer. + * @param[out] output Output IYUV buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) { @@ -682,6 +747,13 @@ void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict ou in, out_y, out_u, out_v); } +/** Convert NV12 to YUV4. + * + * @param[in] input Input NV12 data buffer. + * @param[out] output Output YUV4 buffer. + * @param[in] win Window for iterating the buffers. 
+ * + */ template void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) { @@ -734,6 +806,13 @@ void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict ou in_y, in_uv, out_y, out_u, out_v); } +/** Convert IYUV to YUV4. + * + * @param[in] input Input IYUV data buffer. + * @param[out] output Output YUV4 buffer. + * @param[in] win Window for iterating the buffers. + * + */ void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) { ARM_COMPUTE_ERROR_ON(nullptr == input); @@ -785,6 +864,13 @@ void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict ou in_y, in_u, in_v, out_y, out_u, out_v); } +/** Convert RGB to NV12. + * + * @param[in] input Input RGB data buffer. + * @param[out] output Output NV12 buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) { @@ -821,6 +907,13 @@ void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict out in, out_y, out_uv); } +/** Convert RGB to IYUV. + * + * @param[in] input Input RGB data buffer. + * @param[out] output Output IYUV buffer. + * @param[in] win Window for iterating the buffers. + * + */ template void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) { @@ -858,6 +951,13 @@ void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict out in, out_y, out_u, out_v); } +/** Convert RGB to YUV4. + * + * @param[in] input Input RGB data buffer. + * @param[out] output Output YUV4 buffer. + * @param[in] win Window for iterating the buffers. 
+ * + */ template void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) { diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index 966313d58b..b86c3cbec3 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -130,6 +130,7 @@ static const std::array log_tabq_qs16 = } }; +#ifndef DOXYGEN_SKIP_THIS inline qint8x8_t vget_low_qs8(qint8x16_t a) { return vget_low_s8(a); @@ -1996,4 +1997,5 @@ inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) }; return res; } +#endif /* DOXYGEN_SKIP_THIS */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 50f217c1f1..84154020a5 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ namespace arm_compute { -/* Exponent polynomial coefficients */ +/** Exponent polynomial coefficients */ const std::array exp_tab = { { @@ -39,7 +39,7 @@ const std::array exp_tab = } }; -/* Logarithm polynomial coefficients */ +/** Logarithm polynomial coefficients */ const std::array log_tab = { { @@ -54,6 +54,7 @@ const std::array log_tab = } }; +#ifndef DOXYGEN_SKIP_THIS inline float32x4_t vfloorq_f32(float32x4_t val) { static const float32x4_t CONST_1 = vdupq_n_f32(1.f); @@ -168,8 +169,10 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) { return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); } +#endif /* DOXYGEN_SKIP_THIS */ + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/* Exponent polynomial coefficients */ +/** Exponent polynomial coefficients */ const std::array exp_tab_f16 = { { @@ -184,7 +187,7 @@ const std::array exp_tab_f16 = } }; -/* Logarithm polynomial coefficients */ +/** Logarithm polynomial coefficients */ const std::array log_tab_f16 = { { @@ -199,6 +202,7 @@ const std::array log_tab_f16 = } }; +#ifndef DOXYGEN_SKIP_THIS inline float16x4_t vinvsqrt_f16(float16x4_t x) { float16x4_t sqrt_reciprocal = vrsqrte_f16(x); @@ -301,5 +305,6 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n) { return vexpq_f16(vmulq_f16(n, vlogq_f16(val))); } +#endif /* DOXYGEN_SKIP_THIS */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h index ad5a16c9f3..82a4199761 100644 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -101,6 +101,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the accumulate weighted kernel using F16 */ using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; 
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index 63eb739487..2408a665e4 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -47,7 +47,7 @@ public: NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; /** Default Move Constructor. */ NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; /** Default destructor */ ~NEBatchNormalizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h index 9c139551cb..2f93fd2480 100644 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -51,7 +51,7 @@ public: }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform a Box 3x3 filter using F16 simd +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ class NEBox3x3FP16Kernel : public NEBox3x3Kernel { @@ -64,6 +64,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ using NEBox3x3FP16Kernel = NEBox3x3Kernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h index 401b9e47af..58ef1757fe 100644 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -86,7 +86,7 @@ protected: }; #ifdef 
__ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Gradient computation +/** NEON kernel to perform Gradient computation for FP16 datatype */ class NEGradientFP16Kernel : public NEGradientKernel { @@ -99,6 +99,7 @@ public: void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform Gradient computation for FP16 datatype */ using NEGradientFP16Kernel = NEGradientKernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h index 5871cc5dcb..0c2f30a98c 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h @@ -49,7 +49,7 @@ public: NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete; /** Default Move Constructor. */ NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h index 7ee2078e9e..f48e76f340 100644 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -50,7 +50,7 @@ public: NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; /** Default Move Constructor. */ NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator. 
*/ + /** Default move assignment operator */ NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; /** Default destructor */ ~NEDequantizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h index 286be1acc9..a05d591850 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +/** Interface for the GEMM matrix vector multiply kernel. */ class NEGEMMMatrixVectorMultiplyKernel : public INESimpleKernel { public: diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index 8037e41695..aabf8b312b 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -127,6 +127,7 @@ private: HarrisScoreFunction *_func; }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Interface for the Harris score kernel using FP16 */ template using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h index f2105582eb..9a8947f9a0 100644 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -45,6 +45,7 @@ struct NELKInternalKeypoint bool tracking_status{ false }; /**< the tracking status of the keypoint */ }; +/** Interface for NEON Array of Internal Key Points. 
*/ using INELKInternalKeypointArray = IArray; /** Interface for the Lucas-Kanade tracker kernel */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h index 522ed54f95..696721673d 100644 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -165,6 +165,7 @@ private: ITensor *_phase; /**< Output - Phase */ }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** Template interface for the kernel to compute magnitude and phase */ template using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index f122ed15fd..588de49316 100644 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -83,7 +83,7 @@ protected: }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32 +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel { @@ -101,6 +101,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); }; #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h index 
b835ca7c53..6ae7b73423 100644 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -47,7 +47,7 @@ public: NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; /** Default Move Constructor. */ NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; /** Default destructor */ ~NENormalizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h index e7cf0a8ca4..ca7658bb7e 100644 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -50,7 +50,7 @@ public: NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; /** Default Move Constructor. */ NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; - /** Default move assignment operator. */ + /** Default move assignment operator */ NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; /** Default destructor */ ~NEQuantizationLayerKernel() = default; diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h index 9169b75d19..2f44d19b4f 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h @@ -34,6 +34,7 @@ namespace arm_compute { class ITensor; +/** Interface for the NEON kernel to perform Winograd input transform. */ template class INEWinogradLayerTransformInputKernel : public INEKernel { @@ -46,6 +47,8 @@ public: * @param[in] n_rows Number of rows in each feature map. * @param[in] n_cols Number of columns in each feature map. 
* @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TIn) required. */ virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0; @@ -72,11 +75,13 @@ public: */ virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0; + /** Destructor */ virtual ~INEWinogradLayerTransformInputKernel() { } }; +/** NEON kernel to perform Winograd input transform. */ template class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel { @@ -89,6 +94,8 @@ public: * @param[in] n_rows Number of rows in each feature map. * @param[in] n_cols Number of columns in each feature map. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TIn) required. */ unsigned int get_input_storage_size( int n_batches, @@ -107,6 +114,7 @@ public: */ int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override; + /** Default constructor */ NEWinogradLayerTransformInputKernel(); const char *name() const override @@ -139,7 +147,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; + /** Winograd base kernel */ using WinogradBase = winograd::WinogradGEMM; + /** Winograd convolution kernel */ using WinogradConv = typename WinogradBase::template Convolution; private: @@ -147,6 +157,7 @@ private: std::unique_ptr _transform; }; +/** Interface for the NEON kernel to perform Winograd output transform. */ template class INEWinogradLayerTransformOutputKernel : public INEKernel { @@ -159,6 +170,8 @@ public: * @param[in] n_cols Number of columns in each feature map of the input tensor. 
* @param[in] n_output_channels Number of feature maps in the output tensor. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TOut) required. */ virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0; @@ -208,6 +221,7 @@ public: } }; +/** NEON kernel to perform Winograd output transform. */ template class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel { @@ -227,7 +241,7 @@ public: NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default; /** Allow instances of this class to be moved */ NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default; - + /** Default destructor */ ~NEWinogradLayerTransformOutputKernel() = default; // Inherited methods overridden: @@ -239,6 +253,8 @@ public: * @param[in] n_cols Number of columns in each feature map of the input tensor. * @param[in] n_output_channels Number of feature maps in the output tensor. * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". + * + * @return Storage size (in units of TOut) required. */ unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override; @@ -301,6 +317,7 @@ private: int _n_channels; }; +/** Interface for the NEON kernel to perform Winograd weights transform. */ template class INEWinogradLayerTransformWeightsKernel : public INEKernel { @@ -310,6 +327,8 @@ public: * * @param[in] n_output_channels Number of output feature maps. * @param[in] n_input_channels Number of input feature maps. + * + * @return Storage size (in units of T) required. 
*/ virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0; /** Gets the stride between matrices in the kernel worspace @@ -335,10 +354,12 @@ public: } }; +/** NEON kernel to perform Winograd weights transform. */ template class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel { public: + /** Default constructor. */ NEWinogradLayerTransformWeightsKernel(); const char *name() const override { @@ -359,6 +380,7 @@ private: std::unique_ptr _transform; }; +/** Interface for the NEON kernel to perform Winograd. */ template class INEWinogradLayerBatchedGEMMKernel : public INEKernel { @@ -406,16 +428,17 @@ public: virtual int get_number_blocks() const = 0; }; +/** NEON kernel to perform Winograd. */ template class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel { public: + /** Winograd base kernel */ using WinogradBase = winograd::WinogradGEMM; + /** Winograd convolution kernel */ using WinogradConv = typename WinogradBase::template Convolution; - using MultiGEMM = winograd::BatchedBlockedGemm; - - static const int _output_tile_rows = OutputTileRows; - static const int _output_tile_cols = OutputTileCols; + /** Winograd batched blocked GEMM operator */ + using MultiGEMM = winograd::BatchedBlockedGemm; const char *name() const override { @@ -432,7 +455,7 @@ public: NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default; /** Allow instances of this class to be moved */ NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default; - + /** Default destructor. 
*/ ~NEWinogradLayerBatchedGEMMKernel() = default; // Inherited methods overridden: @@ -474,6 +497,8 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: + static const int _output_tile_rows = OutputTileRows; + static const int _output_tile_cols = OutputTileCols; std::unique_ptr _gemms; }; diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h index e4d3f54943..71d5a9eef7 100644 --- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h +++ b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h @@ -30,17 +30,25 @@ namespace arm_compute { namespace detail { -// Dummy activation object /** Dummy activation object */ template struct dummy { + /** NEON vector type. */ using ExactType = typename wrapper::traits::neon_vector::type; + /** Construct a dummy activation object. + * + * @param[in] act_info Activation layer information. + */ explicit dummy(ActivationLayerInfo act_info) { ARM_COMPUTE_UNUSED(act_info); } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { ARM_COMPUTE_UNUSED(vval); @@ -50,62 +58,97 @@ struct dummy template struct relu { - using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector::tag_type; + /** Construct a RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit relu(ActivationLayerInfo act_info) : vzero(wrapper::vdup_n(static_cast(0.f), ExactTagType{})) { ARM_COMPUTE_UNUSED(act_info); } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { vval = wrapper::vmax(vzero, vval); } + /** Vector of zeroes. 
*/ const ExactType vzero; }; /** Bounded RELU activation object */ template struct brelu { - using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector::tag_type; + /** Construct a bounded RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit brelu(ActivationLayerInfo act_info) : vzero(wrapper::vdup_n(static_cast(0.f), ExactTagType{})), valpha(wrapper::vdup_n(static_cast(act_info.a()), ExactTagType{})) { } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval)); } + /** Vector of zeroes. */ const ExactType vzero; + /** Vector of alphas. */ const ExactType valpha; }; /** Lower-Upper Bounded RELU activation object */ template struct lubrelu { - using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector type. */ + using ExactType = typename wrapper::traits::neon_vector::type; + /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_vector::tag_type; + /** Construct a lower-upper bounded RELU activation object. + * + * @param[in] act_info Activation layer information. + */ explicit lubrelu(ActivationLayerInfo act_info) : valpha(wrapper::vdup_n(static_cast(act_info.a()), ExactTagType{})), vbeta(wrapper::vdup_n(static_cast(act_info.b()), ExactTagType{})) { } + /** Run activation function. + * + * @param[in] vval Vector of values. + */ void operator()(ExactType &vval) { vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval)); } + /** Vector of alphas. */ const ExactType valpha; + /** Vector of betas. 
*/ const ExactType vbeta; }; } // namespace detail diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h index 08b2c9b48f..495ddbb1af 100644 --- a/arm_compute/core/NEON/wrapper/traits.h +++ b/arm_compute/core/NEON/wrapper/traits.h @@ -42,7 +42,8 @@ struct vector_128_tag {}; /** Create the appropriate NEON vector given its type and size */ template struct neon_vector; -/** Specializations */ +// Specializations +#ifndef DOXYGEN_SKIP_THIS template <> struct neon_vector{ using type = uint8x8_t; using tag_type = vector_64_tag; }; template <> struct neon_vector{ using type = int8x8_t; using tag_type = vector_64_tag; }; template <> struct neon_vector{ using type = uint8x16_t; using tag_type = vector_128_tag; }; @@ -61,6 +62,7 @@ template <> struct neon_vector{ using type = uint64x2_t; using tag_ template <> struct neon_vector{ using type = int64x2_t; using tag_type = vector_128_tag; }; template <> struct neon_vector{ using type = float32x2_t; using tag_type = vector_64_tag; }; template <> struct neon_vector{ using type = float32x4_t; using tag_type = vector_128_tag; }; +#endif /* DOXYGEN_SKIP_THIS */ /** Helper type template to get the type of a neon vector */ template using neon_vector_t = typename neon_vector::type; -- cgit v1.2.1