Diffstat (limited to 'arm_compute/runtime/NEON')
145 files changed, 4718 insertions, 6128 deletions
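Note (not part of the patch): the bulk of this change migrates the NEON runtime functions away from holding kernels directly (INESimpleFunction with an embedded NEFillBorderKernel, raw kernel members such as NEBatchNormalizationLayerKernel) towards forward-declared kernels behind unique_ptr members, or pimpl-style wrappers (struct Impl) that dispatch to the new cpu:: operators (e.g. cpu::kernels::CpuActivationKernel, CpuAddKernel, CpuSubKernel). The user-facing configure()/validate()/run() interface is unchanged. A minimal usage sketch, illustrative only and assuming the usual Tensor/TensorInfo setup from the library examples:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Two F32 tensors of the same shape; the activation can also be run in-place.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

        // The public interface is unchanged by this patch; internally NEActivationLayer
        // now forwards to cpu::kernels::CpuActivationKernel through a pimpl member.
        NEActivationLayer act;
        act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        dst.allocator()->allocate();

        act.run();
        return 0;
    }
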
diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h new file mode 100644 index 0000000000..7971168d24 --- /dev/null +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_INEOPERATOR_H +#define ARM_COMPUTE_INEOPERATOR_H + +#include "arm_compute/runtime/IOperator.h" +#include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Types.h" + +#include "../../core/ITensor.h" +#include <memory> + +namespace arm_compute +{ +class ICPPKernel; +class Window; + +using INEKernel = ICPPKernel; +namespace experimental +{ +/** Basic interface for functions which have a single async CPU kernel */ +class INEOperator : public IOperator +{ +public: + /** Constructor + * + * @param[in] ctx Runtime context to be used by the function + */ + INEOperator(IRuntimeContext *ctx = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEOperator(const INEOperator &) = delete; + /** Default move constructor */ + INEOperator(INEOperator &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEOperator &operator=(const INEOperator &) = delete; + /** Default move assignment operator */ + INEOperator &operator=(INEOperator &&) = default; + /** Default destructor */ + ~INEOperator(); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; + MemoryRequirements workspace() const override; + +protected: + void run(ITensorPack &tensors, const Window &window); + + std::unique_ptr<INEKernel> _kernel; + IRuntimeContext *_ctx; + MemoryRequirements _workspace; +}; +} // namespace experimental +} // namespace arm_compute +#endif /*ARM_COMPUTE_INEOPERATOR_H */ diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h index 8506797fe3..f783a836ee 100644 --- a/arm_compute/runtime/NEON/INESimpleFunction.h +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,27 +24,38 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H #define ARM_COMPUTE_INESIMPLEFUNCTION_H -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/IFunction.h" #include <memory> namespace arm_compute { -/** Basic interface for functions which have a single NEON kernel */ +class ICPPKernel; +class NEFillBorderKernel; +using INEKernel = ICPPKernel; +/** Basic interface for functions which have a single CPU kernel */ class INESimpleFunction : public IFunction { public: /** Constructor */ INESimpleFunction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction(const INESimpleFunction &) = delete; + /** Default move constructor */ + INESimpleFunction(INESimpleFunction &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction &operator=(const INESimpleFunction &) = delete; + /** Default move assignment operator */ + INESimpleFunction &operator=(INESimpleFunction &&) = default; + /** Default destructor */ + ~INESimpleFunction(); // Inherited methods overridden: void run() override final; protected: - std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */ - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */ + std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */ diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h index 223048f40f..dc4bac17e4 100644 --- a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h +++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H #define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -32,7 +31,9 @@ namespace arm_compute { -/** Basic interface for functions which have a single NEON kernel and no border */ +class ICPPKernel; +using INEKernel = ICPPKernel; +/** Basic interface for functions which have a single CPU kernel and no border */ class INESimpleFunctionNoBorder : public IFunction { public: @@ -49,6 +50,8 @@ public: INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete; /** Default move assignment operator */ INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default; + /** Default destructor */ + ~INESimpleFunctionNoBorder(); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index de364fa9af..cc4d303202 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NEFUNCTIONS_H -#define ARM_COMPUTE_NEFUNCTIONS_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H -/* Header regrouping all the NEON functions */ -#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" -#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h" #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" @@ -38,18 +36,11 @@ #include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" #include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" -#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" -#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" #include "arm_compute/runtime/NEON/functions/NECast.h" -#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" -#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h" -#include "arm_compute/runtime/NEON/functions/NECol2Im.h" -#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" -#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NEConv3D.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" -#include "arm_compute/runtime/NEON/functions/NEConvolution.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NECropResize.h" @@ -58,103 +49,66 @@ #include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" #include "arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDilate.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h" -#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" -#include "arm_compute/runtime/NEON/functions/NEErode.h" #include "arm_compute/runtime/NEON/functions/NEFFT1D.h" #include "arm_compute/runtime/NEON/functions/NEFFT2D.h" #include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" #include "arm_compute/runtime/NEON/functions/NEFill.h" #include "arm_compute/runtime/NEON/functions/NEFillBorder.h" #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEFloor.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h" +#include "arm_compute/runtime/NEON/functions/NEGather.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include 
"arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h" -#include "arm_compute/runtime/NEON/functions/NEGather.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" -#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" -#include "arm_compute/runtime/NEON/functions/NEHistogram.h" -#include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h" +#include "arm_compute/runtime/NEON/functions/NELogical.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayer.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" -#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" -#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" +#include "arm_compute/runtime/NEON/functions/NEMatMul.h" +#include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" -#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" -#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" -#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" #include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" -#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h" #include "arm_compute/runtime/NEON/functions/NEPadLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/NEPhase.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h" #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h" #include "arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h" #include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h" #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NERNNLayer.h" -#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h" -#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h" #include "arm_compute/runtime/NEON/functions/NERange.h" #include 
"arm_compute/runtime/NEON/functions/NEReduceMean.h" #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/NEON/functions/NERemap.h" +#include "arm_compute/runtime/NEON/functions/NEReorderLayer.h" #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/NEON/functions/NEReverse.h" +#include "arm_compute/runtime/NEON/functions/NERNNLayer.h" +#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h" +#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" #include "arm_compute/runtime/NEON/functions/NESelect.h" -#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h" #include "arm_compute/runtime/NEON/functions/NESlice.h" -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" #include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h" #include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h" #include "arm_compute/runtime/NEON/functions/NESplit.h" #include "arm_compute/runtime/NEON/functions/NEStackLayer.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" -#include "arm_compute/runtime/NEON/functions/NETableLookup.h" -#include "arm_compute/runtime/NEON/functions/NEThreshold.h" #include "arm_compute/runtime/NEON/functions/NETile.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/NEON/functions/NEUnstack.h" -#include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" -#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h" -#endif /* ARM_COMPUTE_NEFUNCTIONS_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_NEFUNCTIONS_H diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h index 54a92bbb41..613f44cc52 100644 --- a/arm_compute/runtime/NEON/NEScheduler.h +++ b/arm_compute/runtime/NEON/NEScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,7 +28,7 @@ namespace arm_compute { -/** NEON Scheduler */ +/** CPU Scheduler */ using NEScheduler = Scheduler; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_NESCHEDULER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h deleted file mode 100644 index 2d6f94cde0..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAbsoluteDifferenceKernel - * - * @note The image data type for the inputs must be U8 or S16 - * @note The function calculates the absolute difference also when the 2 inputs have different image data types - */ -class NEAbsoluteDifference : public INESimpleFunction -{ -public: - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); -}; -} -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h deleted file mode 100644 index 0426bf94d7..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEACCUMULATE_H -#define ARM_COMPUTE_NEACCUMULATE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAccumulateKernel */ -class NEAccumulate : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *output); -}; - -/** Basic function to run @ref NEAccumulateWeightedKernel */ -class NEAccumulateWeighted : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha The input scalar value with a value input the range of [0, 1.0] - * @param[in,out] output Accumulated tensor. Data type supported: U8. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. - */ - void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); -}; - -/** Basic function to run @ref NEAccumulateSquaredKernel */ -class NEAccumulateSquared : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift The input with a value input the range of [0, 15] - * @param[in,out] output Accumulated tensor. Data type supported: S16. - */ - void configure(const ITensor *input, uint32_t shift, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 95901dc2d8..5584fdc783 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,20 +24,24 @@ #ifndef ARM_COMPUTE_NEACTIVATIONLAYER_H #define ARM_COMPUTE_NEACTIVATIONLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IRuntimeContext.h" + +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEActivationLayerKernel +/** Basic function to run @ref cpu::kernels::CpuActivationKernel * * @note The function simulates an activation layer with the specified activation function. 
*/ -class NEActivationLayer : public INESimpleFunctionNoBorder +class NEActivationLayer : public IFunction { public: /** Constructor @@ -48,14 +52,28 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ - NEActivationLayer(NEActivationLayer &&) = default; + NEActivationLayer(NEActivationLayer &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ - NEActivationLayer &operator=(NEActivationLayer &&) = default; + NEActivationLayer &operator=(NEActivationLayer &&); + /** Destructor */ + ~NEActivationLayer(); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 | + * |F16 |F16 | + * |F32 |F32 | + * * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result @@ -75,6 +93,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAddMulAdd.h b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h new file mode 100644 index 0000000000..6c65c055dd --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD +#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; +class ITensorInfo; +class ActivationLayerInfo; + +/** Function to compute Add+Mul+Add fused operation */ +class NEAddMulAdd : public IFunction +{ +public: + /** Constructor */ + NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAddMulAdd(const NEAddMulAdd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAddMulAdd(NEAddMulAdd &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAddMulAdd &operator=(const NEAddMulAdd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAddMulAdd &operator=(NEAddMulAdd &&) = delete; + /** Destructor */ + ~NEAddMulAdd(); + /** Initialize the function's inputs and outputs. + * + * Valid data layouts: + * - Any + * + * Valid data type configurations: + * |input1 |input2 |bn_mul |bn_add |add_output |final_output | + * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | + * + * This is what this composite function (tailored for add followed by a batch norm operation) does: + * add_output <- input1 + input2 (add) + * final_output <- add_output * bn_mul + bn_add (batch norm = mul+add) + * + * @param[in] input1 First tensor input. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input. + * @param[in] bn_mul The multiplication coefficient on the feature dimension. Data types supported: Same as @p input. + * It's one dimensional tensor with size equal to the feature maps [FM] + * @param[in] bn_add The addition coefficient on the feature dimension. Data types supported: Same as @p input. + * It's one dimensional tensor with size equal to the feature maps [FM] + * @param[out] add_output Output of the first add. Data type supported: Same as @p input. + * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input. + * @param[in] policy Policy to handle overflow + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
+ * + */ + void configure(ITensor *input1, + ITensor *input2, + ITensor *bn_mul, + ITensor *bn_add, + ITensor *add_output, + ITensor *final_output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd + * + * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor * + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *bn_mul, + const ITensorInfo *bn_add, + const ITensorInfo *add_output, + const ITensorInfo *final_output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */ diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index c50f358d1f..3bb50a0f90 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NEARGMINMAXLAYER_H #define ARM_COMPUTE_NEARGMINMAXLAYER_H -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" @@ -33,11 +31,10 @@ namespace arm_compute { class ITensor; - /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. * - * This function calls the following NEON kernels: + * This function calls the following kernels: * * -# @ref NEReductionOperationKernel * -# @ref NEFillBorderKernel @@ -52,8 +49,30 @@ class NEArgMinMaxLayer : public IFunction public: /** Constructor */ NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete; + /** Default destructor */ + ~NEArgMinMaxLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |QASYMM8 |U32, S32 | + * |QASYMM8_SIGNED |U32, S32 | + * |S32 |U32, S32, S64 | + * |F16 |U32, S32 | + * |F32 |U32, S32 | + * * @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. * @param[out] output Output source tensor. Data types supported: U32/S32. @@ -64,7 +83,7 @@ public: * * @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. 
- * @param[in] output Output source tensor info. Data types supported: U32/S32. + * @param[in] output Output source tensor info. Data types supported: U32/S32/S64. * @param[in] op Operation to perform: min or max * * @return a status @@ -75,7 +94,8 @@ public: void run() override; private: - std::unique_ptr<NEReductionOperation> _reduction_function; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEARGMINMAXLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 6cab5b3547..73a43dbc44 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,36 +25,83 @@ #define ARM_COMPUTE_NEARITHMETICADDITION_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEArithmeticAdditionKernel */ -class NEArithmeticAddition : public INESimpleFunction +/** Basic function to run @ref cpu::kernels::CpuAddKernel */ +class NEArithmeticAddition : public IFunction { public: + /** Default Constructor */ + NEArithmeticAddition(); + /** Default Destructor */ + ~NEArithmeticAddition(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition(const NEArithmeticAddition &) = delete; + /** Default move constructor */ + NEArithmeticAddition(NEArithmeticAddition &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; + /** Default move assignment operator */ + NEArithmeticAddition &operator=(NEArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
*/ - void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition * - * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] output Output tensor. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 + * @param[in] output Output tensor info. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 * @param[in] policy Policy to use to handle overflow * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */ +#endif /* ARM_COMPUTE_NEARITHMETICADDITION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index 4774fb6adb..3e4f6356c5 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,43 +25,88 @@ #define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { class ITensor; -/** Basic function to run @ref NEArithmeticSubtractionKernel +/** Basic function to run @ref cpu::kernels::CpuSubKernel * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. * @note The function performs an arithmetic subtraction between two tensors. 
* * This function calls the following kernels: - * -# @ref NEArithmeticSubtractionKernel + * -# @ref cpu::kernels::CpuSubKernel */ -class NEArithmeticSubtraction : public INESimpleFunction +class NEArithmeticSubtraction : public IFunction { public: + /** Default Constructor */ + NEArithmeticSubtraction(); + /** Default Destructor */ + ~NEArithmeticSubtraction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtraction(const NEArithmeticSubtraction &) = delete; + /** Default move constructor */ + NEArithmeticSubtraction(NEArithmeticSubtraction &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtraction &operator=(const NEArithmeticSubtraction &) = delete; + /** Default move assignment operator */ + NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction * - * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 + * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 + * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 + * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
* * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 14416e7323..99e2dcadbb 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H #define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include <memory> + namespace arm_compute { class ITensor; +class NEBatchNormalizationLayerKernel; /** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer. * @@ -42,10 +44,30 @@ class ITensor; class NEBatchNormalizationLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NEBatchNormalizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEBatchNormalizationLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | + * * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. @@ -59,7 +81,13 @@ public: * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
*/ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, + void configure(ITensor *input, + ITensor *output, + const ITensor *mean, + const ITensor *var, + const ITensor *beta = nullptr, + const ITensor *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer * @@ -76,16 +104,20 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *var, + const ITensorInfo *beta = nullptr, + const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; private: - NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ + std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index 2a62530246..ebed0bea29 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,26 +24,49 @@ #ifndef ARM_COMPUTE_NEBATCHTOSPACELAYER_H #define ARM_COMPUTE_NEBATCHTOSPACELAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBatchToSpaceLayerKernel. */ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBatchToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:---------|:---------|:----------| + * |All |s32 |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. 
Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input + * + * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release */ + ARM_COMPUTE_DEPRECATED_REL(23.05) void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Set the input and output tensors. (Static block shape). * @@ -51,8 +74,13 @@ public: * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed */ - void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output); + void configure(const ITensor *input, + int32_t block_shape_x, + int32_t block_shape_y, + ITensor *output, + const CropInfo &crop_info = CropInfo{}); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. @@ -60,7 +88,9 @@ public: * @param[out] output Tensor output info. Data types supported: same as @p input * * @return a status + * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release */ + ARM_COMPUTE_DEPRECATED_REL(23.05) static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape). * @@ -68,10 +98,15 @@ public: * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output info. Data types supported: same as @p input + * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed * * @return a status */ - static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + int32_t block_shape_x, + int32_t block_shape_y, + const ITensorInfo *output, + const CropInfo &crop_info = CropInfo{}); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEBATCHTOSPACELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index c254c30ce5..1f95f193d3 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,8 +34,28 @@ class ITensor; class NEBitwiseAnd : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBitwiseAnd() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd(NEBitwiseAnd &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete; + /** Default destructor */ + ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index 15e12509c8..c66bebf7cc 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder public: /** Initialise the kernel's input and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index 0e62620fc0..183df212e4 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index 1dcc6e2216..126aaa6ddd 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index 27c1c5198b..aa41fc0df2 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,23 +24,31 @@ #ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H #define ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEBoundingBoxTransformKernel. - * - * This function calls the following Neon kernels: - * -# @ref NEBoundingBoxTransformKernel - */ -class NEBoundingBoxTransform : public INESimpleFunction +/** Basic function to run @ref NEBoundingBoxTransformKernel. */ +class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM16 |QASYMM8 |QASYMM16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. @@ -49,7 +57,8 @@ public: * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. 
*/ - void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); + void + configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform * @@ -63,7 +72,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + static Status validate(const ITensorInfo *boxes, + const ITensorInfo *pred_boxes, + const ITensorInfo *deltas, + const BoundingBoxTransformInfo &info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h deleted file mode 100644 index c382ea9114..0000000000 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBOX3x3_H -#define ARM_COMPUTE_NEBOX3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute box filter 3x3. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEBox3x3Kernel - * - */ -class NEBox3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @note The border handler is run on the input tensor. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. 
- */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); -}; -} -#endif /*ARM_COMPUTE_NEBOX3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h deleted file mode 100644 index 84cc2de6d6..0000000000 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGE_H -#define ARM_COMPUTE_NECANNYEDGE_H - -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: - * - * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) - * -# @ref NESobel3x3 (if gradient_size == 3) or - * @ref NESobel5x5 (if gradient_size == 5) or - * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEGradientKernel - * -# @ref NEEdgeNonMaxSuppressionKernel - * -# @ref NEEdgeTraceKernel - * - */ -class NECannyEdge : public IFunction -{ -public: - /** Constructor - * - * Initialize Sobel kernel to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge(const NECannyEdge &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge &operator=(const NECannyEdge &) = delete; - /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] upper_thr Upper threhold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis. 
- * @param[in] gradient_size Gradient size (3, 5 or 7) - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr<INEKernel> _gradient; /**< Gradient kernel */ - NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ - NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ - NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ -}; -} -#endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index 55c21a01ec..43cae777f6 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,47 +25,73 @@ #define ARM_COMPUTE_NECAST_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEDepthConvertLayerKernel. +/** Basic function to run @ref cpu::kernels::CpuCastKernel. * This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values. */ -class NECast : public INESimpleFunction +class NECast : public IFunction { public: + /** Constructor */ + NECast(); + /** Destructor */ + ~NECast(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECast(const NECast &) = delete; + /** Default move constructor */ + NECast(NECast &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECast &operator=(const NECast &) = delete; + /** Default move assignment operator */ + NECast &operator=(NECast &&); /** Initialize the function's source, destination * - * Input data type must be different than output data type. 
+ * Valid data layouts: + * - All * - * Valid conversions Input -> Output : + * Valid data type configurations: + * |src |dst | + * |:--------------|:-----------------------------------------------| + * |QASYMM8_SIGNED | S16, S32, F32, F16 | + * |QASYMM8 | U16, S16, S32, F32, F16 | + * |U8 | U16, S16, S32, F32, F16 | + * |U16 | U8, U32 | + * |S16 | QASYMM8_SIGNED, U8, S32 | + * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 | + * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 | + * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8| * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8 + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. * @param[in] policy Conversion policy. */ void configure(ITensor *input, ITensor *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NECast * * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. - * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. * @param[in] policy Conversion policy. * * @return a status */ - static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NECAST_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h deleted file mode 100644 index ba159160e0..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
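The reworked NECast above keeps the plain configure(input, output, policy)/validate() interface while moving the implementation behind an Impl struct. A minimal sketch of one conversion from the table (U8 to F32); the 32x32 shape and the SATURATE policy are illustrative choices, not taken from the header:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

void cast_sketch()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

    NECast cast;
    cast.configure(&src, &dst, ConvertPolicy::SATURATE); // U8 -> F32, one of the conversions listed above

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    cast.run();
}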
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINE_H -#define ARM_COMPUTE_NECHANNELCOMBINE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */ -class NEChannelCombine : public INESimpleFunctionNoBorder -{ -public: - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELCOMBINE_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h deleted file mode 100644 index 96ba1c8ecc..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACT_H -#define ARM_COMPUTE_NECHANNELEXTRACT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */ -class NEChannelExtract : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input The input tensor to extract the channel from. Formats supported: Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELEXTRACT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index 716518a8da..bc19e1a4af 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H #define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEChannelShuffleLayerKernel * @@ -42,6 +44,15 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder public: /** Initialize the function * + * Valid data layouts: + * - NCHW + * - NHWC + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h deleted file mode 100644 index 5da0b91766..0000000000 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
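For the NEChannelShuffleLayer hunk above, a minimal sketch of shuffling 8 channels in 2 groups on an NCHW tensor. The exact configure() declaration sits outside the context shown, so the (input, output, num_groups) argument order below is inferred from the parameter list and should be treated as an assumption, as should the tensor sizes:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

void channel_shuffle_sketch()
{
    using namespace arm_compute;

    // NCHW shape [W, H, C, N]; C = 8 is a multiple of the 2 groups, as required by the documentation above.
    Tensor src, dst;
    const TensorInfo info(TensorShape(14U, 14U, 8U, 1U), 1, DataType::F32);
    src.allocator()->init(info);
    dst.allocator()->init(info);

    NEChannelShuffleLayer shuffle;
    shuffle.configure(&src, &dst, 2U /* num_groups */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    shuffle.run();
}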
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOL2IM_H -#define ARM_COMPUTE_NECOL2IM_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NECol2Im */ -class NECol2Im : public INESimpleFunctionNoBorder -{ -public: - /** Configure the col2im NEON kernel - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - */ - void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); - /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECOL2IM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h deleted file mode 100644 index ee76db2787..0000000000 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOLORCONVERT_H -#define ARM_COMPUTE_NECOLORCONVERT_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class IMultiImage; -using IImage = ITensor; - -/**Basic function to run @ref NEColorConvertKernel to perform color conversion */ -class NEColorConvert : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h deleted file mode 100644 index 09c0c9d985..0000000000 --- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H -#define ARM_COMPUTE_NECOMPUTEALLANCHORS_H - -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEComputeAllAnchorsKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEComputeAllAnchorsKernel - */ -class NEComputeAllAnchors : public INESimpleFunction -{ -public: - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECOMPUTEALLANCHORS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 8207589680..1600f85488 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,10 @@ #ifndef ARM_COMPUTE_NECONCATENATELAYER_H #define ARM_COMPUTE_NECONCATENATELAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/Requires.h" +#include "arm_compute/runtime/IFunction.h" #include <memory> -#include <vector> namespace arm_compute { @@ -40,33 +36,49 @@ class ITensor; class ITensorInfo; class Status; -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). - * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). - * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). 
- */ +/** Basic function to execute concatenate tensors along a given axis */ class NEConcatenateLayer : public IFunction { public: /** Default constructor */ NEConcatenateLayer(); + /** Destructor */ + ~NEConcatenateLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer(const NEConcatenateLayer &) = delete; + /** Default move constructor */ + NEConcatenateLayer(NEConcatenateLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer &operator=(const NEConcatenateLayer &) = delete; + /** Default move assignment operator */ + NEConcatenateLayer &operator=(NEConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel, + * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel. * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ - void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis); void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel, + * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. 
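With the non-const overload removed, callers now pass a std::vector<const ITensor *> plus the concatenation axis, and validate() (continued just below) mirrors this with ITensorInfo pointers. A minimal sketch concatenating two F32 tensors along axis 0 (width); shapes are illustrative:

#include <vector>

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

void concatenate_sketch()
{
    using namespace arm_compute;

    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32)); // widths add up along axis 0

    const std::vector<const ITensor *> inputs = {&a, &b};

    NEConcatenateLayer concat;
    concat.configure(inputs, &out, 0 /* axis */);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();

    // ... fill a and b ...
    concat.run();
}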
@@ -74,23 +86,15 @@ public: * * @return a status */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); - static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); + static Status + validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; private: - template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)> - void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis); - - template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)> - static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis); - -private: - std::vector<std::unique_ptr<INEKernel>> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NECONCATENATELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConv3D.h b/arm_compute/runtime/NEON/functions/NEConv3D.h new file mode 100644 index 0000000000..525f37f3e7 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConv3D.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NECONV3D_H +#define ARM_COMPUTE_NECONV3D_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Basic function to simulate a 3d convolution. 
This function calls one of the following functions: + * -# @ref cpu::CpuDirectConv3d + * + */ +class NEConv3D : public IFunction +{ +public: + /** Constructor */ + NEConv3D(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConv3D(const NEConv3D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConv3D &operator=(const NEConv3D &) = delete; + /** Default move constructor */ + NEConv3D(NEConv3D &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConv3D &operator=(NEConv3D &&) = default; + /** Default destructor */ + ~NEConv3D(); + /** Set the input and output tensors. + * + * Valid data layouts: + * - NDHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. 4 lower dimensions represent a single input [IFM, width, height, depth], + * while every optional dimension from 5 and above represent a batch of inputs. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [OFM, IFM, kernel_x, kernel_y, kernel_z]. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * @param[out] output Destination tensor. 4 lower dimensions represent a single output [OFM, width, height, depth], while the rest represent batch of outputs. + * @param[in] conv_info Contains padding, stride, acitvation information described in @ref Conv3dInfo. + */ + void configure( + ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv3dInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to NEConv3D::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const Conv3dInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECONV3D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 42f787090e..dc6b22d717 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,31 +24,49 @@ #ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/ITransformWeights.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */ +/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel. 
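A minimal F32 sketch for the new NEConv3D above, using the NDHWC-only layout it documents ([IFM, W, H, D, N] activations, [OFM, IFM, kx, ky, kz] weights). The tensor sizes are illustrative, and the default-constructed Conv3dInfo is assumed to mean unit strides with no padding; a real caller would fill it in explicitly:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

void conv3d_sketch()
{
    using namespace arm_compute;

    // NDHWC: src [IFM=4, W=8, H=8, D=8, N=1], weights [OFM=16, IFM=4, 3, 3, 3], biases [OFM=16].
    TensorInfo src_info(TensorShape(4U, 8U, 8U, 8U, 1U), 1, DataType::F32);
    TensorInfo wei_info(TensorShape(16U, 4U, 3U, 3U, 3U), 1, DataType::F32);
    TensorInfo bia_info(TensorShape(16U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 6U, 6U, 6U, 1U), 1, DataType::F32); // 8 - 3 + 1 = 6 per spatial dim
    src_info.set_data_layout(DataLayout::NDHWC);
    wei_info.set_data_layout(DataLayout::NDHWC);
    dst_info.set_data_layout(DataLayout::NDHWC);

    Tensor src, weights, biases, dst;
    src.allocator()->init(src_info);
    weights.allocator()->init(wei_info);
    biases.allocator()->init(bia_info);
    dst.allocator()->init(dst_info);

    const Conv3dInfo conv_info{}; // assumption: the defaults give stride (1,1,1) and no padding

    NEConv3D conv3d;
    conv3d.configure(&src, &weights, &biases, &dst, conv_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases ...
    conv3d.run();
}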
*/ class NEConvertFullyConnectedWeights : public IFunction { public: /** Default constructor */ NEConvertFullyConnectedWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete; + /** Default destructor */ + ~NEConvertFullyConnectedWeights(); /** Initialize the function. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. */ - void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + void + configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights * * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. @@ -58,53 +76,17 @@ public: * * @return A Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const TensorShape &original_input_shape, + DataLayout data_layout); // Inherited methods overriden: void run() override; private: - NEConvertFullyConnectedWeightsKernel _kernel; -}; - -namespace weights_transformations -{ -/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. 
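NEConvertFullyConnectedWeights keeps the configure(input, output, original_input_shape, data_layout) interface documented above while dropping the managed-weights wrapper deleted below. A minimal sketch converting 2D fully connected weights that were trained on NCHW-ordered inputs; the 4x4x3 feature map and the 48x10 weight shape are illustrative assumptions:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

void convert_fc_weights_sketch()
{
    using namespace arm_compute;

    // 2D weights of a fully connected layer fed by a 4x4x3 feature map (48 inputs, 10 outputs).
    Tensor weights, converted;
    const TensorInfo w_info(TensorShape(48U, 10U), 1, DataType::F32);
    weights.allocator()->init(w_info);
    converted.allocator()->init(w_info);

    NEConvertFullyConnectedWeights convert;
    // original_input_shape is the shape of the tensor entering the fully connected layer;
    // data_layout is the layout the weights were trained in (here NCHW, converting for NHWC execution).
    convert.configure(&weights, &converted, TensorShape(4U, 4U, 3U), DataLayout::NCHW);

    weights.allocator()->allocate();
    converted.allocator()->allocate();
    convert.run();
}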
*/ -class NEConvertFullyConnectedWeightsManaged : public ITransformWeights -{ -public: - void run() override - { - _output.allocator()->allocate(); - _func.run(); - _reshape_run = true; - } - - void release() override - { - _output.allocator()->free(); - } - - ITensor *get_weights() override - { - return &_output; - } - - uint32_t uid() override - { - return _uid; - } - - void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout) - { - _func.configure(input, &_output, original_input_shape, data_layout); - } - -private: - static constexpr uint32_t _uid = 0x4; - Tensor _output{}; - NEConvertFullyConnectedWeights _func{}; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} // namespace weights_transformations } // namespace arm_compute #endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h deleted file mode 100644 index c297589013..0000000000 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVOLUTION_H -#define ARM_COMPUTE_NECONVOLUTION_H - -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolution3x3Kernel - * - */ -class NEConvolution3x3 : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8/S16. - * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. 
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); -}; - -/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolutionKernel or<br/> - * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable) - * - */ -template <unsigned int matrix_size> -class NEConvolutionSquare : public IFunction -{ -public: - /** Default constructor */ - NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8 or S16. - * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function memory group */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - NESeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - NESeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */ - NEConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/ - NEFillBorderKernel _border_handler; /**< kernel for border handling */ -}; - -/** Basic function to run 5x5 convolution. */ -using NEConvolution5x5 = NEConvolutionSquare<5>; -/** Basic function to run 7x7 convolution. */ -using NEConvolution7x7 = NEConvolutionSquare<7>; -/** Basic function to run 9x9 convolution. */ -using NEConvolution9x9 = NEConvolutionSquare<9>; - -/** Basic function to execute non-square convolution. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolutionRectangleKernel or<br/> - * - * @note Convolution rectangle should have dimensions of 3, 5, 7, 9 - */ -class NEConvolutionRectangle : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8 or S16. - * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. - * @param[in] rows Rows of convolution kernel. - * @param[in] cols Columns of convolution kernel. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NECONVOLUTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index b76695b80c..2d07980ade 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,27 +21,26 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NECONVOLUTIONLAYER_H -#define ARM_COMPUTE_NECONVOLUTIONLAYER_H - -#include "arm_compute/runtime/IFunction.h" +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" + #include <memory> namespace arm_compute { +// Forward declarations class ITensor; -/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions: - * -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation) - * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation) - * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation) +/** Basic function to simulate a convolution layer. 
This function calls one of the following functions: + * -# @ref cpu::CpuGemmConv2d (executed only in case GEMM is required for the operation) + * -# @ref cpu::CpuWinogradConv2d (executed only in case Winograd is required for the operation) + * -# @ref cpu::CpuDirectConv2d (executed only in case Direct Convolution is required for the operation) * -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation) * * @@ -75,41 +74,74 @@ class NEConvolutionLayer : public IFunction public: /** Constructor */ NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete; + /** Default move constructor */ + NEConvolutionLayer(NEConvolutionLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer &operator=(NEConvolutionLayer &&) = default; + /** Default destructor */ + ~NEConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. 
In case this flag were set, the function could dispatch the fastest implementation @@ -118,20 +150,28 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, - unsigned int num_groups = 1); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. 
In case this flag were set, the function could dispatch the fastest implementation @@ -139,15 +179,21 @@ public: * * @return the Convolution Method Hint */ - static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + static ConvolutionMethod get_convolution_method(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; void prepare() override; private: - std::shared_ptr<IMemoryManager> _memory_manager; - std::unique_ptr<IFunction> _function; /**< Function to run */ + struct Impl; + std::unique_ptr<Impl> _impl; }; -} -#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
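For orientation, a minimal usage sketch of the reworked NEConvolutionLayer interface above. This is not part of the patch: the shapes, the F32 configuration and the allocator calls are illustrative and assume the library's usual Tensor/TensorInfo setup.

#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

Tensor src, weights, biases, dst;
// Illustrative shapes (default NCHW layout): 32x32 input with 16 IFM, 3x3 kernel, 8 OFM.
src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));
biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1, padding 1 keeps the spatial size for a 3x3 kernel

// validate() takes tensor infos and can be queried before committing to a configuration.
const Status status = NEConvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info);

NEConvolutionLayer conv;
conv.configure(&src, &weights, &biases, &dst, conv_info);

src.allocator()->allocate();
weights.allocator()->allocate();
biases.allocator()->allocate();
dst.allocator()->allocate();
// ... fill src, weights and biases ...
conv.run();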
\ No newline at end of file +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index b03f408ad8..840c03e968 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,18 +25,41 @@ #define ARM_COMPUTE_NECOPY_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NECopyKernel */ -class NECopy : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuCopyKernel */ +class NECopy : public IFunction { public: + /** Default Constructor */ + NECopy(); + /** Default Destructor */ + ~NECopy(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy(const NECopy &) = delete; + /** Default move constructor */ + NECopy(NECopy &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy &operator=(const NECopy &) = delete; + /** Default move assignment operator */ + NECopy &operator=(NECopy &&); /** Initialise the function's source and destination. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. * @@ -50,6 +73,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NECOPY_H */ diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 1c15beded4..f806762158 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H #define ARM_COMPUTE_NEON_CROP_RESIZE_H -#include "arm_compute/core/NEON/kernels/NECropKernel.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include <memory> @@ -32,7 +31,9 @@ namespace arm_compute { // Forward Declarations +class Tensor; class ITensor; +class NECropKernel; /** Function to perform cropping and resizing */ class NECropResize : public IFunction @@ -49,10 +50,18 @@ public: /** Allow instances of this class to be moved */ NECropResize &operator=(NECropResize &&) = default; /** Default destructor */ - virtual ~NECropResize() = default; + ~NECropResize(); /** Configure kernel * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------|:--------|:------|:--------| + * |All |F32 |F32 |F32 | + * * @note Supported tensor rank: up to 4 * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. * @note Start and end indices of boxes are inclusive. 
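The reworked NECopy above keeps the same configure/validate/run pattern behind its new pimpl; a minimal sketch, with tensors set up as in the previous sketch (any data type is accepted):

NECopy copy;
const Status status = NECopy::validate(src.info(), dst.info()); // dst must match src in shape and type
copy.configure(&src, &dst);
copy.run();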
@@ -66,8 +75,13 @@ public: * @param[in] method The policy to be used when resizing image. Default is bilinear. * @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0. */ - void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size, - InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0); + void configure(const ITensor *input, + const ITensor *boxes, + const ITensor *box_ind, + ITensor *output, + Coordinates2D crop_size, + InterpolationPolicy method = InterpolationPolicy::BILINEAR, + float extrapolation_value = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESlice * @@ -87,8 +101,13 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output, - Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value); + static Status validate(const ITensorInfo *input, + const ITensorInfo *boxes, + const ITensorInfo *box_ind, + const ITensorInfo *output, + Coordinates2D crop_size, + InterpolationPolicy method, + float extrapolation_value); void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index e2ed0e0abc..aabe42f928 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,14 @@ #ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H -#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEReverse.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReverse.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -64,11 +63,10 @@ namespace arm_compute * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. 
* - * This function calls the following NEON kernels/functions: + * This function calls the following kernels/functions: * * -# @ref CPPUpsample * -# @ref NEConvolutionLayer - * -# @ref NEPermute * -# @ref NEReverse * */ @@ -77,39 +75,77 @@ class NEDeconvolutionLayer : public IFunction public: /** Constructor */ NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete; + /** Default move constructor */ + NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete; - /** Allow instances of this class to be moved */ - NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default; - /** Allow instances of this class to be moved */ + /** Default move assignment operator */ NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default; /** Default destructor */ - virtual ~NEDeconvolutionLayer() = default; + ~NEDeconvolutionLayer() = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. + * Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] weights_info (Optional) Specifies the weight format. 
Default is unspecified. This parameter can be used to specify the weight format that is optimal for + * the GEMM convolution. * */ - void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *bias, + ITensor *output, + const PadStrideInfo &info, + bool enable_fast_math = false, + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for + * the GEMM convolution. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *bias, + const ITensorInfo *output, + const PadStrideInfo &info, + bool enable_fast_math = false, + const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: void run() override; @@ -127,6 +163,7 @@ private: ITensor *_input; PadStrideInfo _info; bool _is_prepared; + bool _do_upsampling; }; -} // arm_compute +} // namespace arm_compute #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index b784480887..7bfdfbd13d 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,35 +25,48 @@ #define ARM_COMPUTE_NEDEPTHCONVERT_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" -#include <cstdint> +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/**Basic function to run @ref NEDepthConvertLayerKernel */ -class NEDepthConvertLayer : public INESimpleFunctionNoBorder +/**Basic function to run @ref cpu::kernels::CpuCastKernel */ +class NEDepthConvertLayer : public IFunction { public: - /* Contructor */ - NEDepthConvertLayer() = default; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ + /** Constructor */ + NEDepthConvertLayer(); + /** Destructor */ + ~NEDepthConvertLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default move constructor */ + NEDepthConvertLayer(NEDepthConvertLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default move assignment operator */ + NEDepthConvertLayer &operator=(NEDepthConvertLayer &&); /** Initialize the function's source, destination * - * Valid conversions Input -> Output : + * Valid data layouts: + * - All * - * - QASYMM8 -> F16, F32 - * - U8 -> U16, S16, S32 - * - U16 -> U8, U32 - * - S16 -> U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8, F32 - * - F32 -> QASYMM8, F16, BFLOAT16 + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------------------| + * |QASYMM8 | F16, F32 | + * |U8 | U16, S16, S32 | + * |U16 | U8, U32 | + * |S16 | U8, S32 | + * |BFLOAT16 | F32 | + * |F16 | QASYMM8, F32 | + * |F32 | QASYMM8, F16, BFLOAT16 | + * + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. 
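A usage sketch for the extended NEDeconvolutionLayer interface above, showing the new enable_fast_math and weights_info parameters. Not part of the patch; the stride-2 configuration and the decision to enable fast math are illustrative, and tensors are set up as in the earlier sketch.

const PadStrideInfo deconv_info(2, 2, 1, 1); // illustrative stride-2 transposed convolution
NEDeconvolutionLayer deconv;
const Status status = NEDeconvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(),
                                                     deconv_info, /*enable_fast_math=*/true);
deconv.configure(&src, &weights, &biases, &dst, deconv_info, /*enable_fast_math=*/true);
deconv.run();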
@@ -70,7 +83,15 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEDEPTHCONVERT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 3c21d1a33a..d27369670e 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,26 +21,48 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYER_H -#define ARM_COMPUTE_NEDEPTHTOSPACELAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEDepthToSpaceLayerKernel; /** Basic function to run @ref NEDepthToSpaceLayerKernel. */ -class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder +class NEDepthToSpaceLayer : public IFunction { public: + /** Constructor */ + NEDepthToSpaceLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEDepthToSpaceLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. 
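A short sketch of the NEDepthConvertLayer (cast) interface above. It assumes a configure() overload mirroring the validate() signature shown in the patch; U8 to S32 is one of the valid conversions listed.

Tensor src_u8, dst_s32; // initialised and allocated as in the earlier sketches
NEDepthConvertLayer cast;
const Status status = NEDepthConvertLayer::validate(src_u8.info(), dst_s32.info(), ConvertPolicy::SATURATE, /*shift=*/0);
cast.configure(&src_u8, &dst_s32, ConvertPolicy::SATURATE, /*shift=*/0);
cast.run();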
@@ -55,6 +77,11 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + void run() override; + +private: + std::unique_ptr<NEDepthToSpaceLayerKernel> _kernel; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 811dc82843..6ad5aa7bfa 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,18 +24,18 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" + +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class NEDepthwiseConvolutionLayerNativeKernel; /** Function to execute a depthwise convolution. */ @@ -52,8 +52,24 @@ public: NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. @@ -65,8 +81,14 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer * @@ -83,40 +105,27 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; void prepare() override; private: - /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a Depthwise Convolution Function - */ - static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); - - /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels: + /** Basic function to execute optimized depthwise convolution routines. 
This function calls the following kernels: * * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported * * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present - * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present + * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required * -# @ref NEActivationLayer if fused activation is required * @@ -131,9 +140,13 @@ private: /** Default move constructor */ NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; + NEDepthwiseConvolutionLayerOptimizedInternal & + operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; /** Default move assignment operator */ - NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + NEDepthwiseConvolutionLayerOptimizedInternal & + operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerOptimizedInternal() = default; /** Initialize the function's source, destination, kernels and border_size. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). @@ -146,8 +159,14 @@ private: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3 * @@ -163,71 +182,26 @@ private: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; void prepare() override; private: - /** Configure the kernels/functions for the generic pipeline. - * - * @param[in, out] input Source tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info Activation layer information in case of a fused activation. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - */ - void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U)); - /** Configure the kernels/functions for the optimized pipeline. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info Activation layer information in case of a fused activation. - */ - void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U)); - /** Run generic kernel */ - void run_generic(); - /** Run optimized function */ - void run_optimized(); - - MemoryGroup _memory_group; - NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEFillBorderKernel _border_handler; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_optimized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr<Impl> _impl; }; - /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel: + /** Basic function to execute a generic depthwise convolution. 
This function calls the following kernel: * * -# @ref NEDepthwiseConvolutionLayerNativeKernel * @@ -245,6 +219,8 @@ private: NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerGeneric() = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). @@ -258,8 +234,14 @@ private: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric * @@ -276,32 +258,25 @@ private: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; - void prepare() override; private: - NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel; - NEFillBorderKernel _fill_border; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + struct Impl; + std::unique_ptr<Impl> _impl; }; - - DepthwiseConvolutionFunction _depth_conv_func; - NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized; - NEDepthwiseConvolutionLayerGeneric _func_generic; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
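A usage sketch for the public NEDepthwiseConvolutionLayer interface above (the optimized/generic dispatch now happens internally through the pimpl). The shapes, depth multiplier and fused ReLU are illustrative; weights are the 3D [kernel_x, kernel_y, IFM] tensor described in the patch.

const PadStrideInfo dw_info(1, 1, 1, 1);
NEDepthwiseConvolutionLayer dwconv;
dwconv.configure(&src, &weights, &biases, &dst, dw_info,
                 /*depth_multiplier=*/1,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
dwconv.run(); // prepare() may also be called explicitly before the first run()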
\ No newline at end of file +#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index f8d0ce8b2d..8b49930ef5 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,33 +24,66 @@ #ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H #define ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ -class NEDequantizationLayer : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::CpuDequantize that dequantizes an input tensor */ +class NEDequantizationLayer : public IFunction { public: + /** Default Constructor */ + NEDequantizationLayer(); + /** Default Destructor */ + ~NEDequantizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer(const NEDequantizationLayer &) = delete; + /** Default move constructor */ + NEDequantizationLayer(NEDequantizationLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer &operator=(const NEDequantizationLayer &) = delete; + /** Default move assignment operator */ + NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default; /** Configure the kernel. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:-----------| + * |QASYMM8 |F16, F32 | + * |QASYMM8_SIGNED |F16, F32 | + * |QSYMM8_PER_CHANNEL |F16, F32 | + * |QSYMM8 |F16, F32 | + * |QSYMM16 |F16, F32 | + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data type supported: F16/F32. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h deleted file mode 100644 index 65d0654612..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDERIVATIVE_H -#define ARM_COMPUTE_NEDERIVATIVE_H - -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute first order derivative operator. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDerivativeKernel - * - */ -class NEDerivative : public IFunction -{ -public: - /** Default constructor */ - NEDerivative(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data type supported: S16. - * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - NEDerivativeKernel _kernel; /**< Derivative kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index d616762a5a..7a94833d10 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,12 @@ #ifndef ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H #define ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/runtime/Tensor.h" #include <map> @@ -53,11 +52,23 @@ public: NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete; + /** Default destructor */ + ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * - * @param[in] input_box_encoding The bounding box input tensor. Data types supported: F32, QASYMM8. - * @param[in] input_score The class prediction input tensor. Data types supported: Same as @p input_box_encoding. - * @param[in] input_anchors The anchors input tensor. Data types supported: Same as @p input_box_encoding. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src2 |dst0 - dst3 | + * |:--------------|:--------------| + * |QASYMM8 |F32 | + * |QASYMM8_SIGNED |F32 | + * |F32 |F32 | + * + * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. + * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding. + * @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding. * @param[out] output_boxes The boxes output tensor. Data types supported: F32. * @param[out] output_classes The classes output tensor. Data types supported: Same as @p output_boxes. * @param[out] output_scores The scores output tensor. Data types supported: Same as @p output_boxes. @@ -66,23 +77,34 @@ public: * * @note Output contains all the detections. Of those, only the ones selected by the valid region are valid. */ - void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors, - ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); + void configure(const ITensor *input_box_encoding, + const ITensor *input_score, + const ITensor *input_anchors, + ITensor *output_boxes, + ITensor *output_classes, + ITensor *output_scores, + ITensor *num_detection, + DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer * - * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: F32, QASYMM8. - * @param[in] input_class_score The class prediction input tensor info. Data types supported: F32, QASYMM8. - * @param[in] input_anchors The anchors input tensor. Data types supported: F32, QASYMM8. - * @param[out] output_boxes The output tensor. Data types supported: F32. - * @param[out] output_classes The output tensor. Data types supported: Same as @p output_boxes. - * @param[out] output_scores The output tensor. 
Data types supported: Same as @p output_boxes. - * @param[out] num_detection The number of output detection. Data types supported: Same as @p output_boxes. - * @param[in] info (Optional) DetectionPostProcessLayerInfo information. + * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. + * @param[in] input_class_score The class prediction input tensor info. Data types supported: same as @p input_box_encoding. + * @param[in] input_anchors The anchors input tensor info. Data types supported: same as @p input_box_encoding. + * @param[in] output_boxes The output tensor info. Data types supported: F32. + * @param[in] output_classes The output tensor info. Data types supported: Same as @p output_boxes. + * @param[in] output_scores The output tensor info. Data types supported: Same as @p output_boxes. + * @param[in] num_detection The number of output detection tensor info. Data types supported: Same as @p output_boxes. + * @param[in] info (Optional) DetectionPostProcessLayerInfo information. * * @return a status */ - static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors, - ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection, + static Status validate(const ITensorInfo *input_box_encoding, + const ITensorInfo *input_class_score, + const ITensorInfo *input_anchors, + ITensorInfo *output_boxes, + ITensorInfo *output_classes, + ITensorInfo *output_scores, + ITensorInfo *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h deleted file mode 100644 index 39a37af74c..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATE_H -#define ARM_COMPUTE_NEDILATE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute dilate. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDilateKernel - * - */ -class NEDilate : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode. - * - * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEDILATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index 68454be88f..3ae3b2a15c 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,35 +24,51 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { +class ITensor; +class ITensorInfo; /** Function to run the direct convolution. * - * This function calls the following NEON kernels: + * This function calls the following: * - * -# @ref NEFillBorderKernel for the input - * -# @ref NEDirectConvolutionLayerOutputStageKernel - * -# @ref NEDirectConvolutionLayerKernel + * -# @ref cpu::CpuDirectConv2d */ class NEDirectConvolutionLayer : public IFunction { public: /** Constructor */ NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @note: DirectConvolution only works in the following configurations: * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 @@ -69,7 +85,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *bias, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer * * @note: DirectConvolution only works in the following configurations: @@ -90,22 +111,20 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *bias, + const ITensorInfo *output, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEDirectConvolutionLayerKernel _conv_kernel; - NEFillBorderKernel _input_border_handler; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - bool _has_bias; - bool _is_activationlayer_enabled; - unsigned int _dim_split; + struct Impl; + std::shared_ptr<IMemoryManager> _memory_manager; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index cac105cdb9..ebf2277d1f 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,29 +25,59 @@ #define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { class ITensor; -/** Basic function to run @ref NEArithmeticOperationKernel for max +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a max operation between two tensors. 
*/ -class NEElementwiseMax : public INESimpleFunction +class NEElementwiseMax : public IFunction { public: + /** Default Constructor */ + NEElementwiseMax(); + /** Default Destructor */ + ~NEElementwiseMax(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseMax(const NEElementwiseMax &) = delete; + /** Default move constructor */ + NEElementwiseMax(NEElementwiseMax &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseMax &operator=(const NEElementwiseMax &) = delete; + /** Default move assignment operator */ + NEElementwiseMax &operator=(NEElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -56,26 +86,64 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for min +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a min operation between two tensors. 
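NEElementwiseMax, and the min/squared-difference/division/power variants that follow, all share the same configure(input1, input2, output) and run() pattern; a sketch with hypothetical F32 tensors, checking the configuration through validate() first:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/Tensor.h"

void elementwise_max_sketch()
{
    using namespace arm_compute;
    Tensor a, b, out;
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::F32);
    a.allocator()->init(info);
    b.allocator()->init(info);
    out.allocator()->init(info);

    // validate() returns an empty Status when the configuration is accepted.
    const Status valid = NEElementwiseMax::validate(a.info(), b.info(), out.info());
    (void)valid;

    NEElementwiseMax max_fn;
    max_fn.configure(&a, &b, &out);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    // ... fill a and b ...
    max_fn.run();
}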
*/ -class NEElementwiseMin : public INESimpleFunction +class NEElementwiseMin : public IFunction { public: + /** Default Constructor */ + NEElementwiseMin(); + /** Default Destructor */ + ~NEElementwiseMin(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseMin(const NEElementwiseMin &) = delete; + /** Default move constructor */ + NEElementwiseMin(NEElementwiseMin &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseMin &operator=(const NEElementwiseMin &) = delete; + /** Default move assignment operator */ + NEElementwiseMin &operator=(NEElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -84,26 +152,64 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for squared difference +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. 
* @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 */ -class NEElementwiseSquaredDiff : public INESimpleFunction +class NEElementwiseSquaredDiff : public IFunction { public: + /** Default Constructor */ + NEElementwiseSquaredDiff(); + /** Default Destructor */ + ~NEElementwiseSquaredDiff(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseSquaredDiff(const NEElementwiseSquaredDiff &) = delete; + /** Default move constructor */ + NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseSquaredDiff &operator=(const NEElementwiseSquaredDiff &) = delete; + /** Default move assignment operator */ + NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -112,26 +218,60 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for division +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division * * @note The tensor data type for the inputs must be F16/F32. 
* @note The function performs a squared different operation between two tensors (i.e., out[i] = in1[i] / in2[i]) */ -class NEElementwiseDivision : public INESimpleFunction +class NEElementwiseDivision : public IFunction { public: + /** Default Constructor */ + NEElementwiseDivision(); + /** Default Destructor */ + ~NEElementwiseDivision(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseDivision(const NEElementwiseDivision &) = delete; + /** Default move constructor */ + NEElementwiseDivision(NEElementwiseDivision &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseDivision &operator=(const NEElementwiseDivision &) = delete; + /** Default move assignment operator */ + NEElementwiseDivision &operator=(NEElementwiseDivision &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division * * @param[in] input1 First tensor input info. Data types supported: F16/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -140,27 +280,61 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for power +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power * * @note The tensor data type for the inputs must be F16/F32. * @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) * @note For an exponent that is a float, this function will only work with a positive base. 
*/ -class NEElementwisePower : public INESimpleFunction +class NEElementwisePower : public IFunction { public: + /** Default Constructor */ + NEElementwisePower(); + /** Default Destructor */ + ~NEElementwisePower(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwisePower(const NEElementwisePower &) = delete; + /** Default move constructor */ + NEElementwisePower(NEElementwisePower &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwisePower &operator=(const NEElementwisePower &) = delete; + /** Default move assignment operator */ + NEElementwisePower &operator=(NEElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power * * @param[in] input1 First tensor input info. Data types supported: F16/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -169,62 +343,125 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComparisonOperationKernel. +/** Basic function to run @ref cpu::kernels::CpuComparisonKernel. * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. 
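NEElementwisePower (like NEElementwiseDivision above) accepts floating point inputs only; a sketch raising one hypothetical F32 tensor to the power of another, keeping the base positive as the note requires:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/Tensor.h"

void elementwise_power_sketch()
{
    using namespace arm_compute;
    Tensor base, exponent, out;
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::F32);
    base.allocator()->init(info);
    exponent.allocator()->init(info);
    out.allocator()->init(info);

    NEElementwisePower pow_fn;
    pow_fn.configure(&base, &exponent, &out);

    base.allocator()->allocate();
    exponent.allocator()->allocate();
    out.allocator()->allocate();
    // ... fill base (positive values) and exponent ...
    pow_fn.run();
}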
*/ -class NEElementwiseComparison : public INESimpleFunction +class NEElementwiseComparison : public IFunction { public: + /** Default Constructor */ + NEElementwiseComparison(); + /** Default Destructor */ + ~NEElementwiseComparison(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseComparison(const NEElementwiseComparison &) = delete; + /** Default move constructor */ + NEElementwiseComparison(NEElementwiseComparison &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseComparison &operator=(const NEElementwiseComparison &) = delete; + /** Default move assignment operator */ + NEElementwiseComparison &operator=(NEElementwiseComparison &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:-----| + * |QASYMM8 |QASYMM8 |U8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 | + * |S32 |S32 |U8 | + * |U8 |U8 |U8 | + * |S16 |S16 |U8 | + * |F16 |F16 |U8 | + * |F32 |F32 |U8 | + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: U16/U32. + * @param[out] output Output tensor. Data types supported: U8. * @param[in] op Comparison Operation to be performed. */ void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. + * @param[in] output Output tensor info. Data types supported: U8. * @param[in] op Comparison Operation to be performed. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); + static Status + validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComparisonOperationKernel +/** Basic function to run @ref cpu::kernels::CpuComparisonKernel * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. 
*/ template <ComparisonOperation op> -class NEElementwiseComparisonStatic : public INESimpleFunction +class NEElementwiseComparisonStatic : public IFunction { public: + /** Default Constructor */ + NEElementwiseComparisonStatic(); + /** Default Destructor */ + ~NEElementwiseComparisonStatic(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseComparisonStatic(const NEElementwiseComparisonStatic &) = delete; + /** Default move constructor */ + NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseComparisonStatic &operator=(const NEElementwiseComparisonStatic &) = delete; + /** Default move assignment operator */ + NEElementwiseComparisonStatic &operator=(NEElementwiseComparisonStatic &&); /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: U16/U32. */ void configure(ITensor *input1, ITensor *input2, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. * @param[in] output Output tensor info. Data types supported: U16/U32. * * @return a status */ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; /** Basic function to run equal comparison. */ @@ -239,5 +476,6 @@ using NEGreaterEqual = NEElementwiseComparisonStatic<ComparisonOperation::Greate using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>; /** Basic function to run less-equal comparison. */ using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>; + } // namespace arm_compute #endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 094f875b35..63e47b8377 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. 
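The comparison aliases above are instantiations of NEElementwiseComparisonStatic; a sketch using NEGreaterEqual, assuming the destination is a U8 mask as documented for the run-time NEElementwiseComparison variant:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/Tensor.h"

void greater_equal_sketch()
{
    using namespace arm_compute;
    Tensor a, b, mask;
    a.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    mask.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));

    NEGreaterEqual ge;
    ge.configure(&a, &b, &mask);

    a.allocator()->allocate();
    b.allocator()->allocate();
    mask.allocator()->allocate();
    // ... fill a and b ...
    ge.run();
}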
* * SPDX-License-Identifier: MIT * @@ -24,150 +24,73 @@ #ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; - -/** Basic function to perform inverse square root on an input tensor. */ -class NERsqrtLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to perform exponential on an input tensor. */ -class NEExpLayer : public INESimpleFunction +class ITensorInfo; +/** Basic function to perform unary elementwise operations */ +template <ElementWiseUnary op> +class NEElementwiseUnaryLayer : public IFunction { public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEExpLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; + /** Default Constructor */ + NEElementwiseUnaryLayer(); + /** Default Destructor */ + ~NEElementwiseUnaryLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryLayer(const NEElementwiseUnaryLayer &) = delete; + /** Default move constructor */ + NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryLayer &operator=(const NEElementwiseUnaryLayer &) = delete; + /** Default move assignment operator */ + NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&); -/** Basic function to negate an input tensor. */ -class NENegLayer : public INESimpleFunction -{ -public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NENegLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. + * Valid data layouts: + * - All * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to compute the natural logarithm of an input tensor. 
*/ -class NELogLayer : public INESimpleFunction -{ -public: - /** Initialize the function + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. + * @param[in] input Input tensor. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[out] output Output tensor. Data types supported: Same as @p input. */ void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. + * @param[in] input Input tensor info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; + // Inherited methods overridden: + void run() override; -/** Basic function to compute the absolute value of an input tensor. */ -class NEAbsLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEAbsLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to compute the round value elementwise of an input tensor. */ -class NERoundLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; +using NERsqrtLayer = NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>; +using NEExpLayer = NEElementwiseUnaryLayer<ElementWiseUnary::EXP>; +using NENegLayer = NEElementwiseUnaryLayer<ElementWiseUnary::NEG>; +using NELogLayer = NEElementwiseUnaryLayer<ElementWiseUnary::LOG>; +using NEAbsLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ABS>; +using NERoundLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>; +using NESinLayer = NEElementwiseUnaryLayer<ElementWiseUnary::SIN>; -/** Basic function to compute the sine of an input tensor. */ -class NESinLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. 
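After this refactor every unary alias (NERsqrtLayer, NEExpLayer, NENegLayer, NELogLayer, NEAbsLayer, NERoundLayer, NESinLayer) instantiates the templated NEElementwiseUnaryLayer and keeps the two-tensor configure call; a sketch with NEExpLayer on a hypothetical F32 tensor:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
#include "arm_compute/runtime/Tensor.h"

void exp_layer_sketch()
{
    using namespace arm_compute;
    Tensor src, dst;
    const TensorInfo info(TensorShape(32U), 1, DataType::F32);
    src.allocator()->init(info);
    dst.allocator()->init(info);

    NEExpLayer exp_fn;
    exp_fn.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    exp_fn.run();
}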
Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESinLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; } // namespace arm_compute #endif /* ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h deleted file mode 100644 index e9d58f3e0c..0000000000 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H - -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" -#include "arm_compute/runtime/Distribution1D.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/Lut.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute histogram equalization. This function calls the following NEON kernels: - * - * -# @ref NEHistogramKernel - * -# @ref NECumulativeDistributionKernel - * -# @ref NETableLookupKernel - * - */ -class NEEqualizeHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEEqualizeHistogram(); - /** Initialise the kernel's inputs. - * - * @note Currently the width of the input image must be a multiple of 16. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] output Output image. Data type supported: same as @p input - */ - void configure(const IImage *input, IImage *output); - - // Inherited methods overridden: - void run() override; - -private: - NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ - NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution - and creates the relevant LookupTable. 
*/ - NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ -}; -} -#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h deleted file mode 100644 index 1d6ea42e16..0000000000 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODE_H -#define ARM_COMPUTE_NEERODE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute erode. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEErodeKernel - * - */ -class NEErode : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode - * - * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEERODE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index c710b937b4..99c6fd4eb4 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,21 +24,22 @@ #ifndef ARM_COMPUTE_NEFFT1D_H #define ARM_COMPUTE_NEFFT1D_H -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> + namespace arm_compute { // Forward declaration class ITensor; +class NEFFTDigitReverseKernel; +class NEFFTRadixStageKernel; +class NEFFTScaleKernel; -/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels: +/** Basic function to execute one dimensional FFT. This function calls the following kernels: * * -# @ref NEFFTDigitReverseKernel Performs digit reverse * -# @ref NEFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition @@ -49,8 +50,26 @@ class NEFFT1D : public IFunction public: /** Default Constructor */ NEFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D(const NEFFT1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D &operator=(const NEFFT1D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D(NEFFT1D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D &operator=(NEFFT1D &&) = delete; + /** Default destructor */ + ~NEFFT1D(); /** Initialise the function's source and destinations. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * Number of channels supported: 1 (real tensor) or 2 (complex tensor).If @p input is real, @p output must be complex. @@ -71,15 +90,15 @@ public: void run() override; protected: - MemoryGroup _memory_group; - NEFFTDigitReverseKernel _digit_reverse_kernel; - std::vector<NEFFTRadixStageKernel> _fft_kernels; - NEFFTScaleKernel _scale_kernel; - Tensor _digit_reversed_input; - Tensor _digit_reverse_indices; - unsigned int _num_ffts; - unsigned int _axis; - bool _run_scale; + MemoryGroup _memory_group; + std::unique_ptr<NEFFTDigitReverseKernel> _digit_reverse_kernel; + std::vector<std::unique_ptr<NEFFTRadixStageKernel>> _fft_kernels; + std::unique_ptr<NEFFTScaleKernel> _scale_kernel; + Tensor _digit_reversed_input; + Tensor _digit_reverse_indices; + unsigned int _num_ffts; + unsigned int _axis; + bool _run_scale; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFFT1D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index e25ebb9e80..cefd3df17a 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
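A sketch of driving NEFFT1D as declared above, assuming the FFT1DInfo descriptor from FunctionDescriptors.h with its default forward direction; the length-128 real input and 2-channel complex output are hypothetical:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"

void fft1d_sketch()
{
    using namespace arm_compute;
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32)); // real input: 1 channel
    dst.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32)); // complex output: 2 channels

    FFT1DInfo fft_info;
    fft_info.axis = 0; // transform along the first dimension; direction defaults to forward

    NEFFT1D fft;
    fft.configure(&src, &dst, fft_info);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    fft.run();
}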
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEFFT2D_H #define ARM_COMPUTE_NEFFT2D_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEFFT1D.h" #include "arm_compute/runtime/Tensor.h" @@ -36,7 +35,7 @@ namespace arm_compute // Forward declaration class ITensor; -/** Basic function to execute two dimensional FFT. This function calls the following NEON kernels: +/** Basic function to execute two dimensional FFT. This function calls the following kernels: * * -# @ref NEFFT1D 1D FFT is performed on the first given axis * -# @ref NEFFT1D 1D FFT is performed on the second given axis @@ -46,8 +45,26 @@ class NEFFT2D : public IFunction public: /** Default Constructor */ NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D(const NEFFT2D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D &operator=(const NEFFT2D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D(NEFFT2D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D &operator=(NEFFT2D &&) = delete; + /** Default destructor */ + ~NEFFT2D(); /** Initialise the function's source and destinations * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] config FFT related configuration diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index 23df459e04..84bfe6b02f 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEFFT2D.h" @@ -43,7 +42,7 @@ namespace arm_compute // Forward declarations class ITensor; -/** Basic function to execute FFT-based convolution on NEON. This function calls the following NEON functions/kernels: +/** Basic function to execute FFT-based convolution on CPU. This function calls the following functions/kernels: * * -# @ref NEPermute Permute input if NHWC(only NCHW is supported). * -# @ref NEPadLayer Pad input. 
@@ -63,46 +62,68 @@ public: NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFFTConvolutionLayer(const NEFFTConvolutionLayer &) = delete; - /** Default move constructor */ - NEFFTConvolutionLayer(NEFFTConvolutionLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFTConvolutionLayer(NEFFTConvolutionLayer &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete; - /** Default move assignment operator */ - NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend. 
*/ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); /** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input - * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h deleted file mode 100644 index e2decb177b..0000000000 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
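Tying the updated NEFFTConvolutionLayer signature together, a sketch with hypothetical F32 shapes (square 9x9 kernel, unit strides); enable_fast_math is left at its default since it is unused on the CPU backend:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

void fft_conv_sketch()
{
    using namespace arm_compute;
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(9U, 9U, 3U, 8U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));

    NEFFTConvolutionLayer conv;
    // Padding of 4 on each side keeps the 64x64 spatial size with a 9x9 kernel and unit strides.
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 4, 4));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, weights and bias ...
    conv.run();
}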
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFASTCORNERS_H -#define ARM_COMPUTE_NEFASTCORNERS_H - -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute fast corners. This function call the following NEON kernels: - * - * -# @ref NEFastCornersKernel - * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) - * -# @ref NEFillArrayKernel - * - */ -class NEFastCorners : public IFunction -{ -public: - /** Constructor */ - NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEFastCornersKernel _fast_corners_kernel; - NEFillBorderKernel _border_handler; - NENonMaximaSuppression3x3Kernel _nonmax_kernel; - NEFillArrayKernel _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; -}; -} -#endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index f8a15078c3..1829c71fef 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,25 +24,53 @@ #ifndef ARM_COMPUTE_NEFILL_H #define ARM_COMPUTE_NEFILL_H -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; -/** Basic function to run @ref NEMemsetKernel */ -class NEFill : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuFillKernel */ +class NEFill : public IFunction { public: + /** Default Constructor */ + NEFill(); + /** Default Destructor */ + ~NEFill(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFill(const NEFill &) = delete; + /** Default move constructor */ + NEFill(NEFill &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFill &operator=(const NEFill &) = delete; + /** Default move assignment operator */ + NEFill &operator=(NEFill &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in,out] tensor Source tensor. Data types supported: All * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ITensor *tensor, PixelValue constant_value); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_FILL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index 0ae04cbf00..44b1d4a62b 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,22 +24,33 @@ #ifndef ARM_COMPUTE_NEFILLBORDER_H #define ARM_COMPUTE_NEFILLBORDER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include <memory> + namespace arm_compute { // Forward declaration class ITensor; +class NEFillBorderKernel; /** Basic function to run @ref NEFillBorderKernel */ class NEFillBorder : public IFunction { public: + NEFillBorder(); /** Initialize the function's source, destination and border_mode. 
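NEFill above needs only the destination tensor and a PixelValue; a sketch that zero-fills a hypothetical F32 tensor:

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEFill.h"
#include "arm_compute/runtime/Tensor.h"

void fill_sketch()
{
    using namespace arm_compute;
    Tensor t;
    t.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));

    NEFill fill;
    fill.configure(&t, PixelValue(0.0f));

    t.allocator()->allocate();
    fill.run();
}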
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @note This function fills the borders within the XY-planes. * * @param[in, out] input Source tensor. Data type supported: All @@ -47,13 +58,16 @@ public: * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + void configure(ITensor *input, + unsigned int border_width, + BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); // Inherited methods overridden: void run() override; private: - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 7b4801cd1c..3e92143824 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,18 +25,40 @@ #define ARM_COMPUTE_NEFLATTENLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute flatten layer kernel. */ -class NEFlattenLayer : public INESimpleFunctionNoBorder +class NEFlattenLayer : public IFunction { public: + NEFlattenLayer(); + /** Destructor */ + ~NEFlattenLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayer(const NEFlattenLayer &) = delete; + /** Default move constructor */ + NEFlattenLayer(NEFlattenLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayer &operator=(const NEFlattenLayer &) = delete; + /** Default move assignment operator */ + NEFlattenLayer &operator=(NEFlattenLayer &&); /** Initialise the kernel's input and output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. 
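For NEFillBorder above, a sketch assuming the caller already owns a tensor allocated with enough XY padding to hold the requested border:

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFillBorder.h"

void fill_border_sketch(arm_compute::ITensor *padded_tensor)
{
    using namespace arm_compute;
    NEFillBorder fill_border;
    // Writes one element of constant border into the tensor's padding region on each side.
    fill_border.configure(padded_tensor, 1U, BorderMode::CONSTANT, PixelValue(0.0f));
    fill_border.run();
}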
Data type supported: same as @p input @@ -53,6 +75,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index a11907ba6c..77ac484bab 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,20 +24,44 @@ #ifndef ARM_COMPUTE_NEFLOOR_H #define ARM_COMPUTE_NEFLOOR_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { +// Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEFloorKernel */ -class NEFloor : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuFloorKernel */ +class NEFloor : public IFunction { public: + /** Constructor */ + NEFloor(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloor(const NEFloor &) = delete; + /** Default move constructor */ + NEFloor(NEFloor &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloor &operator=(const NEFloor &) = delete; + /** Default move assignment operator */ + NEFloor &operator=(NEFloor &&); + /** Destructor */ + ~NEFloor(); /** Set the source, destination of the kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input Source tensor. Data type supported: F16/F32. * @param[out] output Destination tensor. Same as @p input */ @@ -50,6 +74,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEFLOOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index b14650c0e9..885f8430cf 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. 
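The NEFloor interface above follows the common validate-then-configure pattern; here is a minimal F32 sketch (shapes are illustrative, and a real caller would act on a failing Status rather than silently skipping the run):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void floor_example()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));

    // Check the configuration up front using the tensors' metadata only
    const Status status = NEFloor::validate(src.info(), dst.info());
    if(status.error_code() == ErrorCode::OK)
    {
        NEFloor floor_fn;
        floor_fn.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        floor_fn.run();
    }
}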
* * SPDX-License-Identifier: MIT * @@ -24,46 +24,20 @@ #ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H #define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H +#include "arm_compute/function_info/FullyConnectedLayerInfo.h" #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> + namespace arm_compute { -/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: - * - * -# @ref NETransposeKernel - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights - * - * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - namespace weights_transformations { -/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */ +/** Basic function to manage the reshape weights generated from @ref NETranspose */ class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights { public: @@ -95,17 +69,17 @@ public: } private: - static constexpr uint32_t _uid = 0x0; - Tensor _output{}; - NEFullyConnectedLayerReshapeWeights _func{}; + static constexpr uint32_t _uid = 0x0; + Tensor _output{}; + NETranspose _func{}; }; } // namespace weights_transformations -/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) - * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) - * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr) +/** Basic function to compute a Fully Connected layer. 
This function calls the following kernels: + * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) + * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) + * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ @@ -113,77 +87,91 @@ class NEFullyConnectedLayer : public IFunction { public: /** Constructor */ - NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete; - /** Default move constructor */ - NEFullyConnectedLayer(NEFullyConnectedLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NEFullyConnectedLayer(NEFullyConnectedLayer &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; - /** Default move assignment operator */ - NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = delete; + /** Default destructor */ + ~NEFullyConnectedLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. 
+ * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. + * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. + * Data type supported: Same as @p input. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: + * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer + * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. + * Data type supported: Same as @p input. + * @param[in] fc_info (Optional) Fully connected layer additional info + * @param[in] weights_info (Optional) Stores necessary compute information when weights are already reshaped */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info + * Similar to @ref NEFullyConnectedLayer::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), + const WeightsInfo &weights_info = WeightsInfo()); + + /** Static function that queries whether a fixed-format kernel exists for a given problem description + * + * @param[out] expected_weight_format Format in which the weights should be for the found fixed-format kernel + * @param[in] input Source tensor + * @param[in] weights Weights tensor. + * @param[in] biases Bias tensor.
Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor + * @param[in] fc_info Fully connected layer additional info + * @param[in] weights_info Describes weights shape + * + * @return a status + */ + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const FullyConnectedLayerInfo &fc_info, + const WeightsInfo &weights_info); //Inherited methods override void run() override; void prepare() override; private: - void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEFlattenLayerKernel _flatten_kernel; - NEConvertFullyConnectedWeights _convert_weights; - weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; - NEFullyConnectedLayerReshapeWeights _reshape_weights_function; - weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - Tensor _flatten_output; - Tensor _converted_weights_output; - Tensor _reshape_weights_output; - const ITensor *_original_weights; - bool _are_weights_converted; - bool _are_weights_reshaped; - bool _is_fc_after_conv; - bool _is_quantized_asymmetric; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index b3b41c5445..f53b3de7f6 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -33,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEFuseBatchNormalizationKernel; /** Basic function to fuse the batch normalization node to a preceding convolution node */ class NEFuseBatchNormalization : public IFunction @@ -49,9 +49,19 @@ public: /** Allow instances of this class to be moved */ NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default; /** Default destructor */ - ~NEFuseBatchNormalization() = default; + ~NEFuseBatchNormalization(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. 
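To make the NEFullyConnectedLayer interface documented above concrete, a minimal F32 sketch for a layer with 128 inputs and 64 outputs (all shapes are illustrative; the weight tensor is laid out as [num_inputs, num_outputs] for the default transpose_weights behaviour):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void fully_connected_example()
{
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

    NEFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst); // default FullyConnectedLayerInfo and WeightsInfo

    for(Tensor *t : { &src, &weights, &bias, &dst })
    {
        t->allocator()->allocate();
    }
    fc.run(); // the weight reshape performed in prepare() is expected to be triggered on the first run
}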
Data layout supported: NCHW, NHWC * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights @@ -65,9 +75,16 @@ public: * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution. */ - void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + void configure(const ITensor *input_weights, + const ITensor *bn_mean, + const ITensor *bn_var, + ITensor *fused_weights, + ITensor *fused_bias, + const ITensor *input_bias = nullptr, + const ITensor *bn_beta = nullptr, + const ITensor *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization * * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC @@ -85,16 +102,22 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + static Status validate(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, + const ITensorInfo *bn_beta = nullptr, + const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); // Inherited methods overridden: void run() override; private: - NEFuseBatchNormalizationKernel _fuse_bn_kernel; + std::unique_ptr<NEFuseBatchNormalizationKernel> _fuse_bn_kernel; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 8dc6b88bb0..29650a5eca 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. 
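A sketch of driving the NEFuseBatchNormalization interface above; the tensors are assumed to be created and filled elsewhere, and the epsilon and fusion-type arguments shown are simply the documented defaults:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h"

using namespace arm_compute;

void fuse_bn_example(ITensor *conv_weights, ITensor *bn_mean, ITensor *bn_var,
                     ITensor *bn_beta, ITensor *bn_gamma,
                     ITensor *fused_weights, ITensor *fused_bias)
{
    // Fold the batch-normalisation statistics into new convolution weights and bias
    NEFuseBatchNormalization fuse_bn;
    fuse_bn.configure(conv_weights, bn_mean, bn_var, fused_weights, fused_bias,
                      nullptr /* no original convolution bias */, bn_beta, bn_gamma,
                      0.001f, FuseBatchNormalizationType::CONVOLUTION);
    fuse_bn.run();
}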
* * SPDX-License-Identifier: MIT * @@ -24,37 +24,18 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/function_info/GEMMInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" + +#include <memory> namespace arm_compute { -/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: - * - * If optimized assembly is available: - * -# @ref NEGEMMAssemblyDispatch - * -# @ref NEActivationLayer (if alpha != 1.0) - * Else: - * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) - * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) - * -# @ref NEGEMMMatrixMultiplyKernel - * In both cases: - * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) - * Else: - * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place) +/** Basic function to execute GEMM. This function calls the following kernels: * - * -# @ref NEActivationLayer (if activation is specified in GEMMInfo) + * -# @ref cpu::CpuGemm */ class NEGEMM : public IFunction { @@ -69,11 +50,25 @@ public: NEGEMM &operator=(const NEGEMM &) = delete; /** Default move assignment operator */ NEGEMM &operator=(NEGEMM &&) = default; + /** Default destructor */ + ~NEGEMM(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. * + * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around + * * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. 
Data type supported: same as @p a @@ -83,49 +78,49 @@ public: * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and * if the reshape of matrix B should happen only for the first run */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + void configure(const ITensor *a, + const ITensor *b, + const ITensor *c, + ITensor *d, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMM. * - * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32 - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor info. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * Similar to @ref NEGEMM::configure() * * @return a status */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); + + /** Static function that queries whether there exists fixed-format kernel and if it exists it will return in the first argument in what format + * weights are expected to be reshaped as defined by WeightFormat class. Apart from the first argument the rest of the arguments are the same + * as in @ref NEGEMM::validate() except that all arguments are required. 
+ * + * @return a status + */ + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMMatrixAdditionKernel _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAdditionKernel _add_bias_kernel; - NEActivationLayer _activation_func; - - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _tmp_d; - const ITensor *_original_b; - bool _run_vector_matrix_multiplication; - bool _run_alpha_scale; - bool _run_addition; - bool _run_bias_addition; - bool _run_activation; - bool _reshape_b_only_on_first_run; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h deleted file mode 100644 index ae0ae440d8..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
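A minimal F32 sketch of the NEGEMM interface above (matrix sizes are illustrative; note that an MxK matrix uses TensorShape(K, M), i.e. dimension 0 is the row length):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void sgemm_example()
{
    constexpr unsigned int M = 4, N = 8, K = 16;

    Tensor a, b, d;
    a.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32)); // A: M x K
    b.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); // B: K x N
    d.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); // D: M x N

    // d = 1.0f * a * b; no C matrix is passed, so beta is irrelevant here
    NEGEMM gemm;
    gemm.configure(&a, &b, nullptr, &d, 1.0f, 0.0f);

    a.allocator()->allocate();
    b.allocator()->allocate();
    d.allocator()->allocate();
    gemm.run();
}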
- */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H - -#include "arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" - -namespace arm_compute -{ -/** Assembly kernel glue */ -class NEGEMMAssemblyDispatch : public IFunction -{ -public: - /** Constructor */ - NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); - /** Prevent instances of this class from being copy constructed */ - NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; - NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; - NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; - ~NEGEMMAssemblyDispatch() = default; - - class IFallback - { - public: - virtual void run() = 0; - virtual void prepare() = 0; - virtual bool is_configured() const = 0; - virtual ~IFallback() = default; - }; - -private: - /** Interface for the arm_gemm fallback */ - std::unique_ptr<IFallback> _arm_gemm; - MemoryGroup _memory_group; /**< Function memory group */ - IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ -public: - /** If supported create an ACL function else fallback to the arm_gemm function. - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations - * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] gemm_info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info); - - /** Indicates whether or not this function can be used to process the given parameters. - * - * @param[in] a Input tensor info (Matrix A) - * @param[in] b Input tensor info (Matrix B) - * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations - * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] gemm_info GEMM meta-data - * - * @return a status. - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info); - /** Checks if activation is supported by the gemm assembly dispatcher - * - * @param[in] activation Activation to check - * - * @return True if activation is supported else false - */ - static bool is_activation_supported(const ActivationLayerInfo &activation); - /** Was the function successfully configured ? 
- * - * @return True if the function is configured and ready to run - */ - bool is_configured() const; - // Inherited methods overridden: - /** Runs a preparation step, usually for pre-transposing matrix b */ - void prepare() override; - void run() override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h new file mode 100644 index 0000000000..d1c5a1c9b3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEGEMMCONV2D_H +#define ARM_COMPUTE_NEGEMMCONV2D_H + +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> + +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; + +/** Basic function to compute the convolution layer. This function calls the following kernels/functions: + * + * Supports only NHWC data layout + * + * -# @ref cpu::CpuGemmAssemblyDispatch + * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch + * + * Weights are transformed from OHWI to HWIO format using the following kernels: + * -# @ref NEPermute + */ +class NEGEMMConv2d : public IFunction +{ +public: + /** Constructor */ + NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConv2d(const NEGEMMConv2d &) = delete; + /** Default move constructor */ + NEGEMMConv2d(NEGEMMConv2d &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete; + /** Default move assignment operator */ + NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default; + /** Destructor */ + ~NEGEMMConv2d(); + /** Set the input and output tensors. 
+ * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] info Convolution layer descriptor + */ + void + configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d + * + * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const Conv2dInfo &info); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index e7da1006e0..3e84c3e2cf 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. 
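A hedged sketch of configuring NEGEMMConv2d from the interface above, for a 1x1 F32 convolution in NHWC. All shapes are illustrative, and the Conv2dInfo construction assumes the conv_info/dilation/act_info/fast-math/groups constructor from FunctionDescriptors.h:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void gemm_conv2d_example()
{
    // NHWC: dimension 0 is the channel dimension; weights are laid out as [IFM, kernel_x, kernel_y, OFM]
    TensorInfo src_info(TensorShape(8U, 32U, 32U, 1U), 1, DataType::F32);
    TensorInfo wei_info(TensorShape(8U, 1U, 1U, 16U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 32U, 32U, 1U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    wei_info.set_data_layout(DataLayout::NHWC);
    dst_info.set_data_layout(DataLayout::NHWC);

    Tensor src, weights, bias, dst;
    src.allocator()->init(src_info);
    weights.allocator()->init(wei_info);
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(dst_info);

    // 1x1 convolution, stride 1, no padding, no fused activation, no fast math, single group
    NEGEMMConv2d conv;
    conv.configure(&src, &weights, &bias, &dst,
                   Conv2dInfo(PadStrideInfo(1, 1, 0, 0), Size2D(1U, 1U), ActivationLayerInfo(), false, 1));

    for(Tensor *t : { &src, &weights, &bias, &dst })
    {
        t->allocator()->allocate();
    }
    conv.run();
}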
* * SPDX-License-Identifier: MIT * @@ -24,254 +24,191 @@ #ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Function to reshape the weights. This function calls the following kernel: - * -# @ref NEWeightsReshapeKernel - */ -class NEConvolutionLayerReshapeWeights : public IFunction -{ -public: - /** Constructor */ - NEConvolutionLayerReshapeWeights(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete; - /** Default move constructor */ - NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; - /** Default move assignment operator */ - NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default; - /** Set the input and output tensors. - * - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16 - * @param[out] output Destination tensor. - * Data types supported: Same as @p weights, FP32 if @p weights is BFLOAT16 - */ - void configure(const ITensor *weights, const ITensor *biases, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights - * - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16 - * @param[in] output Destination tensor. 
- * Data types supported: Same as @p weights FP32 if @p weights is BFLOAT16 - * - * @return an error status - */ - static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - NEWeightsReshapeKernel _weights_reshape_kernel; -}; - -namespace weights_transformations -{ -/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */ -class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights -{ -public: - void configure(const ITensor *input, const ITensor *biases) - { - _bias_bit = (biases != nullptr) ? 1 : 0; - _func.configure(input, biases, &_output); - } - - void run() override - { - _output.allocator()->allocate(); - _func.run(); - _reshape_run = true; - } - - ITensor *get_weights() override - { - return &_output; - } - - void release() override - { - _output.allocator()->free(); - } - - uint32_t uid() override - { - return ((0x8) | (_bias_bit << 7)); - } - - bool is_reshape_run() - { - return _reshape_run; - } - -private: - Tensor _output{}; - NEConvolutionLayerReshapeWeights _func{}; - int32_t _bias_bit{ 0 }; -}; -} // namespace weights_transformations - -/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions: +/** Basic function to compute the convolution layer. This function calls the following kernels/functions: * - * -# @ref NEIm2ColKernel - * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32) - * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout) - * -# @ref NECol2ImKernel (if NCHW data layout) + * -# @ref cpu::CpuGemmConv2d * */ class NEGEMMConvolutionLayer : public IFunction { public: /** Constructor */ - NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete; - /** Default move constructor */ - NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete; - /** Default move assignment operator */ - NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. 
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] enable_fast_math (Optional) Enable fast math computation. 
In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer * - * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Destination tensor info. 
3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - /** Configures the appropriate matrix multiply routine + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); + + /** Static function to check if there is an optimized version of + * GEMM available for the input parameters. * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) - */ - void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines + * The method is intended to be used to find out the optimal + * memory layout to be used for the weights tensor when running + * variable weights execution. 
* - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Output tensor info. Data types supported: Same as @p input, - * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) - * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false) + * The user can query the database of optimised kernels in + * arm_gemm by specifying one of the enumerations of + * arm_compute::WeightFormat in the weight_format field of the input + * parameter weights_info. In case of success, the method + * writes the expected format in the output parameter + * expected_weight_format. The expected_weight_format can then be + * used in the configure method of the class for retrieving the + * optimal kernel. * - * @return a status - */ - static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - int gemm_3d_depth = 1, bool skip_im2col = false); - /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore + * Use case one - query for a specific format: * - * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth Depth of GEMM 3D - * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout + * WeightsInfo weights_info(..., arm_compute::WeightFormat::OHWIo4, ...); // Set the value of the input query. + * if (NEGEMMConvolutionLayer::has_opt_impl(WeightFormat(), ..., weights_info, ...)) + * { + * auto conv = std::make_unique<NEGEMMConvolutionLayer>(); + * conv->configure(..., weights_info, ...); // uses the same WeightFormat the user wanted originally, OHWIo4. + * conv->run(...); + * } * - * @return a status + * Use case two - query for any format that would be optimal for the GEMM to execute: + * + * WeightsInfo weights_info(..., arm_compute::WeightFormat::ANY, ...); // Set the value of the input query. + * arm_compute::WeightFormat expected_wf; + * if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ..., weights_info, ...)) + * { + * auto conv = std::make_unique<NEGEMMConvolutionLayer>(); + * // ... code to convert the layout of the weights tensor to the layout returned by has_opt_impl + * WeightsInfo new_weights_info(..., expected_wf, ...); // Set the value of the WeightFormat returned by has_opt_impl.
+ * conv->configure(..., new_weights_info, ...); + * conv->run(...); + * } + * + * Note that a GEMM configured with a WeightFormat other than + * UNSPECIFIED will run GEMM in variable weights mode. + * + * @param[out] expected_weight_format The arm_compute::WeightFormat expected by the kernel. + * @param[in] src Source tensor info. + * @param[in] weights Weights tensor info. + * @param[in] biases Biases tensor info. Shared biases supported. + * @param[in] dst Destination tensor info. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info (Optional) Specifies additional configuration parameters for the weights of the GEMM computation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported, plus no activation (i.e. Linear), which is the default value. + * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation + * available, which can introduce a drop in accuracy. Default is false + * + * @return a Status */ - static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col); + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); + // Inherited methods overridden: + void run() override; + void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEConvolutionLayerReshapeWeights _reshape_weights; - weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - NEIm2ColKernel _im2col_kernel; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NECol2ImKernel _col2im_kernel; - NEReshapeLayer _reshape_layer; - - const ITensor *_original_weights; - - Tensor _im2col_output; - Tensor _weights_reshaped; - Tensor _gemm_output; - Tensor _tmp_output; - - DataLayout _data_layout; - - bool _skip_im2col; - bool _skip_col2im; - bool _is_quantized; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H */ +#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h deleted file mode 100644 index 10d9c378ae..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited.
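To make the has_opt_impl() query flow above concrete, here is a minimal sketch of "use case two". The WeightsInfo constructor arguments, the tensor setup and the error handling are illustrative assumptions, not part of this change; the weight re-ordering step is left to the caller as in the documentation.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"

using namespace arm_compute;

// Query the optimal fixed weight format, then configure the convolution with it.
Status query_and_configure(NEGEMMConvolutionLayer &conv,
                           ITensor *src, ITensor *weights, ITensor *biases, ITensor *dst,
                           const PadStrideInfo &conv_info)
{
    // Ask for any fixed format the backend considers optimal (WeightsInfo ctor arguments assumed).
    const WeightsInfo query_info(false, 1U, 1U, 1U, false, WeightFormat::ANY);
    WeightFormat      expected_wf = WeightFormat::UNSPECIFIED;
    const Status      status      = NEGEMMConvolutionLayer::has_opt_impl(
        expected_wf, src->info(), weights->info(), biases ? biases->info() : nullptr,
        dst->info(), conv_info, query_info);
    if(status.error_code() != ErrorCode::OK)
    {
        return status; // No fixed-format kernel available; fall back to the default path.
    }
    // The caller is expected to re-order the weights tensor into expected_wf here.
    const WeightsInfo new_info(false, 1U, 1U, 1U, false, expected_wf);
    conv.configure(src, weights, biases, dst, conv_info, new_info);
    return Status{};
}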
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H -#define ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel: - * - * -# @ref NEGEMMInterleave4x4Kernel - * - */ -class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input First input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h deleted file mode 100644 index a8ce1e511b..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H -#define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to execute matrix multiply assembly kernels. */ -class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction -{ -public: - /** Constructor */ - NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the kernel's inputs, output - * - * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). Data type supported: same as @p a - * @param[out] output Output tensor. Data type supported: Data type supported: U32, S32 - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<INEKernel> _mm_kernel; - std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; - std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 11683c5b95..6d07675d3d 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,49 +21,34 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H -#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H -#include "NEActivationLayer.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/GEMMInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/IWeightsManager.h" #include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: +/** Function to run Gemm on quantized types. * - * -# @ref NEGEMMInterleave4x4Kernel - * -# @ref NEGEMMTranspose1xWKernel - * -# @ref NEGEMMLowpMatrixMultiplyKernel - * -# @ref NEGEMMLowpOffsetContributionKernel - * -# @ref NEActivationLayer + * This function calls the following: * - * otherwise if the DOT product instruction is available: - * - * -# @ref NEGEMMLowpOffsetContributionKernel - * -*/ + * -# @ref cpu::CpuGemmLowpMatrixMultiplyCore + */ class NEGEMMLowpMatrixMultiplyCore : public IFunction { public: /** Constructor */ - NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move constructor */ @@ -72,8 +57,31 @@ public: NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move assignment operator */ NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QASYMM8 |S32 |S32 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8 |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED 
|QASYMM8_SIGNED |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 | + * * @note GEMM_LOWP: low precision GEMM kernel * This kernel performs the following computations: * @@ -81,69 +89,36 @@ public: * -# Convert b values from QASYMM8 to int32 add b_offset to each of them. * -# Compute the matrix product of the resulting a * b in int32. * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise + * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise * * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. * @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 - * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED + * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32/F32 + * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and * if the reshape of matrix B should be executed only for the first run */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo()); + void configure( + const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32 - * @param[in] output Output tensor info. 
Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should be executed only for the first run + * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure() * * @return a status */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + const GEMMInfo &gemm_info = GEMMInfo()); // Inherited methods overridden void run() override; void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMLowpMatrixMultiplyKernel _mm_kernel; - NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel; - NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - NEActivationLayer _activation_func; - NEConvertQuantizedSignednessKernel _convert_to_signed_asymm; - NEConvertQuantizedSignednessKernel _convert_from_signed_asymm; - - Tensor _vector_sum_col; - Tensor _vector_sum_row; - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _mm_result_s32; - Tensor _signed_a; - Tensor _signed_output; - const ITensor *_original_b; - int32_t _a_offset; - int32_t _b_offset; - - bool _run_vector_matrix_multiplication; - bool _assembly_path; - bool _fused_assembly_path; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - bool _fuse_output_stage; - bool _run_activation; - bool _flip_signedness; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index cbdc788c0a..0d932bb4af 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H #define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" -/** This file contains all available output stages for GEMMLowp on NEON. +/** This file contains all available output stages for GEMMLowp. * * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore), * and processes it to obtain the final ASYMM8 value. @@ -37,253 +38,39 @@ namespace arm_compute { class ITensor; - -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8Scale on NEON. 
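As a rough usage sketch for the reworked NEGEMMLowpMatrixMultiplyCore above: a QASYMM8 x QASYMM8 multiplication producing raw S32 accumulators (default GEMMInfo, i.e. no fused output stage). The shapes and quantization parameters are arbitrary illustrative values, not taken from this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // a: MxK = 16x32, b: KxN = 32x8, dst: MxN = 16x8 (shapes are given as (x=cols, y=rows)).
    Tensor a, b, dst;
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 5)));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::S32));

    NEGEMMLowpMatrixMultiplyCore mm;
    mm.configure(&a, &b, nullptr, &dst); // Default GEMMInfo: output stage NONE -> S32 result.

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill a and b with quantized data ...
    mm.run();
    return 0;
}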
- * - * NEGEMMLowpQuantizeDownInt32ToUint8Scale depends on 3 parameters: result_offset, result_mult_int, result_shift - * The final result is: - * - * ((input[i][k] + result_offset) * result_mult_int) >> result_shift - * - * In case the bias tensor is provided, the final result is: - * - * ((input[i][k] + bias[k] + result_offset) * result_mult_int) >> result_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift -*/ -class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] result_offset Offset to be added to each element of the input matrix - * @param[in] result_mult_int Value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - ARM_COMPUTE_DEPRECATED_REL(20.05) - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(), - int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale - * - * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. 
- * - * @return a status - */ - ARM_COMPUTE_DEPRECATED_REL(20.05) - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; - -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON. - * - * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters: - * - * result_fixedpoint_multiplier, result_shift, result_offset_after_shift - * - * The final result is: - * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift -*/ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - * - * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. 
Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON. - * - * NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters: - * - * result_fixedpoint_multiplier, result_shift, result_offset_after_shift - * - * The final result is: +class ITensorInfo; +/** Basic function to execute GEMMLowpQuantizeDown kernels. * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift + * This function calls the following operators: * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift -*/ -class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. 
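The fixed-point requantization formula documented above can be illustrated with a small standalone sketch. It follows the stated definition of FixedPointMul (nearest integer to x*y / 2^31) with made-up values; the saturating corner cases and rounding-shift details of the real kernels are deliberately omitted.

#include <cstdint>
#include <cstdio>

// Nearest integer to (x * y) / 2^31, per the FixedPointMul description above
// (the INT32_MIN * INT32_MIN saturation case is ignored for brevity).
static int32_t fixed_point_mul(int32_t x, int32_t y)
{
    const int64_t prod  = static_cast<int64_t>(x) * static_cast<int64_t>(y);
    const int64_t nudge = prod >= 0 ? (1LL << 30) : (1 - (1LL << 30));
    return static_cast<int32_t>((prod + nudge) >> 31);
}

int main()
{
    // Illustrative values: requantize one S32 accumulator down to the QASYMM8 range.
    const int32_t acc        = 1234;       // accumulator produced by the GEMM
    const int32_t multiplier = 1395864371; // about 0.65 in Q0.31
    const int32_t shift      = 8;
    const int32_t offset     = 10;         // result_offset_after_shift

    int32_t res = (fixed_point_mul(acc, multiplier) >> shift) + offset;
    res         = res < 0 ? 0 : (res > 255 ? 255 : res); // clamp to [0, 255]
    std::printf("requantized value: %d\n", res);         // prints 13 for these inputs
    return 0;
}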
- */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint - * - * @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON. - * - * NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters: - * - * result_fixedpoint_multiplier, result_shift - * - * The final result is: - * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift + * -# @ref cpu::CpuGemmLowpOutputStage */ -class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder +class NEGEMMLowpOutputStage : public IFunction { public: + /** Constructor */ + NEGEMMLowpOutputStage(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage 
&operator=(NEGEMMLowpOutputStage &&) = delete; + /** Default destructor */ + ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(), - int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - * - * @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. + * Valid data layouts: + * - All * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; - -/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON. - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel -*/ -class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. 
Only shared biases supported and it can be a nullptr if the biases addition is not required. @@ -302,7 +89,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const GEMMLowpOutputStageInfo &info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h deleted file mode 100644 index 6a38490ed4..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H -#define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels: - * - * -# @ref NEGEMMTranspose1xWKernel - * - */ -class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input First input tensor. Data type supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW - * - * @param[in] input First input tensor. Data type supported: All - * @param[in] output Output tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index 7ed45c0f15..9c7ae0134d 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. 
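A minimal sketch of the new NEGEMMLowpOutputStage interface: requantizing an S32 GEMM result to QASYMM8 with a fixed-point output stage. The GEMMLowpOutputStageInfo field values, shapes and quantization parameters are assumptions chosen for illustration only.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::S32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));

    GEMMLowpOutputStageInfo info;
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = 1395864371; // fixed-point multiplier (illustrative)
    info.gemmlowp_shift      = 8;
    info.gemmlowp_offset     = 10;
    info.gemmlowp_min_bound  = 0;
    info.gemmlowp_max_bound  = 255;
    info.output_data_type    = DataType::QASYMM8;

    NEGEMMLowpOutputStage stage;
    stage.configure(&src, nullptr, &dst, info); // no bias tensor in this sketch

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with S32 accumulators ...
    stage.run();
    return 0;
}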
+ * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEGatherKernel */ class NEGather : public INESimpleFunctionNoBorder @@ -39,19 +40,26 @@ class NEGather : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. + * @note The "axis" must be in the range [0, input.rank -1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor. * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * */ void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * Similar to @ref NEGather::configure() * * @return a status */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h deleted file mode 100644 index 45736664ea..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
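For the NEGather changes above, a short usage sketch: gathering three rows along axis 1 of a 2D F32 tensor with a 1D U32 index tensor. The shapes and the choice of axis are illustrative assumptions.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGather.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // input: shape [width=4, height=10]; output keeps width 4 but only 3 gathered rows.
    Tensor input, indices, output;
    input.allocator()->init(TensorInfo(TensorShape(4U, 10U), 1, DataType::F32));
    indices.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::U32));
    output.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));

    NEGather gather;
    gather.configure(&input, &indices, &output, 1); // axis 1: gather along the height dimension

    input.allocator()->allocate();
    indices.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill input; each index value must be in [0, 10) ...
    gather.run();
    return 0;
}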
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3_H -#define ARM_COMPUTE_NEGAUSSIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian3x3Kernel - * - */ -class NEGaussian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h deleted file mode 100644 index 847530169a..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H -#define ARM_COMPUTE_NEGAUSSIAN5x5_H - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute gaussian filter 5x5. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5HorKernel - * -# @ref NEGaussian5x5VertKernel - * - */ -class NEGaussian5x5 : public IFunction -{ -public: - /** Default constructor - */ - NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ - NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h deleted file mode 100644 index a6b21278ba..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMID_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H - -#include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Common interface for all Gaussian pyramid functions */ -class NEGaussianPyramid : public IFunction -{ -public: - /** Default constructor */ - NEGaussianPyramid(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid(const NEGaussianPyramid &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramid(NEGaussianPyramid &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default; - /** Default destructor */ - virtual ~NEGaussianPyramid() = default; - - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: U8. - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0; - -protected: - const ITensor *_input; - IPyramid *_pyramid; - Pyramid _tmp; -}; - -/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussianPyramidHorKernel - * -# @ref NEGaussianPyramidVertKernel - * - */ -class NEGaussianPyramidHalf : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidHalf(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector<NEFillBorderKernel> _horizontal_border_handler; - std::vector<NEFillBorderKernel> _vertical_border_handler; - std::vector<NEGaussianPyramidHorKernel> _horizontal_reduction; - std::vector<NEGaussianPyramidVertKernel> _vertical_reduction; -}; - -/** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following NEON kernels and functions: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5 - * -# @ref NEScaleKernel - * - */ -class NEGaussianPyramidOrb : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidOrb(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector<NEGaussian5x5> _gaus5x5; - std::vector<NEScale> _scale_nearest; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index 7c470fbaf0..0f294fde22 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,34 +23,35 @@ */ #ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H #define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" + #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPadLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute { class ITensor; +class NEComputeAllAnchorsKernel; /** Basic function to generate proposals for a RPN (Region Proposal Network) * - * This function calls the following Neon kernels: - * -# @ref NEComputeAllAnchors + * This function calls the following Arm(R) Neon(TM) layers/kernels: + * -# @ref NEComputeAllAnchorsKernel * -# @ref NEPermute x 2 * -# @ref NEReshapeLayer x 2 * -# @ref NEBoundingBoxTransform * -# @ref NEPadLayerKernel - * -# @ref NEDequantizationLayerKernel x 2 - * -# @ref NEQuantizationLayerKernel + * -# @ref NEDequantizationLayer x 2 + * -# @ref NEQuantizationLayer * And the following CPP kernels: * -# @ref CPPBoxWithNonMaximaSuppressionLimit */ @@ -66,9 +67,21 @@ public: NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete; + /** Default destructor */ + ~NEGenerateProposalsLayer(); /** Set the input and output tensors. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 | + * * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores @@ -82,7 +95,12 @@ public: * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct. * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid. */ - void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals, + void configure(const ITensor *scores, + const ITensor *deltas, + const ITensor *anchors, + ITensor *proposals, + ITensor *scores_out, + ITensor *num_valid_proposals, const GenerateProposalsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer @@ -99,7 +117,11 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, + static Status validate(const ITensorInfo *scores, + const ITensorInfo *deltas, + const ITensorInfo *anchors, + const ITensorInfo *proposals, + const ITensorInfo *scores_out, const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info); @@ -110,17 +132,17 @@ private: // Memory group manager MemoryGroup _memory_group; - // Neon kernels - NEPermuteKernel _permute_deltas_kernel; - NEReshapeLayerKernel _flatten_deltas_kernel; - NEPermuteKernel _permute_scores_kernel; - NEReshapeLayerKernel _flatten_scores_kernel; - NEComputeAllAnchorsKernel _compute_anchors_kernel; - NEBoundingBoxTransformKernel _bounding_box_kernel; - NEPadLayerKernel _pad_kernel; - NEDequantizationLayerKernel _dequantize_anchors; - NEDequantizationLayerKernel _dequantize_deltas; - NEQuantizationLayerKernel _quantize_all_proposals; + // kernels/layers + NEPermute _permute_deltas; + NEReshapeLayer _flatten_deltas; + NEPermute _permute_scores; + NEReshapeLayer _flatten_scores; + std::unique_ptr<NEComputeAllAnchorsKernel> _compute_anchors; + NEBoundingBoxTransform _bounding_box; + NEPadLayer _pad; + NEDequantizationLayer _dequantize_anchors; + NEDequantizationLayer _dequantize_deltas; + NEQuantizationLayer _quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h deleted file mode 100644 index f0f46ce3ee..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
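Since the GenerateProposalsInfo construction is not shown in this patch, a hedged validate-before-configure sketch for NEGenerateProposalsLayer can simply take the already-prepared descriptors from the caller; only the validate() signature shown above is relied upon here.

#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"

using namespace arm_compute;

// Returns true if the given tensor infos and proposal parameters are supported.
bool proposals_supported(const ITensorInfo *scores,
                         const ITensorInfo *deltas,
                         const ITensorInfo *anchors,
                         const ITensorInfo *proposals,
                         const ITensorInfo *scores_out,
                         const ITensorInfo *num_valid_proposals,
                         const GenerateProposalsInfo &info)
{
    const Status status = NEGenerateProposalsLayer::validate(scores, deltas, anchors, proposals,
                                                             scores_out, num_valid_proposals, info);
    return status.error_code() == ErrorCode::OK;
}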
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H -#define ARM_COMPUTE_NEHOGDESCRIPTOR_H - -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class IHOG; -/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * - */ -class NEHOGDescriptor : public IFunction -{ -public: - /** Default constructor */ - NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the function's source, destination, HOG data-object and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block - * @param[in] hog HOG data object which describes the HOG descriptor - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - NEHOGOrientationBinningKernel _orient_bin; - NEHOGBlockNormalizationKernel _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; -}; -} - -#endif /* ARM_COMPUTE_NEHOGDESCRIPTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h deleted file mode 100644 index c0bd3da468..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDETECTOR_H -#define ARM_COMPUTE_NEHOGDETECTOR_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel: - * - * -# @ref NEHOGDetectorKernel - * - */ -class NEHOGDetector : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class - * - * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. - * - * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 - * @param[in] hog HOG data-object that describes the HOG descriptor - * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the block stride stored in hog - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEHOGDETECTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h deleted file mode 100644 index f8c3827049..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
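For reference, the removed NEHOGDetector scored each sliding window against the linear SVM model held by the IHOG object and kept windows whose score exceeded the threshold. The library-independent sketch below shows that scoring rule in isolation; names and layout are illustrative, not Compute Library API.

#include <cstddef>
#include <vector>

// Returns true when the window's HOG feature vector lies on the "object" side of
// the linear SVM hyperplane, i.e. dot(features, weights) > threshold.
bool hog_window_detected(const std::vector<float> &features,
                         const std::vector<float> &svm_weights,
                         float                     threshold)
{
    float score = 0.f;
    for(std::size_t i = 0; i < features.size() && i < svm_weights.size(); ++i)
    {
        score += features[i] * svm_weights[i];
    }
    return score > threshold;
}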
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGGRADIENT_H -#define ARM_COMPUTE_NEHOGGRADIENT_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels: - * - * -# @ref NEDerivative - * -# NEMagnitudePhaseKernel - * - */ -class NEHOGGradient : public IFunction -{ -public: - /** Default constructor */ - NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the function's source, destinations, phase type and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8. - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. - * @param[out] output_phase Output tensor.(phase). Format supported: U8 - * @param[in] phase_type Type of @ref PhaseType - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr<INEKernel> _mag_phase; - Tensor _gx; - Tensor _gy; -}; -} -#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h deleted file mode 100644 index 3840b9c0ad..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
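The removed NEHOGGradient produced a U16 magnitude plane and a U8 phase plane from the x/y derivatives. Per pixel that reduces to the standard magnitude/orientation formulas, sketched here outside the library; the 0-180 degree fold corresponds to the unsigned phase type, while a signed phase would keep the full 0-360 range.

#include <cmath>
#include <cstdint>

// Magnitude and (unsigned) phase for one pixel, given horizontal/vertical derivatives gx, gy.
inline void magnitude_phase(int16_t gx, int16_t gy, uint16_t &mag, uint8_t &phase_deg)
{
    const float fx = static_cast<float>(gx);
    const float fy = static_cast<float>(gy);
    const float m  = std::sqrt(fx * fx + fy * fy);
    float       p  = std::atan2(fy, fx) * 180.0f / 3.14159265f; // degrees in [-180, 180]
    if(p < 0.f)
    {
        p += 180.f; // fold to [0, 180) for unsigned-phase orientation binning
    }
    mag       = static_cast<uint16_t>(m + 0.5f);
    phase_deg = static_cast<uint8_t>(p + 0.5f);
}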
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGMULTIDETECTION_H -#define ARM_COMPUTE_NEHOGMULTIDETECTION_H - -#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * -# @ref NEHOGDetector - * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) - * - * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: - * -# Phase type - -# Normalization type - -# L2 hysteresis threshold if the normalization type is L2HYS_NORM - * - */ -class NEHOGMultiDetection : public IFunction -{ -public: - /** Default constructor */ - NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; - /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. - * This container should store the HOG data-objects in descending or ascending cell_size width order. 
- * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects - * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects - * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object - * The dimension of this array must be the same of multi_hog->num_models() - * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. - * True if the non-maxima suppression stage has to be computed - * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage - * - */ - void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, - uint8_t constant_border_value = 0, - float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient_kernel; - std::vector<NEHOGOrientationBinningKernel> _orient_bin_kernel; - std::vector<NEHOGBlockNormalizationKernel> _block_norm_kernel; - std::vector<NEHOGDetector> _hog_detect_kernel; - CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel; - std::vector<Tensor> _hog_space; - std::vector<Tensor> _hog_norm_space; - IDetectionWindowArray *_detection_windows; - Tensor _mag; - Tensor _phase; - bool _non_maxima_suppression; - size_t _num_orient_bin_kernel; - size_t _num_block_norm_kernel; - size_t _num_hog_detect_kernel; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGMULTIDETECTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h deleted file mode 100644 index caf887d492..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHARRISCORNERS_H -#define ARM_COMPUTE_NEHARRISCORNERS_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: - * - * -# @ref NESobel3x3 (if gradient_size == 3) or<br/> - * @ref NESobel5x5 (if gradient_size == 5) or<br/> - * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEFillBorderKernel - * -# NEHarrisScoreKernel<3> (if block_size == 3) or<br/> - * NEHarrisScoreKernel<5> (if block_size == 5) or<br/> - * NEHarrisScoreKernel<7> (if block_size == 7) - * -# @ref NENonMaximaSuppression3x3 - * -# @ref CPPCornerCandidatesKernel - * -# @ref CPPSortEuclideanDistanceKernel - * - */ -class NEHarrisCorners : public IFunction -{ -public: - /** Constructor - * - * Initialize _sobel, _harris_score and _corner_list to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] min_dist Radial Euclidean distance for the euclidean diatance stage - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 - * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(IImage *input, float threshold, float min_dist, float sensitivity, - int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr<IFunction> _sobel; /**< Sobel function */ - std::unique_ptr<INEHarrisScoreKernel> _harris_score; /**< Harris score kernel */ - NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ - CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ - CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ - NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ - Image _gx; /**< Source image - Gx component */ - Image _gy; /**< Source image - Gy component */ - Image _score; /**< Source image - Harris score */ - Image _nonmax; /**< Source image - Non-Maxima suppressed image */ - std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ - int32_t _num_corner_candidates; /**< Number of potential corner candidates */ -}; -} -#endif /*ARM_COMPUTE_NEHARRISCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h deleted file mode 100644 index e1a5e42de9..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHISTOGRAM_H -#define ARM_COMPUTE_NEHISTOGRAM_H - -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/runtime/IFunction.h" - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class IDistribution1D; - -/** Basic function to run @ref NEHistogramKernel. */ -class NEHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEHistogram(); - /** Initialise the kernel's inputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] output Output distribution. 
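The sensitivity parameter above is the constant k of the Harris-Stephens response R = det(M) - k * trace(M)^2, evaluated from the windowed sums of gradient products before thresholding and non-maxima suppression. A minimal library-independent sketch of that response:

// Harris-Stephens response for one pixel. sxx, syy, sxy are the block-window sums
// of gx*gx, gy*gy and gx*gy; k is the sensitivity passed to configure().
inline float harris_response(float sxx, float syy, float sxy, float k)
{
    const float det   = sxx * syy - sxy * sxy;
    const float trace = sxx + syy;
    return det - k * trace * trace;
}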
- */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run() override; - -private: - NEHistogramKernel _histogram_kernel; - std::vector<uint32_t> _local_hist; - std::vector<uint32_t> _window_lut; - size_t _local_hist_size; - /** 256 possible pixel values as we handle only U8 images */ - static constexpr unsigned int window_lut_default_size = 256; -}; -} -#endif /*ARM_COMPUTE_NEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h deleted file mode 100644 index cb905a3652..0000000000 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEIM2COL_H -#define ARM_COMPUTE_NEIM2COL_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NEIm2ColKernel */ -class NEIm2Col : public IFunction -{ -public: - /** Default constructor */ - NEIm2Col(); - /** Configure the im2col NEON kernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - - // Inherited methods overridden: - void run() override; - -private: - NEIm2ColKernel _kernel; - unsigned int _y_dim; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEIM2COL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index e128ec5835..0bc57be09e 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H #define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,7 @@ namespace arm_compute { class ITensor; +class NEInstanceNormalizationLayerKernel; /** Basic function to perform a Instance normalization. 
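The im2col lowering that the removed NEIm2Col performed (patch extraction so that convolution becomes a GEMM) is easiest to see in a stripped-down form. The sketch below is library-independent, single-channel, stride 1, and omits the padding, dilation, batching, groups and bias row of ones that the real function also handled.

#include <cstddef>
#include <vector>

// Every kh x kw patch of the input becomes one column of the output matrix,
// so the convolution can be computed as a single matrix multiplication.
std::vector<float> im2col(const std::vector<float> &src, std::size_t w, std::size_t h,
                          std::size_t kw, std::size_t kh)
{
    const std::size_t out_w = w - kw + 1;
    const std::size_t out_h = h - kh + 1;
    std::vector<float> dst(kw * kh * out_w * out_h);

    std::size_t col = 0;
    for(std::size_t y = 0; y < out_h; ++y)
    {
        for(std::size_t x = 0; x < out_w; ++x, ++col)
        {
            std::size_t row = 0;
            for(std::size_t ky = 0; ky < kh; ++ky)
            {
                for(std::size_t kx = 0; kx < kw; ++kx, ++row)
                {
                    dst[col * kw * kh + row] = src[(y + ky) * w + (x + kx)];
                }
            }
        }
    }
    return dst;
}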
* @@ -48,8 +48,28 @@ class NEInstanceNormalizationLayer : public IFunction public: /** Constructor */ NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. * Data types supported: F16/F32. Data layout supported: NHWC, NCHW * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. @@ -69,19 +89,23 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + float gamma = 1.0f, + float beta = 0.0f, + float epsilon = 1e-12f); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - NEInstanceNormalizationLayerKernel _normalization_kernel; - bool _is_nchw; - NEPermute _permute_input; - NEPermute _permute_output; - Tensor _permuted_input; - Tensor _permuted_output; + MemoryGroup _memory_group; + std::unique_ptr<NEInstanceNormalizationLayerKernel> _normalization_kernel; + bool _is_nchw; + NEPermute _permute_input; + NEPermute _permute_output; + Tensor _permuted_input; + Tensor _permuted_output; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h deleted file mode 100644 index 2d7669d3ef..0000000000 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
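The kernel is now only forward-declared and held through a std::unique_ptr, so including this header no longer drags in core kernel headers; user code is unaffected. A minimal usage sketch on the F32/NHWC path, with illustrative shape and gamma/beta/epsilon values:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h"

using namespace arm_compute;

int main()
{
    TensorInfo info(TensorShape(64U, 32U, 32U), 1, DataType::F32); // C, W, H for NHWC
    info.set_data_layout(DataLayout::NHWC);

    Tensor src, dst;
    src.allocator()->init(info);
    dst.allocator()->init(info);

    NEInstanceNormalizationLayer instance_norm;
    instance_norm.configure(&src, &dst, /* gamma */ 1.f, /* beta */ 0.f, /* epsilon */ 1e-12f);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    instance_norm.run();
    return 0;
}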
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGE_H -#define ARM_COMPUTE_NEINTEGRALIMAGE_H - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run a @ref NEIntegralImageKernel */ -class NEIntegralImage : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U32. - */ - void configure(const ITensor *input, ITensor *output); -}; -} -#endif /*ARM_COMPUTE_NEINTEGRALIMAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index a581600dbb..8502cee5d2 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H #define ARM_COMPUTE_NEL2NORMALIZELAYER_H -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,7 @@ namespace arm_compute { class ITensor; +class NEL2NormalizeLayerKernel; /** Basic function to perform a L2 normalization on a given axis. * @@ -48,14 +48,34 @@ class NEL2NormalizeLayer : public IFunction public: /** Constructor */ NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete; + /** Default destructor */ + ~NEL2NormalizeLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. 
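The removed NEIntegralImage computed a U32 summed-area table from a U8 image. A library-independent sketch of the recurrence it implements, where each output element is the sum of all input pixels above and to the left of it (inclusive):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint32_t> integral_image(const std::vector<uint8_t> &src, std::size_t w, std::size_t h)
{
    std::vector<uint32_t> dst(w * h, 0);
    for(std::size_t y = 0; y < h; ++y)
    {
        for(std::size_t x = 0; x < w; ++x)
        {
            const uint32_t up      = (y > 0) ? dst[(y - 1) * w + x] : 0;
            const uint32_t left    = (x > 0) ? dst[y * w + x - 1] : 0;
            const uint32_t up_left = (x > 0 && y > 0) ? dst[(y - 1) * w + x - 1] : 0;
            dst[y * w + x] = src[y * w + x] + up + left - up_left;
        }
    }
    return dst;
}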
*/ - void configure(ITensor *input, ITensor *output, int axis, float epsilon = 1e-12f); + void configure(ITensor *input, ITensor *output, int axis, float epsilon = 1e-6f); /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayer. * @@ -66,16 +86,16 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-6f); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeLayerKernel _normalize_kernel; - Tensor _sumsq; + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel; + Tensor _sumsq; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index e85e87b88e..629c5d10a0 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,18 @@ #ifndef ARM_COMPUTE_NELSTMLAYER_H #define ARM_COMPUTE_NELSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/runtime/common/LSTMParams.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" namespace arm_compute { @@ -49,8 +48,27 @@ class NELSTMLayer : public IFunction public: /** Default constructor */ NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer &operator=(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer(NELSTMLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer &operator=(NELSTMLayer &&) = delete; + /** Default destructor */ + ~NELSTMLayer(); /** Initialize function's tensors. 
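Note that the default epsilon moves from 1e-12f to 1e-6f in both configure() and validate(); callers that relied on the old lower bound may want to pass it explicitly, as in this illustrative F32 sketch (shape and axis are assumptions):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));

    NEL2NormalizeLayer l2_norm;
    l2_norm.configure(&src, &dst, /* axis */ 0, /* epsilon */ 1e-12f); // keep the previous bound explicitly

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    l2_norm.run();
    return 0;
}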
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src13 | dst0 - dst3 | + * |:------------|:------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. @@ -86,13 +104,26 @@ public: * @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. * If set to 0.0 then clipping is disabled. */ - void configure(const ITensor *input, - const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - const ITensor *output_state_in, const ITensor *cell_state_in, - ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output, - const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + void configure(const ITensor *input, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + const ITensor *output_state_in, + const ITensor *cell_state_in, + ITensor *scratch_buffer, + ITensor *output_state_out, + ITensor *cell_state_out, + ITensor *output, + const LSTMParams<ITensor> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -133,102 +164,115 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, - const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, - const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const 
ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_in, + const ITensorInfo *scratch_buffer, + const ITensorInfo *output_state_out, + const ITensorInfo *cell_state_out, + const ITensorInfo *output, + const LSTMParams<ITensorInfo> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - NEFullyConnectedLayer _fully_connected_input_gate; - NEArithmeticAddition _accum_input_gate1; - NEArithmeticSubtractionKernel _subtract_input_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - NEActivationLayerKernel _activation_input_gate; - NEFullyConnectedLayer _fully_connected_forget_gate; - NEArithmeticAddition _accum_forget_gate1; - NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - NEActivationLayerKernel _activation_forget_gate; - NEFullyConnectedLayer _fully_connected_cell_state; - NEGEMM _gemm_cell_state1; - NETransposeKernel _transpose_cell_state; - NEArithmeticAdditionKernel _accum_cell_state1; - NEArithmeticAdditionKernel _accum_cell_state2; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - NEActivationLayerKernel _activation_cell_state; - NEActivationLayerKernel _cell_clip; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; - NEFullyConnectedLayer _fully_connected_output; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; - NEArithmeticAddition _accum_output1; - NEActivationLayerKernel _activation_output; - NEActivationLayerKernel _activation_output_state; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; - NEFullyConnectedLayer _fully_connected_output_state; - NEActivationLayerKernel _projection_clip; - NECopyKernel _copy_cell_state; - NECopyKernel _copy_output; - NEConcatenateLayer _concat_scratch_buffer; - NEConcatenateLayer _concat_inputs_forget_gate; - NEConcatenateLayer _concat_weights_forget_gate; - NEConcatenateLayer _concat_weights_input_gate; - NEConcatenateLayer _concat_weights_output; - NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff; - NEArithmeticAdditionKernel _accum_input_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate_coeff; - NEArithmeticAdditionKernel _accum_forget_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_gate_coeff; - NEArithmeticAdditionKernel _accum_cell_gate_bias; - NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; - NEPixelWiseMultiplicationKernel _pixelwise_mul_output_gate_coeff; - NEArithmeticAdditionKernel _accum_output_gate_bias; - Tensor _input_gate_out1; - Tensor _input_gate_out2; - Tensor _input_gate_out3; - Tensor _input_gate_out4; - Tensor _forget_gate_out1; - Tensor _forget_gate_out2; - Tensor _forget_gate_out3; - Tensor _forget_gate_out4; - Tensor _forget_gate_out5; - Tensor _forget_gate_out6; - Tensor _cell_state_out1; - Tensor _cell_state_out2; - Tensor _cell_state_out3; - Tensor _cell_state_out4; - Tensor _cell_state_out5; - Tensor _output1; - Tensor _output2; - Tensor _output3; - Tensor _output4; - Tensor _cell_state_activation; - Tensor _output_state1; - Tensor _ones; - Tensor _input_layer_norm_out1; - Tensor _input_layer_norm_out2; - Tensor 
_forget_layer_norm_out1; - Tensor _forget_layer_norm_out2; - Tensor _cell_layer_norm_out1; - Tensor _cell_layer_norm_out2; - Tensor _output_layer_norm_out1; - Tensor _output_layer_norm_out2; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; - bool _is_prepared; - bool _is_layer_norm_lstm; + MemoryGroup _memory_group; + NEFullyConnectedLayer _fully_connected_input_gate; + NEArithmeticAddition _accum_input_gate1; + NEArithmeticSubtraction _subtract_input_gate; + NEPixelWiseMultiplication _pixelwise_mul_input_gate; + NEActivationLayer _activation_input_gate; + NEFullyConnectedLayer _fully_connected_forget_gate; + NEArithmeticAddition _accum_forget_gate1; + NEPixelWiseMultiplication _pixelwise_mul_forget_gate; + NEActivationLayer _activation_forget_gate; + NEFullyConnectedLayer _fully_connected_cell_state; + NEGEMM _gemm_cell_state1; + NETranspose _transpose_cell_state; + NEArithmeticAddition _accum_cell_state1; + NEArithmeticAddition _accum_cell_state2; + NEPixelWiseMultiplication _pixelwise_mul_cell_state1; + NEActivationLayer _activation_cell_state; + NEActivationLayer _cell_clip; + NEPixelWiseMultiplication _pixelwise_mul_cell_state2; + NEFullyConnectedLayer _fully_connected_output; + NEPixelWiseMultiplication _pixelwise_mul_output_state1; + NEArithmeticAddition _accum_output1; + NEActivationLayer _activation_output; + NEActivationLayer _activation_output_state; + NEPixelWiseMultiplication _pixelwise_mul_output_state2; + NEFullyConnectedLayer _fully_connected_output_state; + NEActivationLayer _projection_clip; + NECopy _copy_cell_state; + NECopy _copy_output; + NEConcatenateLayer _concat_scratch_buffer; + NEConcatenateLayer _concat_inputs_forget_gate; + NEConcatenateLayer _concat_weights_forget_gate; + NEConcatenateLayer _concat_weights_input_gate; + NEConcatenateLayer _concat_weights_output; + NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + NEPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; + NEArithmeticAddition _accum_input_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + NEPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; + NEArithmeticAddition _accum_forget_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + NEPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; + NEArithmeticAddition _accum_cell_gate_bias; + NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + NEPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; + NEArithmeticAddition _accum_output_gate_bias; + Tensor _input_gate_out1; + Tensor _input_gate_out2; + Tensor _input_gate_out3; + Tensor _input_gate_out4; + Tensor _forget_gate_out1; + Tensor _forget_gate_out2; + Tensor _forget_gate_out3; + Tensor _forget_gate_out4; + Tensor _forget_gate_out5; + Tensor _forget_gate_out6; + Tensor _cell_state_out1; + Tensor _cell_state_out2; + Tensor _cell_state_out3; + Tensor _cell_state_out4; + Tensor _cell_state_out5; + Tensor _output1; + Tensor _output2; + Tensor _output3; + Tensor _output4; + Tensor _cell_state_activation; + Tensor _output_state1; + Tensor _ones; + Tensor _input_layer_norm_out1; + Tensor _input_layer_norm_out2; + Tensor _forget_layer_norm_out1; + Tensor _forget_layer_norm_out2; + Tensor _cell_layer_norm_out1; + Tensor _cell_layer_norm_out2; + Tensor _output_layer_norm_out1; + Tensor _output_layer_norm_out2; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool 
_perform_projection_clipping; + bool _is_prepared; + bool _is_layer_norm_lstm; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NELSTMLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 2f3b8fd336..ae951669b3 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" @@ -38,8 +39,6 @@ #include "arm_compute/runtime/NEON/functions/NESlice.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/runtime/common/LSTMParams.h" - namespace arm_compute { // Forward declarations @@ -47,10 +46,10 @@ class ITensor; /** Basic function to run @ref NELSTMLayerQuantized * - * This function calls the following NEON functions/kernels: + * This function calls the following functions/kernels: * * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 + * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 * -# @ref NETranspose Matrix transpose * -# @ref NEConcatenateLayer Tensor concatenation * -# @ref NEActivationLayer Activation functions (tanh and logistic) @@ -67,14 +66,24 @@ public: NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete; - /** Default move constructor */ - NELSTMLayerQuantized(NELSTMLayerQuantized &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; - /** Default move assignment operator */ - NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete; + /** Default destructor */ + ~NELSTMLayerQuantized(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | + * |:-----------|:------------|:-------|:------|:------|:------| + * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 
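In the quantized LSTM the fixed-function NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint is replaced by the generic NEGEMMLowpOutputStage, which takes its behaviour from a GEMMLowpOutputStageInfo. A rough sketch of an S32 to QSYMM16 requantization in that style; the multiplier, shift and quantization scale are placeholders (in practice they are derived from the tensor scales):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"

using namespace arm_compute;

int main()
{
    Tensor acc, bias, dst;
    acc.allocator()->init(TensorInfo(TensorShape(64U, 16U), 1, DataType::S32));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f, 0)));

    GEMMLowpOutputStageInfo info{};
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = 1073741824; // placeholder fixed-point multiplier
    info.gemmlowp_shift      = 10;         // placeholder shift
    info.output_data_type    = DataType::QSYMM16;

    NEGEMMLowpOutputStage output_stage;
    output_stage.configure(&acc, &bias, &dst, info);

    acc.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill acc and bias ...
    output_stage.run();
    return 0;
}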
@@ -94,11 +103,22 @@ public: * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ void configure(const ITensor *input, - const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - ITensor *cell_state_in, const ITensor *output_state_in, - ITensor *cell_state_out, ITensor *output_state_out); + const ITensor *input_to_input_weights, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_input_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *input_gate_bias, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + ITensor *cell_state_in, + const ITensor *output_state_in, + ITensor *cell_state_out, + ITensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -123,11 +143,22 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); + const ITensorInfo *input_to_input_weights, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_input_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *input_gate_bias, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out); // Inherited methods overridden: void run() override; @@ -137,30 +168,30 @@ private: MemoryGroup _memory_group; // Functions used - NEGEMMLowpMatrixMultiplyCore _gemmlowp; - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - NETranspose _transpose_weights; - NEConcatenateLayer _concat_input_weights; - NEConcatenateLayer _concat_recurrent_weights; - NEConcatenateLayer _concat_weights; - NEConcatenateLayer _concat_inputs; - NEConcatenateLayer _concat_bias; - NEActivationLayer _sigmoid_forget_gate; - NEActivationLayer _sigmoid_input_gate; - NEActivationLayer _sigmoid_output_gate; - NEActivationLayer 
_tanh_modulation_gate; - NEActivationLayer _tanh_output_state; - NEArithmeticAddition _add1; - NEArithmeticAddition _add2; - NEPixelWiseMultiplication _mul1; - NEPixelWiseMultiplication _mul2; - NEPixelWiseMultiplication _mul3; - NESlice _slice_input_tensor; - NESlice _slice_forget_tensor; - NESlice _slice_cell_tensor; - NESlice _slice_output_tensor; - NEDequantizationLayer _dequantize; - NEQuantizationLayer _quantize; + NEGEMMLowpMatrixMultiplyCore _gemmlowp; + NEGEMMLowpOutputStage _output_stage; + NETranspose _transpose_weights; + NEConcatenateLayer _concat_input_weights; + NEConcatenateLayer _concat_recurrent_weights; + NEConcatenateLayer _concat_weights; + NEConcatenateLayer _concat_inputs; + NEConcatenateLayer _concat_bias; + NEActivationLayer _sigmoid_forget_gate; + NEActivationLayer _sigmoid_input_gate; + NEActivationLayer _sigmoid_output_gate; + NEActivationLayer _tanh_modulation_gate; + NEActivationLayer _tanh_output_state; + NEArithmeticAddition _add1; + NEArithmeticAddition _add2; + NEPixelWiseMultiplication _mul1; + NEPixelWiseMultiplication _mul2; + NEPixelWiseMultiplication _mul3; + NESlice _slice_input_tensor; + NESlice _slice_forget_tensor; + NESlice _slice_cell_tensor; + NESlice _slice_output_tensor; + NEDequantizationLayer _dequantize; + NEQuantizationLayer _quantize; // Tensor pointers const ITensor *_input_to_input_weights; diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h deleted file mode 100644 index 5389f67bad..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELAPLACIANPYRAMID_H -#define ARM_COMPUTE_NELAPLACIANPYRAMID_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/Pyramid.h" - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute laplacian pyramid. 
This function calls the following NEON kernels and functions: - * - * -# @ref NEGaussianPyramidHalf - * -# @ref NEGaussian5x5 - * -# @ref NEArithmeticSubtraction - * - * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then - * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. - * L(i) = I(i) - Gaussian5x5(I(i)) - * Level 0 has always the same first two dimensions as the input tensor. -*/ -class NELaplacianPyramid : public IFunction -{ -public: - /** Constructor */ - NELaplacianPyramid(); - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16. - * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16. - * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: - * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - size_t _num_levels; - NEGaussianPyramidHalf _gaussian_pyr_function; - std::vector<NEGaussian5x5> _convf; - std::vector<NEArithmeticSubtraction> _subf; - Pyramid _gauss_pyr; - Pyramid _conv_pyr; - NEDepthConvertLayer _depth_function; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h deleted file mode 100644 index f939725d51..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H -#define ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions: - * - * -# @ref NEArithmeticAddition - * -# @ref NEScale - * -# @ref NEDepthConvertLayer - * - * This function reconstructs the original image from a Laplacian Image Pyramid. - * - * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the - * resolution of the next pyramid level. - * - * I(n-2) = upsample( input + L(n-1) - * - * For each pyramid level i, except i=0 and i=n-1: - * I(i-1) = upsample(I(i) + L(i)) - * - * output = I(0) + L(0) -*/ -class NELaplacianReconstruct : public IFunction -{ -public: - /** Constructor */ - NELaplacianReconstruct(); - /** Initialise the function's source, destinations and border mode. - * - * The Output image must have the same size as the first level of the pyramid. - * The Input image must have the same size as the last level of the pyramid. - * - * The idea is to reconstuct the original hi-res image from a low-res representation of it and the laplacian pyramid. - * - * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16. - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - Pyramid _tmp_pyr; - std::vector<NEArithmeticAddition> _addf; - std::vector<NEScale> _scalef; - NEDepthConvertLayer _depthf; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H */ diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h deleted file mode 100644 index b2f2b88fce..0000000000 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H -#define ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class INETensor; - -/** Basic function to compute the locally connected layer. This function calls the following NEON kernels: - * - * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) - * -# @ref NEIm2ColKernel - * -# @ref NELocallyConnectedMatrixMultiplyKernel - * -# @ref NECol2ImKernel - */ -class NELocallyConnectedLayer : public IFunction -{ -public: - /** Default constructor */ - NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete; - /** Default move constructor */ - NELocallyConnectedLayer(NELocallyConnectedLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; - /** Default move assignment operator */ - NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16, F32. - * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedLayer - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16, F32. 
- * @param[in] weights Weights tensor info. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] output Output tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEIm2ColKernel _input_im2col_kernel; - NEWeightsReshapeKernel _weights_reshape_kernel; - NELocallyConnectedMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _is_prepared; - const ITensor *_original_weights; -}; -} -#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h new file mode 100644 index 0000000000..0ad23200c6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELogical.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NELOGICAL_H +#define ARM_COMPUTE_NELOGICAL_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; + +/** Basic function to perform logical AND */ +class NELogicalAnd : public IFunction +{ +public: + /** Constructor */ + NELogicalAnd(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalAnd(const NELogicalAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalAnd(NELogicalAnd &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalAnd &operator=(const NELogicalAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalAnd &operator=(NELogicalAnd &&) = delete; + /** Destructor */ + ~NELogicalAnd(); + + /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELogicalAnd + * + * @param[in] input1 First input tensor info. Data types supported: U8. + * @param[in] input2 Second input tensor info. Data types supported: U8. + * @param[in] output Output tensor info. Data type supported: U8 + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +/** Basic function to perform logical OR */ +class NELogicalOr : public IFunction +{ +public: + /** Constructor */ + NELogicalOr(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalOr(const NELogicalOr &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalOr(NELogicalOr &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalOr &operator=(const NELogicalOr &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalOr &operator=(NELogicalOr &&) = delete; + /** Destructor */ + ~NELogicalOr(); + + /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELogicalOr + * + * @param[in] input1 First input tensor info. 
Data types supported: U8. + * @param[in] input2 Second input tensor info. Data types supported: U8. + * @param[in] output Output tensor info. Data type supported: U8 + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +/** Basic function to perform logical NOT */ +class NELogicalNot : public IFunction +{ +public: + /** Constructor */ + NELogicalNot(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalNot(const NELogicalNot &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalNot(NELogicalNot &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalNot &operator=(const NELogicalNot &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalNot &operator=(NELogicalNot &&) = delete; + /** Destructor */ + ~NELogicalNot(); + + /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |U8 |U8 | + * + * @param[in] input Input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NELogicalNot + * + * @param[in] input Input tensor info. Data types supported: U8. + * @param[in] output Output tensor info. Data type supported: U8 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NELOGICAL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h deleted file mode 100644 index 168500050e..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
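
The new NELogical.h above exposes NELogicalAnd, NELogicalOr and NELogicalNot with a uniform U8-only configure()/validate() pair. A minimal usage sketch for NELogicalAnd follows; it is illustrative and not taken from this patch, and the tensor set-up (Tensor, TensorInfo, allocator calls) is standard Arm Compute Library runtime boilerplate assumed from headers outside this diff.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NELogical.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void logical_and_example()
    {
        // Two U8 inputs and a U8 output, matching the data type table in the configure() docs
        Tensor a, b, dst;
        const TensorInfo info(TensorShape(16U, 16U), 1, DataType::U8);
        a.allocator()->init(info);
        b.allocator()->init(info);
        dst.allocator()->init(info);

        // Check the configuration up front via the static validate() shown above
        if(NELogicalAnd::validate(a.info(), b.info(), dst.info()).error_code() != ErrorCode::OK)
        {
            return; // configuration not supported
        }

        NELogicalAnd logical_and;
        logical_and.configure(&a, &b, &dst);

        // Allocate backing memory only after configure(), as usual for NEON functions
        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();

        logical_and.run(); // element-wise AND of a and b written to dst
    }

NELogicalOr is driven identically, and NELogicalNot takes a single input plus the output.
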
- */ -#ifndef ARM_COMPUTE_NEMAGNITUDE_H -#define ARM_COMPUTE_NEMAGNITUDE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEMagnitudePhaseKernel */ -class NEMagnitude : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs. - * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: S16. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMAGNITUDE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h new file mode 100644 index 0000000000..58dd7a6f6b --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMatMul.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Settings for MatMul Cpu implementation*/ +class CpuMatMulSettings +{ +public: + // get fast math flag + bool fast_math() const + { + return _fast_math; + } + // get fixed format flag + bool fixed_format() const + { + return _fixed_format; + } + // Set fast math flag + CpuMatMulSettings &fast_math(bool fmath) + { + _fast_math = fmath; + return *this; + } + // Set fixed format flag + CpuMatMulSettings &fixed_format(bool fixed_format) + { + _fixed_format = fixed_format; + return *this; + } + +private: + bool _fast_math{false}; + bool _fixed_format{false}; +}; + +// Forward declarations +class ITensor; +class ITensorInfo; +class MatMulInfo; +class Status; + +/** Basic function to run the following operators: + * + * -# @ref cpu::CpuMatMul + */ +class NEMatMul : public IFunction +{ +public: + /** Constructor */ + NEMatMul(); + /** Destructor */ + ~NEMatMul(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMatMul(const NEMatMul &) = delete; + /** Default move constructor */ + NEMatMul(NEMatMul &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMatMul &operator=(const NEMatMul &) = delete; + /** Default move assignment operator */ + NEMatMul &operator=(NEMatMul &&) = default; + /** Initialize + * + * Valid data layouts: + * - Any + * + * Valid data type configurations: + * |lhs |rhs |dst | + * |:--------------|:------------------|:--------------| + * |F32 |F32 |F32 | + * |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QASYMM8 |QASYMM8 |QASYMM8 | + * + * @param[in] lhs Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8. + * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs. + * @param[out] dst Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs. + * @param[in] info Contains MatMul operation information described in @ref MatMulInfo. + * @param[in] settings Contains flags for function level settings i.e fast math + * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions. + */ + void configure(ITensor *lhs, + ITensor *rhs, + ITensor *dst, + const MatMulInfo &info, + const CpuMatMulSettings &settings, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul + * + * @param[in] lhs Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8. + * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs. + * @param[out] dst Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs. + * @param[in] info Contains MatMul operation information described in @ref MatMulInfo. + * @param[in] settings Contains flags for function level settings i.e fast math + * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions. 
+ * + * @return Status + */ + static Status validate(const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ITensorInfo *dst, + const MatMulInfo &info, + const CpuMatMulSettings &settings, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h new file mode 100644 index 0000000000..e00fc4544f --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H +#define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; +class ITensorInfo; +class NEFill; + +/** Function to perform MaxUnpooling. This function calls the following kernels: + * + * -# @ref NEFill + */ +class NEMaxUnpoolingLayer : public IFunction +{ +public: + /** Constructor */ + NEMaxUnpoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete; + /** Default destructor */ + ~NEMaxUnpoolingLayer(); + /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * + * @note Only supported pool size 2 + * + * @param[in, out] input Source tensor. 
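
NEMatMul pairs a MatMulInfo descriptor with the new CpuMatMulSettings builder declared above. A hypothetical single-precision sketch follows; the 4x4 shapes are purely illustrative, and MatMulInfo's default construction and the tensor boilerplate come from elsewhere in the library rather than from this patch.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/function_info/MatMulInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEMatMul.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void matmul_example()
    {
        // Square F32 operands keep the sketch independent of the lhs/rhs layout convention
        Tensor lhs, rhs, dst;
        const TensorInfo info(TensorShape(4U, 4U), 1, DataType::F32);
        lhs.allocator()->init(info);
        rhs.allocator()->init(info);
        dst.allocator()->init(info);

        const MatMulInfo        mm_info{};                                      // no transpose flags set
        const CpuMatMulSettings settings = CpuMatMulSettings().fast_math(true); // builder-style flag from this header

        if(NEMatMul::validate(lhs.info(), rhs.info(), dst.info(), mm_info, settings).error_code() != ErrorCode::OK)
        {
            return; // configuration not supported on this target
        }

        NEMatMul matmul;
        matmul.configure(&lhs, &rhs, &dst, mm_info, settings);

        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        dst.allocator()->allocate();
        matmul.run();
    }
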
(Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[out] indices The indices of the maximal values. Data type supported: U32. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayer + * + * @note Only supported pool size 2 + * + * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices The indices of the maximal values. Data type supported: U32. + * @param[in] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *indices, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<NEFill> _fill_func; + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h deleted file mode 100644 index 954b2228dd..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEV_H -#define ARM_COMPUTE_NEMEANSTDDEV_H - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -/** Basic function to execute mean and std deviation. This function calls the following NEON kernels: - * - * @ref NEMeanStdDevKernel - * - */ -class NEMeanStdDev : public IFunction -{ -public: - /** Default Constructor. */ - NEMeanStdDev(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in, out] input Input image. Data types supported: U8. 
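
NEMaxUnpoolingLayer is normally fed the indices produced by a preceding pool-size-2 max pooling stage. The sketch below shows only the unpooling call itself; the pooled and indices tensors are assumed to have been written earlier, and the PoolingLayerInfo constructor used here (type, pool size, data layout, pad/stride) is provided by the core types header, not by this diff.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void max_unpooling_example(Tensor &pooled, Tensor &indices)
    {
        // pooled: F32 [W/2, H/2, C]; indices: U32 of the same shape, from a pool-size-2 max pooling
        Tensor unpooled;
        TensorShape out_shape = pooled.info()->tensor_shape();
        out_shape.set(0, out_shape.x() * 2);
        out_shape.set(1, out_shape.y() * 2);
        unpooled.allocator()->init(TensorInfo(out_shape, 1, DataType::F32));

        const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0));

        NEMaxUnpoolingLayer unpool;
        // Note the argument order from the declaration above: input, indices, output, pool_info
        unpool.configure(&pooled, &indices, &unpooled, pool_info);

        unpooled.allocator()->allocate();
        unpool.run();
    }
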
(Written to only for border filling) - * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - */ - void configure(IImage *input, float *mean, float *stddev = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ -}; -} -#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 3ce2b2792b..41aa81946b 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,13 +30,36 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */ class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMeanStdDevNormalizationLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | + * * @note If the output tensor is a nullptr, the normalization will be performed in-place. * * @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h deleted file mode 100644 index 55064f8a8c..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
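
The NEMeanStdDevNormalizationLayer changes above only add the rule-of-five declarations and the supported-configuration table; the configure() call itself is untouched. A short sketch of the in-place path described by the note above, assuming the output and epsilon parameters are defaulted as in the existing overload:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void mean_stddev_norm_example()
    {
        // 2D input [feature_size, batch], normalized in-place (output left as nullptr)
        Tensor src;
        src.allocator()->init(TensorInfo(TensorShape(128U, 4U), 1, DataType::F32));

        NEMeanStdDevNormalizationLayer msd_norm;
        msd_norm.configure(&src); // in-place, default epsilon

        src.allocator()->allocate();
        msd_norm.run();
    }
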
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3_H -#define ARM_COMPUTE_NEMEDIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute median filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEMedian3x3Kernel - * - */ -class NEMedian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEMEDIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h deleted file mode 100644 index 89b6874320..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATION_H -#define ARM_COMPUTE_NEMINMAXLOCATION_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute min and max location. This function calls the following NEON kernels: - * - * -# NEMinMaxKernel - * -# NEMinMaxLocationKernel - */ -class NEMinMaxLocation : public IFunction -{ -public: - /** Constructor */ - NEMinMaxLocation(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc (Optional) Array of minimum value locations. - * @param[out] max_loc (Optional) Array of maximum value locations. - * @param[out] min_count (Optional) Number of minimum value encounters. - * @param[out] max_count (Optional) Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ - NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ -}; -} -#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h deleted file mode 100644 index a758e040c6..0000000000 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENONLINEARFILTER_H -#define ARM_COMPUTE_NENONLINEARFILTER_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute non linear filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NENonLinearFilterKernel - * - * @note Supported mask dimensions squares of sizes 3, 5 - */ -class NENonLinearFilter : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, - uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NENONLINEARFILTER_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h deleted file mode 100644 index cb8b202811..0000000000 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H -#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute non-maxima suppression over a 3x3 window. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NENonMaximaSuppression3x3Kernel - * - */ -class NENonMaximaSuppression3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT - * The constant values used with CONSTANT border mode is 0 - * - * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input - * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT - * - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode); -}; -} -#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H */ diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index af34147bfe..27e3fa674e 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,11 @@ #ifndef ARM_COMPUTE_NENORMALIZATIONLAYER_H #define ARM_COMPUTE_NENORMALIZATIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -39,10 +36,11 @@ namespace arm_compute { class ITensor; +class NENormalizationLayerKernel; -/** Basic function to compute a normalization layer. This function calls the following NEON kernels: +/** Basic function to compute a normalization layer. This function calls the following kernels: * - * -# @ref NEPixelWiseMultiplicationKernel + * -# @ref NEPixelWiseMultiplication * -# @ref NEFillBorderKernel * -# @ref NENormalizationLayerKernel * @@ -52,8 +50,28 @@ class NENormalizationLayer : public IFunction public: /** Default constructor */ NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer &operator=(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer(NENormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer &operator=(NENormalizationLayer &&) = delete; + /** Default destructor */ + ~NENormalizationLayer(); /** Set the input and output tensors. 
* + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], * and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC. * @param[out] output Destination with the same dimensions, data type, data layout and number of channels of @p input @@ -69,17 +87,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle borders */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h deleted file mode 100644 index 95068aaee0..0000000000 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
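
For the reworked NENormalizationLayer, only the internals change (the multiplication is now driven through the NEPixelWiseMultiplication function and the kernel is held by unique_ptr); the public configure(input, output, norm_info) call is unchanged. A brief F32 cross-map sketch, with the NormalizationLayerInfo constructor arguments assumed from the core types header rather than this diff:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void normalization_example()
    {
        // Single F32 feature map of shape [W, H, IFM] = [8, 8, 16]
        Tensor src, dst;
        const TensorInfo info(TensorShape(8U, 8U, 16U), 1, DataType::F32);
        src.allocator()->init(info);
        dst.allocator()->init(info);

        // Cross-map (LRN-style) normalization over 5 neighbouring channels
        const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5, 0.0001f, 0.75f);

        NENormalizationLayer norm; // default-constructed, i.e. without an external memory manager
        norm.configure(&src, &dst, norm_info);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        norm.run();
    }
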
- */ -#ifndef ARM_COMPUTE_NEOPTICALFLOW_H -#define ARM_COMPUTE_NEOPTICALFLOW_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class Pyramid; - -/** Array of LK Internel Keypoints */ -using LKInternalKeypointArray = Array<NELKInternalKeypoint>; -/** Basic function to execute optical flow. This function calls the following NEON kernels and functions: - * - * -# @ref NEScharr3x3 - * -# @ref NELKTrackerKernel - * - */ -class NEOpticalFlow : public IFunction -{ -public: - /** Constructor - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow(const NEOpticalFlow &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; - /** Initialise the function input and output - * - * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 - * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data type supported U8 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] border_mode The border mode applied at scharr kernel stage - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT - * - */ - void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, - IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, - bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector<NEScharr3x3> _func_scharr; - std::vector<NELKTrackerKernel> _kernel_tracker; - std::vector<Tensor> _scharr_gx; - std::vector<Tensor> _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index 102a165383..81d5fd162c 100644 --- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,21 +25,47 @@ #define ARM_COMPUTE_NEPRELULAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEArithmeticOperationKernel for PRELU +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU * * @note The function implements an activation layer with the PRELU activation function. */ -class NEPReluLayer : public INESimpleFunction +class NEPReluLayer : public IFunction { public: + /** Default Constructor */ + NEPReluLayer(); + /** Default Destructor */ + ~NEPReluLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPReluLayer(const NEPReluLayer &) = delete; + /** Default move constructor */ + NEPReluLayer(NEPReluLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPReluLayer &operator=(const NEPReluLayer &) = delete; + /** Default move assignment operator */ + NEPReluLayer &operator=(NEPReluLayer &&); /** Set the input and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] alpha Source alpha tensor. Data types supported: same of @p input. * @param[out] output Destination tensor. 
Data type supported: same as @p input @@ -54,6 +80,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEPRELULAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index d3074e70bc..494b1c0641 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,24 +24,26 @@ #ifndef ARM_COMPUTE_NEPADLAYER_H #define ARM_COMPUTE_NEPADLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/SubTensor.h" - -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> + namespace arm_compute { -/** Basic function to pad a tensor. This function calls the following NEON functions/kernels: +class NEPadLayerKernel; + +/** Basic function to pad a tensor. This function calls the following functions/kernels: * * - For padding mode = PaddingMode::CONSTANT: * -# @ref NEPadLayerKernel * - Otherwise: - * -# @ref NECopyKernel + * -# @ref NECopy * -# @ref NEStridedSlice * -# @ref NEConcatenateLayer * @@ -49,10 +51,29 @@ namespace arm_compute class NEPadLayer : public IFunction { public: - /** Default constructor*/ + /** Default Constructor */ NEPadLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer(const NEPadLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer &operator=(const NEPadLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer(NEPadLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer &operator=(NEPadLayer &&) = delete; + /** Default destructor */ + ~NEPadLayer(); /** Initialize the function * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |All |All | + * * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] @@ -61,7 +82,11 @@ public: * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). 
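For reference, a minimal usage sketch of the NEPadLayer interface documented above, contrasting CONSTANT and REFLECT padding. The shapes, padding sizes and fill value are illustrative, and the destination shapes are assumed to be inferred automatically during configure():

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative 8x8 F32 source; destination infos are left uninitialised on the
    // assumption that configure() derives the padded shapes.
    Tensor src, dst_const, dst_reflect;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));

    // Pad 1 element on each side of dimension 0 and 2 elements on each side of dimension 1.
    const PaddingList padding = { { 1, 1 }, { 2, 2 } };

    // CONSTANT mode fills the new elements with the given pixel value (here 0.0f).
    NEPadLayer pad_const;
    pad_const.configure(&src, &dst_const, padding, PixelValue(0.0f), PaddingMode::CONSTANT);

    // REFLECT mode mirrors the input without repeating the border values.
    NEPadLayer pad_reflect;
    pad_reflect.configure(&src, &dst_reflect, padding, PixelValue(), PaddingMode::REFLECT);

    src.allocator()->allocate();
    dst_const.allocator()->allocate();
    dst_reflect.allocator()->allocate();

    pad_const.run();
    pad_reflect.run();
    return 0;
}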
*/ - void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + void configure(ITensor *input, + ITensor *output, + const PaddingList &padding, + const PixelValue constant_value = PixelValue(), + const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * * @param[in] input Source tensor info. Data types supported: All. @@ -74,7 +99,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const PaddingList &padding, + const PixelValue constant_value = PixelValue(), + const PaddingMode mode = PaddingMode::CONSTANT); // Inherited methods overridden: void run() override; @@ -88,7 +117,10 @@ private: * specifies the front and the end padding in the i-th dimension. * @param[in] constant_value Constant value to be used for the padding */ - void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value); + void configure_constant_mode(ITensor *input, + ITensor *output, + const PaddingList &padding, + const PixelValue constant_value); /** Configure functions for when reflect or symmetric padding is used. * * @param[in] input Source tensor. Data types supported: All. @@ -97,15 +129,15 @@ private: void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); private: - NECopyKernel _copy_kernel; - NEPadLayerKernel _pad_kernel; - PaddingMode _mode; - PaddingList _padding; - uint32_t _num_dimensions; - std::vector<NEStridedSlice> _slice_functions; - std::vector<NEConcatenateLayer> _concat_functions; - std::vector<Tensor> _slice_results; - std::vector<Tensor> _concat_results; + NECopy _copy_function; + std::unique_ptr<NEPadLayerKernel> _pad_kernel; + PaddingMode _mode; + PaddingList _padding; + uint32_t _num_dimensions; + std::vector<NEStridedSlice> _slice_functions; + std::vector<NEConcatenateLayer> _concat_functions; + std::vector<Tensor> _slice_results; + std::vector<Tensor> _concat_results; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEPADLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 4651b30e8e..2cef64764d 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,20 +24,43 @@ #ifndef ARM_COMPUTE_NEPERMUTE_H #define ARM_COMPUTE_NEPERMUTE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEPermuteKernel */ -class NEPermute : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuPermuteKernel */ +class NEPermute : public IFunction { public: - /** Configure the permute NEON kernel + /** Default Constructor */ + NEPermute(); + /** Default Destructor */ + ~NEPermute(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermute(const NEPermute &) = delete; + /** Default move constructor */ + NEPermute(NEPermute &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermute &operator=(const NEPermute &) = delete; + /** Default move assignment operator */ + NEPermute &operator=(NEPermute &&) = default; + /** Configure the permute function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * @@ -57,6 +80,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEPERMUTE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h deleted file mode 100644 index 220681e9f1..0000000000 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPHASE_H -#define ARM_COMPUTE_NEPHASE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEMagnitudePhaseKernel */ -class NEPhase : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output. 
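A short usage sketch for the NEPermute function shown above (not the removed NEPhase function): permuting an NCHW tensor to NHWC with PermutationVector(2U, 0U, 1U). The tensor shape is illustrative, and the destination info is assumed to be auto-initialised by configure():

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Source laid out as NCHW: shape is (W, H, C) in arm_compute's (x, y, z) ordering.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));

    // PermutationVector(2, 0, 1) maps NCHW to NHWC; rank greater than 4 is not supported.
    NEPermute permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));

    src.allocator()->allocate();
    dst.allocator()->allocate(); // shape assumed to have been inferred during configure()
    permute.run();
    return 0;
}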
- * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPHASE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index 2b31032931..3d81bf6087 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,25 +24,60 @@ #ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H #define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H +#include "arm_compute/core/Rounding.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEPixelWiseMultiplicationKernel */ -class NEPixelWiseMultiplication : public INESimpleFunction +/** Basic function to run @ref cpu::CpuMul */ +class NEPixelWiseMultiplication : public IFunction { public: + /** Default Constructor */ + NEPixelWiseMultiplication(); + /** Default Destructor */ + ~NEPixelWiseMultiplication(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplication(const NEPixelWiseMultiplication &) = delete; + /** Default move constructor */ + NEPixelWiseMultiplication(NEPixelWiseMultiplication &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplication &operator=(const NEPixelWiseMultiplication &) = delete; + /** Default move assignment operator */ + NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default; /** Initialise the kernel's inputs, output and convertion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QASYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |S32 |F32 | + * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 + * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. 
Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). + * @param[in, out] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, S32, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[out] output Output tensor. Data types supported: * - U8, only if both inputs are U8. @@ -50,49 +85,80 @@ public: * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. * - S16. * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. + * - S32, only if both inputs are S32 or both are QSYMM16. * - F16, only if @p input1 is F16. * - F32, only if both inputs are F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype * @param[in] rounding_policy Rounding policy. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + float scale, + ConvertPolicy overflow_policy, + RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, S32, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). * @param[in] output Output tensor info. Data types supported: * - U8, only if both inputs are U8. * - QASYMM8, only if both inputs are QASYMM8. * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. * - S16. * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. 
+ * - S32, only if both inputs are S32 or both are QSYMM16. * - F16, only if @p input1 is F16. * - F32, only if both inputs are F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. + * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 + * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype * @param[in] rounding_policy Rounding policy. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + float scale, + ConvertPolicy overflow_policy, + RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */ -class NEComplexPixelWiseMultiplication : public INESimpleFunction +/** Basic function to run @ref cpu::CpuComplexMul. */ +class NEComplexPixelWiseMultiplication : public IFunction { public: + /** Default Constructor */ + NEComplexPixelWiseMultiplication(); + /** Default Destructor */ + ~NEComplexPixelWiseMultiplication(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComplexPixelWiseMultiplication(const NEComplexPixelWiseMultiplication &) = delete; + /** Default move constructor */ + NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEComplexPixelWiseMultiplication &operator=(const NEComplexPixelWiseMultiplication &) = delete; + /** Default move assignment operator */ + NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&) = default; /** Initialise the kernel's inputs, output. * * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). @@ -102,7 +168,10 @@ public: * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication * * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). @@ -110,7 +179,17 @@ public: * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
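As an illustration of the two multiplication interfaces above, a minimal sketch with F32 inputs, unit scale and round-to-zero, plus the complex variant on 2-channel F32 tensors. Shapes are illustrative and the output infos are assumed to be auto-initialised during configure():

#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Real-valued element-wise multiplication on illustrative 64x64 F32 tensors.
    Tensor a, b, out;
    const TensorInfo info(TensorShape(64U, 64U), 1, DataType::F32);
    a.allocator()->init(info);
    b.allocator()->init(info);

    // scale = 1.f keeps the product unchanged; for scales other than 1/255 only
    // RoundingPolicy::TO_ZERO is supported, as noted in the documentation above.
    NEPixelWiseMultiplication mul;
    mul.configure(&a, &b, &out, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

    // Complex multiplication: 2-channel F32 tensors (real and imaginary parts).
    Tensor ca, cb, cout;
    const TensorInfo cinfo(TensorShape(64U, 64U), 2, DataType::F32);
    ca.allocator()->init(cinfo);
    cb.allocator()->init(cinfo);

    NEComplexPixelWiseMultiplication cmul;
    cmul.configure(&ca, &cb, &cout);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    ca.allocator()->allocate();
    cb.allocator()->allocate();
    cout.allocator()->allocate();

    mul.run();
    cmul.run();
    return 0;
}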
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h new file mode 100644 index 0000000000..09251f2a5f --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NEPOOLING3DLAYER_H +#define ARM_COMPUTE_NEPOOLING3DLAYER_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; +class IMemoryManager; +/** Basic function to simulate a pooling 3d layer with the specified pooling operation. This function calls the following kernels: + * + * -# @ref cpu::CpuPool3d + */ +class NEPooling3dLayer : public IFunction +{ +public: + /** Constructor */ + NEPooling3dLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPooling3dLayer(const NEPooling3dLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPooling3dLayer &operator=(const NEPooling3dLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPooling3dLayer(NEPooling3dLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPooling3dLayer &operator=(NEPooling3dLayer &&) = delete; + /** Default destructor */ + ~NEPooling3dLayer(); + /** Set the input and output tensors. 
+ * + * Valid data layouts: + * - NDHWC + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * + * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise + * + * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. + * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo. + */ + void configure(const ITensor *input, ITensor *output, const Pooling3dLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer + * + * + * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. + * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEPOOLING3DLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index e43741c95b..768ad0d818 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,29 +24,55 @@ #ifndef ARM_COMPUTE_NEPOOLINGLAYER_H #define ARM_COMPUTE_NEPOOLINGLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { +// Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels: +/** Basic function to simulate a pooling layer with the specified pooling operation. 
This function calls the following kernels: * - * -# @ref NEFillBorderKernel (executed if padding size is different from zero) - * -# @ref NEPoolingLayerKernel + * -# @ref cpu::CpuPool2d */ class NEPoolingLayer : public IFunction { public: /** Constructor */ - NEPoolingLayer(); + NEPoolingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer &operator=(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer(NEPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer &operator=(NEPoolingLayer &&) = delete; + /** Default destructor */ + ~NEPoolingLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note F16 is supported for pool sizes 2 and 3 only + * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise + * Cases where pooling region is completely outside input tensor are only supported for floating point data type * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. @@ -58,23 +84,24 @@ public: * * @note F16 is supported for pool sizes 2 and 3 only * - * @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor info. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * @param[in] indices (optional) Tensor info of the indices of the maximal values. Data type supported: U32. 
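A small usage sketch for the pooling interface above: 2x2 max pooling with stride 2 on an NHWC F32 tensor. The shape and pooling parameters are illustrative, and the destination info is assumed to be auto-initialised by configure():

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NHWC: the shape is (C, W, H) in arm_compute's (x, y, z) coordinates.
    TensorInfo src_info(TensorShape(16U, 32U, 32U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);

    Tensor src, dst;
    src.allocator()->init(src_info);

    // 2x2 max pooling, stride 2, no padding (illustrative values).
    const PoolingLayerInfo pool_info(PoolingType::MAX, Size2D(2, 2), DataLayout::NHWC,
                                     PadStrideInfo(2, 2, 0, 0));

    NEPoolingLayer pool;
    pool.configure(&src, &dst, pool_info);

    src.allocator()->allocate();
    dst.allocator()->allocate(); // shape assumed to have been inferred during configure()
    pool.run();
    return 0;
}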
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info, + const ITensorInfo *indices = nullptr); // Inherited methods overridden: void run() override; private: - NEPoolingLayerKernel _pooling_layer_kernel; - NEFillBorderKernel _border_handler; - bool _is_global_pooling_layer; - DataLayout _data_layout; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index 242460d3a9..858e3299af 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H #define ARM_COMPUTE_NEPRIORBOXLAYER_H -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPriorBoxLayerKernel. */ class NEPriorBoxLayer : public INESimpleFunctionNoBorder @@ -38,6 +38,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |F32 |F32 |F32 | + * * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input @@ -53,7 +62,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEPRIORBOXLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index d1cc962940..009a4e0911 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,37 +24,46 @@ #ifndef ARM_COMPUTE_NEQLSTMLAYER_H #define ARM_COMPUTE_NEQLSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/runtime/common/LSTMParams.h" +#include <memory> namespace arm_compute { // Forward declarations class ITensor; - +class ITensorInfo; +class NEQLSTMLayerNormalizationKernel; +namespace cpu +{ +namespace kernels +{ +class CpuGemmLowpMatrixAReductionKernel; +} // namespace kernels +} // namespace cpu /** Basic function to run @ref NEQLSTMLayer * - * This function calls the following NEON functions/kernels: + * This function calls the following kernels: * * -# @ref NEActivationLayer Activation functions (tanh and logistic) - * -# @ref NEArithmeticAdditionKernel Elementwise addition - * -# @ref NEArithmeticSubtractionKernel Elementwise subtraction - * -# @ref NECopyKernel Copy kernel for copying output_state_out to output + * -# @ref NEArithmeticAddition Elementwise addition + * -# @ref NEArithmeticSubtraction Elementwise subtraction + * -# @ref NECopy Copy kernel for copying output_state_out to output * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. 
Accumulators are 32-bit integers - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use - * -# @ref NEPixelWiseMultiplicationKernel Elementwise multiplication + * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use + * -# @ref NEPixelWiseMultiplication Elementwise multiplication * -# @ref NETranspose Transpose function for reshaping the weights * */ class NEQLSTMLayer : public IFunction @@ -64,14 +73,24 @@ public: NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEQLSTMLayer(const NEQLSTMLayer &) = delete; - /** Default move constructor */ - NEQLSTMLayer(NEQLSTMLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NEQLSTMLayer(NEQLSTMLayer &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete; - /** Default move assignment operator */ - NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NEQLSTMLayer &operator=(NEQLSTMLayer &&) = delete; + /** Default destructor */ + ~NEQLSTMLayer(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 - src6 |src7 -src9 |src10 |src11 |dst0 |dst1 - dst2 | + * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------| + * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED | + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. @@ -111,12 +130,21 @@ public: * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. 
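The data types in the parameter descriptions above fit together as sketched below for a single QLSTM step: QASYMM8_SIGNED activations, QSYMM8 weights, S32 gate biases and QSYMM16 cell state. The shapes and quantization parameters are purely illustrative assumptions, not values taken from the library:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

int main()
{
    const unsigned int input_size = 32; // illustrative sizes
    const unsigned int num_units  = 16;
    const unsigned int batch_size = 2;

    // Input: 2D tensor [input_size, batch_size], quantized asymmetric signed 8-bit.
    const TensorInfo input(TensorShape(input_size, batch_size), 1, DataType::QASYMM8_SIGNED,
                           QuantizationInfo(0.1f, 0));

    // Weights: 2D tensors [input_size, num_units], quantized symmetric 8-bit.
    const TensorInfo input_to_forget_weights(TensorShape(input_size, num_units), 1,
                                             DataType::QSYMM8, QuantizationInfo(0.05f));

    // Gate biases: 1D tensors [num_units], 32-bit integer accumulators.
    const TensorInfo forget_gate_bias(TensorShape(num_units), 1, DataType::S32);

    // Cell state: [num_units, batch_size], quantized symmetric 16-bit.
    const TensorInfo cell_state(TensorShape(num_units, batch_size), 1, DataType::QSYMM16,
                                QuantizationInfo(1.f / 32768.f, 0));

    // Output state: [output_size, batch_size], same data type as the input.
    const TensorInfo output_state(TensorShape(num_units, batch_size), 1,
                                  DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 0));

    (void)input;
    (void)input_to_forget_weights;
    (void)forget_gate_bias;
    (void)cell_state;
    (void)output_state;
    return 0;
}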
*/ - void configure(const ITensor *input, - const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - const ITensor *cell_state_in, const ITensor *output_state_in, - ITensor *cell_state_out, ITensor *output_state_out, ITensor *output, + void configure(const ITensor *input, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + const ITensor *cell_state_in, + ITensor *output_state_in, + ITensor *cell_state_out, + ITensor *output_state_out, + ITensor *output, const LSTMParams<ITensor> &lstm_params); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer @@ -161,12 +189,21 @@ public: * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. * @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out, + const ITensorInfo *output, const LSTMParams<ITensorInfo> &lstm_params); // Inherited methods overridden: @@ -199,24 +236,33 @@ private: * @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor. 
* */ - void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, - const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res, - Tensor *outstage_res, float gemmlowp_scale, - const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); + void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, + NEGEMMLowpOutputStage &outstage, + GEMMLowpOutputStageInfo &gemmlowp_info, + const ITensor *mm_input, + const ITensor *mm_weights, + const ITensor *bias, + Tensor *mm_res, + Tensor *outstage_res, + float gemmlowp_scale, + const TensorInfo &mm_res_info, + const TensorInfo &outstage_tensor_info); - MemoryGroup _memory_group{}; + MemoryGroup _memory_group; /** A small internel kernel do the copy between two tensors */ class TensorCopyKernel { static constexpr uint32_t max_dimension_supported = 2; - ITensor *_src{ nullptr }; - ITensor *_dst{ nullptr }; + ITensor *_src{nullptr}; + ITensor *_dst{nullptr}; size_t _row_size{}; Window _window{}; public: + /** Destructor */ + ~TensorCopyKernel(); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel * * @param[in] src Source tensor info. @@ -236,93 +282,96 @@ private: }; // Functions used - NETranspose _transpose_input_to_forget_weights{}; - NETranspose _transpose_input_to_cell_weights{}; - NETranspose _transpose_input_to_output_weights{}; - NETranspose _transpose_input_to_input_weights{}; - NETranspose _transpose_recurrent_to_forget_weights{}; - NETranspose _transpose_recurrent_to_cell_weights{}; - NETranspose _transpose_recurrent_to_output_weights{}; - NETranspose _transpose_recurrent_to_input_weights{}; - NETranspose _transpose_projection_weights{}; - NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _projection_reduction{}; - NEArithmeticAdditionKernel _projection_bias_add{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{}; - NEGEMMLowpOutputStage _input_to_forget_outstage{}; - NEGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - NEGEMMLowpOutputStage _cell_to_forget_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{}; - NEArithmeticAdditionKernel _accumulate_cell_forget{}; - NEActivationLayer _forget_gate_sigmoid{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - NEGEMMLowpOutputStage _input_to_cell_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - NEGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{}; - NEActivationLayer _cell_gate_tanh{}; - NEArithmeticSubtractionKernel _input_gate_sub{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - NEGEMMLowpOutputStage _input_to_input_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - NEGEMMLowpOutputStage _recurrent_to_input_outstage{}; - NEArithmeticAdditionKernel 
_accumulate_input_recurrent_input{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{}; - NEGEMMLowpOutputStage _cell_to_input_outstage{}; - NEArithmeticAdditionKernel _accumulate_cell_input{}; - NEActivationLayer _input_gate_sigmoid{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{}; - NEArithmeticAdditionKernel _add_forget_cell{}; - NEActivationLayer _cell_clip{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - NEGEMMLowpOutputStage _input_to_output_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; - NEArithmeticAdditionKernel _accumulate_input_recurrent_output{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{}; - NEGEMMLowpOutputStage _cell_to_output_outstage{}; - NEArithmeticAdditionKernel _accumulate_cell_to_output{}; - NEActivationLayer _output_gate_sigmoid{}; - NEActivationLayer _hidden_tanh{}; - NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{}; - NEGEMMLowpOutputStage _hidden_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_projection{}; - NEGEMMLowpOutputStage _projection_outstage{}; - NEArithmeticAdditionKernel _accumulate_projection{}; - NEActivationLayer _projection_clip{}; - TensorCopyKernel _projection_bias_copy{}; - TensorCopyKernel _projection_output_to_accumulate_copy{}; - TensorCopyKernel _projection_accumulate_to_output_copy{}; - TensorCopyKernel _hidden_to_output_copy{}; + NEDequantizationLayer _dequantize_input_to_forget_weights; + NEQuantizationLayer _quantize_input_to_forget_weights; + NETranspose _transpose_input_to_forget_weights; + NETranspose _transpose_input_to_cell_weights; + NETranspose _transpose_input_to_output_weights; + NETranspose _transpose_input_to_input_weights; + NETranspose _transpose_recurrent_to_forget_weights; + NETranspose _transpose_recurrent_to_cell_weights; + NETranspose _transpose_recurrent_to_output_weights; + NETranspose _transpose_recurrent_to_input_weights; + NETranspose _transpose_projection_weights; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction; + NEArithmeticAddition _projection_bias_add; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; + NEGEMMLowpOutputStage _input_to_forget_outstage; + NEGEMMLowpOutputStage _recurrent_to_forget_outstage; + NEGEMMLowpOutputStage _cell_to_forget_outstage; + NEArithmeticAddition _accumulate_input_recurrent_forget; + NEArithmeticAddition _accumulate_cell_forget; + NEActivationLayer _forget_gate_sigmoid; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; + NEGEMMLowpOutputStage 
_input_to_cell_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; + NEGEMMLowpOutputStage _recurrent_to_cell_outstage; + NEArithmeticAddition _accumulate_input_recurrent_modulation; + NEActivationLayer _cell_gate_tanh; + NEArithmeticSubtraction _input_gate_sub; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; + NEGEMMLowpOutputStage _input_to_input_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; + NEGEMMLowpOutputStage _recurrent_to_input_outstage; + NEArithmeticAddition _accumulate_input_recurrent_input; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; + NEGEMMLowpOutputStage _cell_to_input_outstage; + NEArithmeticAddition _accumulate_cell_input; + NEActivationLayer _input_gate_sigmoid; + NEPixelWiseMultiplication _pixelwise_mul_forget_cell; + NEPixelWiseMultiplication _pixelwise_mul_input_cell; + NEArithmeticAddition _add_forget_cell; + NEActivationLayer _cell_clip; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; + NEGEMMLowpOutputStage _input_to_output_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; + NEGEMMLowpOutputStage _recurrent_to_output_outstage; + NEArithmeticAddition _accumulate_input_recurrent_output; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; + NEGEMMLowpOutputStage _cell_to_output_outstage; + NEArithmeticAddition _accumulate_cell_to_output; + NEActivationLayer _output_gate_sigmoid; + NEActivationLayer _hidden_tanh; + NEPixelWiseMultiplication _pixelwise_mul_hidden; + NEGEMMLowpOutputStage _hidden_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_projection; + NEGEMMLowpOutputStage _projection_outstage; + NEArithmeticAddition _accumulate_projection; + NEActivationLayer _projection_clip; + + TensorCopyKernel _projection_bias_copy; + TensorCopyKernel _projection_output_to_accumulate_copy; + TensorCopyKernel _projection_accumulate_to_output_copy; + TensorCopyKernel _hidden_to_output_copy; - std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} }; + std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms; - NECopyKernel _copy_output{}; + NECopy _copy_output; // Tensor pointers - const ITensor *_input_to_input_weights{ nullptr }; - const ITensor *_recurrent_to_input_weights{ nullptr }; - const ITensor *_projection_bias{ nullptr }; - const ITensor *_input_to_forget_weights{ nullptr }; - const ITensor *_input_to_cell_weights{ nullptr }; - const ITensor *_input_to_output_weights{ nullptr }; - const ITensor *_recurrent_to_forget_weights{ nullptr }; - const ITensor *_recurrent_to_cell_weights{ nullptr }; - const ITensor *_recurrent_to_output_weights{ nullptr }; - const ITensor *_projection_weights{ nullptr }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} }; + const ITensor *_input_to_input_weights{nullptr}; + const ITensor *_recurrent_to_input_weights{nullptr}; + const ITensor *_projection_bias{nullptr}; + const ITensor *_input_to_forget_weights{nullptr}; + const ITensor *_input_to_cell_weights{nullptr}; + const ITensor *_input_to_output_weights{nullptr}; + const ITensor *_recurrent_to_forget_weights{nullptr}; + const ITensor *_recurrent_to_cell_weights{nullptr}; + const ITensor *_recurrent_to_output_weights{nullptr}; + const ITensor *_projection_weights{nullptr}; + std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{}; + std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{}; using LayerNormIndexType = typename 
std::underlying_type<LayerNormGate>::type; inline LayerNormIndexType getGateIndex(LayerNormGate g) @@ -350,99 +399,87 @@ private: return _layer_norm_bias[getGateIndex(g)]; } - inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g) + inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g) { return _layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ITensor *in) - { - ARM_COMPUTE_ERROR_ON(!_has_layer_norm); - - Tensor &out = get_layer_norm_output(g); - _memory_group.manage(&out); - out.allocator()->init(*(in->info())); - - get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); - } - - inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) - { - // Output quantization scale will be different, but ignored here - // since it will be configured at configure() stage. - const TensorInfo out{ in }; - return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); - } + void configure_layer_norm(LayerNormGate g, const ITensor *in); + static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors - Tensor _input_to_forget_weights_transposed{ nullptr }; - Tensor _input_to_cell_weights_transposed{ nullptr }; - Tensor _input_to_output_weights_transposed{ nullptr }; - Tensor _input_to_input_weights_transposed{ nullptr }; - Tensor _recurrent_to_forget_weights_transposed{ nullptr }; - Tensor _recurrent_to_cell_weights_transposed{ nullptr }; - Tensor _recurrent_to_output_weights_transposed{ nullptr }; - Tensor _recurrent_to_input_weights_transposed{ nullptr }; - Tensor _projection_weights_transposed{ nullptr }; - Tensor _input_to_input_eff_bias{ nullptr }; - Tensor _recurrent_to_input_eff_bias{ nullptr }; - Tensor _input_to_forget_eff_bias{ nullptr }; - Tensor _recurrent_to_forget_eff_bias{ nullptr }; - Tensor _input_to_cell_eff_bias{ nullptr }; - Tensor _recurrent_to_cell_eff_bias{ nullptr }; - Tensor _input_to_output_eff_bias{ nullptr }; - Tensor _recurrent_to_output_eff_bias{ nullptr }; - Tensor _projection_reduction_res{ nullptr }; - Tensor _projection_eff_bias{ nullptr }; - Tensor _mm_input_to_forget_res{ nullptr }; - Tensor _mm_recurrent_to_forget_res{ nullptr }; - Tensor _mul_cell_to_forget_res{ nullptr }; - Tensor _input_to_forget_outstage_res{ nullptr }; - Tensor _cell_to_forget_outstage_res{ nullptr }; - Tensor _recurrent_to_forget_outstage_res{ nullptr }; - Tensor _forget_gate{ nullptr }; - Tensor _mm_input_to_cell_res{ nullptr }; - Tensor _input_to_cell_outstage_res{ nullptr }; - Tensor _mm_recurrent_to_cell_res{ nullptr }; - Tensor _recurrent_to_cell_outstage_res{ nullptr }; - Tensor _cell_gate{ nullptr }; - Tensor _mul_input_cell_res{ nullptr }; - Tensor _mm_input_to_input_res{ nullptr }; - Tensor _input_to_input_outstage_res{ nullptr }; - Tensor _mm_recurrent_to_input_res{ nullptr }; - Tensor _mul_cell_to_input_res{ nullptr }; - Tensor _cell_to_input_outstage_res{ nullptr }; - Tensor _recurrent_to_input_outstage_res{ nullptr }; - Tensor _input_gate{ nullptr }; - Tensor _mm_input_to_output_res{ nullptr }; - Tensor _input_to_output_outstage_res{ nullptr }; - Tensor _mm_recurrent_to_output_res{ nullptr }; - Tensor _mul_cell_to_output_res{ nullptr }; - Tensor _cell_to_output_outstage_res{ nullptr }; - Tensor _recurrent_to_output_outstage_res{ nullptr }; - Tensor _output_gate{ nullptr }; - Tensor _hidden_mul_res{ nullptr }; - Tensor 
_hidden_gate{ nullptr }; - Tensor _mm_projection_res{ nullptr }; - Tensor _projection_outstage_res{ nullptr }; - Tensor _projection_out_res{ nullptr }; - Tensor _projection_accumulate_res{ nullptr }; - Tensor _ones{ nullptr }; - std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} }; + Tensor _input_to_forget_weights_f32{nullptr}; + Tensor _input_to_forget_weights_symm8{nullptr}; + + Tensor _input_to_forget_weights_transposed{nullptr}; + Tensor _input_to_cell_weights_transposed{nullptr}; + Tensor _input_to_output_weights_transposed{nullptr}; + Tensor _input_to_input_weights_transposed{nullptr}; + Tensor _recurrent_to_forget_weights_transposed{nullptr}; + Tensor _recurrent_to_cell_weights_transposed{nullptr}; + Tensor _recurrent_to_output_weights_transposed{nullptr}; + Tensor _recurrent_to_input_weights_transposed{nullptr}; + Tensor _projection_weights_transposed{nullptr}; + Tensor _input_to_input_eff_bias{nullptr}; + Tensor _recurrent_to_input_eff_bias{nullptr}; + Tensor _input_to_forget_eff_bias{nullptr}; + Tensor _recurrent_to_forget_eff_bias{nullptr}; + Tensor _input_to_cell_eff_bias{nullptr}; + Tensor _recurrent_to_cell_eff_bias{nullptr}; + Tensor _input_to_output_eff_bias{nullptr}; + Tensor _recurrent_to_output_eff_bias{nullptr}; + Tensor _projection_reduction_res{nullptr}; + Tensor _projection_eff_bias{nullptr}; + Tensor _mm_input_to_forget_res{nullptr}; + Tensor _mm_recurrent_to_forget_res{nullptr}; + Tensor _mul_cell_to_forget_res{nullptr}; + Tensor _input_to_forget_outstage_res{nullptr}; + Tensor _cell_to_forget_outstage_res{nullptr}; + Tensor _recurrent_to_forget_outstage_res{nullptr}; + Tensor _forget_gate{nullptr}; + Tensor _mm_input_to_cell_res{nullptr}; + Tensor _input_to_cell_outstage_res{nullptr}; + Tensor _mm_recurrent_to_cell_res{nullptr}; + Tensor _recurrent_to_cell_outstage_res{nullptr}; + Tensor _cell_gate{nullptr}; + Tensor _mul_input_cell_res{nullptr}; + Tensor _mm_input_to_input_res{nullptr}; + Tensor _input_to_input_outstage_res{nullptr}; + Tensor _mm_recurrent_to_input_res{nullptr}; + Tensor _mul_cell_to_input_res{nullptr}; + Tensor _cell_to_input_outstage_res{nullptr}; + Tensor _recurrent_to_input_outstage_res{nullptr}; + Tensor _input_gate{nullptr}; + Tensor _mm_input_to_output_res{nullptr}; + Tensor _input_to_output_outstage_res{nullptr}; + Tensor _mm_recurrent_to_output_res{nullptr}; + Tensor _mul_cell_to_output_res{nullptr}; + Tensor _cell_to_output_outstage_res{nullptr}; + Tensor _recurrent_to_output_outstage_res{nullptr}; + Tensor _output_gate{nullptr}; + Tensor _hidden_mul_res{nullptr}; + Tensor _hidden_gate{nullptr}; + Tensor _mm_projection_res{nullptr}; + Tensor _projection_outstage_res{nullptr}; + Tensor _projection_out_res{nullptr}; + Tensor _projection_accumulate_res{nullptr}; + Tensor _ones{nullptr}; + std::array<Tensor, _layer_norm_count> _layer_norm_output{}; inline Tensor &get_layer_norm_output(LayerNormGate g) { return _layer_norm_output[getGateIndex(g)]; } - bool _is_prepared{ false }; - bool _has_cifg{ false }; - bool _has_cell_clipping{ false }; - bool _has_projection{ false }; - bool _has_projection_clipping{ false }; - bool _has_peephole{ false }; - bool _has_layer_norm{ false }; - bool _projection_tensor_copy_required{ false }; + bool _is_prepared{false}; + bool _has_cifg{false}; + bool _has_cell_clipping{false}; + bool _has_projection{false}; + bool _has_projection_clipping{false}; + bool _has_peephole{false}; + bool _has_layer_norm{false}; + bool _projection_tensor_copy_required{false}; + bool 
_convert_input_to_forget_weights_to_qsymm8{false}; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEQLSTMLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index fc317be81e..7bf97e28a5 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,30 +24,45 @@ #ifndef ARM_COMPUTE_NEQUANTIZATIONLAYER_H #define ARM_COMPUTE_NEQUANTIZATIONLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IRuntimeContext.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to simulate a quantization layer. This function calls the following NEON kernels: - * - * - * -# @ref NEQuantizationLayerKernel - * - */ -class NEQuantizationLayer : public INESimpleFunctionNoBorder +/** Basic function to run a quantization layer using @ref cpu::CpuQuantize */ +class NEQuantizationLayer : public IFunction { public: - /** Default constructor */ - NEQuantizationLayer() = default; + NEQuantizationLayer(); + /** Default Destructor */ + ~NEQuantizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayer(const NEQuantizationLayer &) = delete; + /** Default move constructor */ + NEQuantizationLayer(NEQuantizationLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayer &operator=(const NEQuantizationLayer &) = delete; + /** Default move assignment operator */ + NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default; /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:--------------------------------------| + * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 */ @@ -60,6 +75,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEQUANTIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 0bfb905e19..af7f464ac9 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,10 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -45,14 +44,26 @@ public: NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NERNNLayer(const NERNNLayer &) = delete; - /** Default move constructor */ - NERNNLayer(NERNNLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NERNNLayer(NERNNLayer &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NERNNLayer &operator=(const NERNNLayer &) = delete; - /** Default move assignment operator */ - NERNNLayer &operator=(NERNNLayer &&) = default; + /** Prevent instances of this class from being moved (As this class contains pointers) */ + NERNNLayer &operator=(NERNNLayer &&) = delete; + /** Default destructor */ + ~NERNNLayer(); /** Initialize the function * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |src3 |dst0 |dst1 | + * |:------|:------|:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | + * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input @@ -61,7 +72,13 @@ public: * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input * @param[in] info Activation layer parameter. */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, const ITensor *bias, ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *recurrent_weights, + const ITensor *bias, + ITensor *hidden_state, + ITensor *output, + ActivationLayerInfo &info); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 @@ -74,7 +91,12 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output, + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *recurrent_weights, + const ITensorInfo *bias, + const ITensorInfo *hidden_state, + const ITensorInfo *output, const ActivationLayerInfo &info); // Inherited methods overridden: @@ -82,16 +104,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAdditionKernel _add_kernel; - NEActivationLayerKernel _activation_kernel; - NEFullyConnectedLayer _fully_connected; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + NECopy _copy_f; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index 04a24ac7ec..b06ebe899d 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,27 +24,35 @@ #ifndef ARM_COMPUTE_NEROIALIGNLAYER_H #define ARM_COMPUTE_NEROIALIGNLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEROIAlignLayerKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEROIAlignLayerKernel - * - */ -class NEROIAlignLayer : public INESimpleFunction +/** Basic function to run @ref NEROIAlignLayerKernel. */ +class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * |QASYMM8 |QASYMM16 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
* @@ -56,8 +64,8 @@ public: void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, * otherwise same as @p input * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. @@ -69,7 +77,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIALIGNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 887b5712da..929111ad4b 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,29 +24,46 @@ #ifndef ARM_COMPUTE_NEROIPOOLINGLAYER_H #define ARM_COMPUTE_NEROIPOOLINGLAYER_H +#include "arm_compute/core/IArray.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; +class NEROIPoolingLayerKernel; +class ROIPoolingLayerInfo; -/** Basic function to run @ref NEROIPoolingLayerKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEROIPoolingLayerKernel - * - */ +/** Basic function to run @ref NEROIPoolingLayerKernel. */ class NEROIPoolingLayer : public IFunction { public: /** Constructor */ NEROIPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer(NEROIPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete; + /** Default destructor */ + ~NEROIPoolingLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F32 |U16 |F32 | + * |QASYMM8 |U16 |QASYMM8 | + * + * @param[in] input Source tensor. 
Data types supported: QASYMM8/F32 * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 * @param[out] output Destination tensor. Data types supported: Same as @p input. @@ -57,13 +74,32 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + void + configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: void run() override; + /** Static function to check if given info will lead to a valid configuration of @ref NEROIPoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F32. + * @param[in] rois TensorInfo for rois tensor which is a 2D tensor of size [5, N] (where N is the number of ROIs). Data types supported: U16 + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. + * @return a Status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + const ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); + private: - NEROIPoolingLayerKernel _roi_kernel; + std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 83ca625aa7..609456a4ef 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,26 +24,55 @@ #ifndef ARM_COMPUTE_NERANGE_H #define ARM_COMPUTE_NERANGE_H -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include <memory> + namespace arm_compute { class ITensor; +class ITensorInfo; +class NERangeKernel; /** Basic function to run @ref NERangeKernel * * @note The tensor data type for the output must be U8/S8/U16/S16/U32/S32/F16/F32. * @note The function generates a sequence with the given start, end and step.
+ * */ class NERange : public IFunction { public: /** Default constructor */ NERange(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange(const NERange &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange &operator=(const NERange &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange(NERange &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange &operator=(NERange &&) = delete; + /** Default destructor */ + ~NERange(); /** Initialize the kernel's start, end, step and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |dst | + * |:---------| + * |U8 | + * |S8 | + * |U16 | + * |S16 | + * |U32 | + * |S32 | + * |F16 | + * |F32 | + * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] start The starting value of the sequence. * @param[in] end The ending (not including) value of the sequence. @@ -65,7 +94,7 @@ public: void run() override; private: - NERangeKernel _kernel; + std::unique_ptr<NERangeKernel> _kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERANGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index 3c7cc21929..5b8d8cdf2b 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,8 @@ #ifndef ARM_COMPUTE_NEON_REDUCE_MEAN_H #define ARM_COMPUTE_NEON_REDUCE_MEAN_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" @@ -41,8 +39,29 @@ class NEReduceMean : public IFunction public: /** Constructor */ NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean(const NEReduceMean &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean &operator=(const NEReduceMean &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean(NEReduceMean &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean &operator=(NEReduceMean &&) = delete; + /** Default destructor */ + ~NEReduceMean(); /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Supported tensor rank: up to 4 * * @param[in] input Source tensor. 
Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32 @@ -61,7 +80,8 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output); + static Status + validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index abda4159ba..f5391a6d0e 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,20 +25,19 @@ #define ARM_COMPUTE_NEREDUCTIONOPERATION_H #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> + namespace arm_compute { class ITensor; +class NEReductionOperationKernel; -/** Basic function to simulate a reduction operation. This function calls the following NEON kernels: +/** Basic function to simulate a reduction operation. This function calls the following kernels: * - * -# @ref NEFillBorderKernel + * -# @ref NEReshapeLayer * -# @ref NEReductionOperationKernel * */ @@ -47,19 +46,41 @@ class NEReductionOperation : public IFunction public: /** Default constructor */ NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation(const NEReductionOperation &) = delete; + /** Default move constructor */ + NEReductionOperation(NEReductionOperation &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation &operator=(const NEReductionOperation &) = delete; + /** Default move assignment operator */ + NEReductionOperation &operator=(NEReductionOperation &&) = default; + /** Default destructor */ + ~NEReductionOperation(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0) + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] axis Dimension along which to reduce. 
Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. */ void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -67,20 +88,23 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + unsigned int axis, + ReductionOperation op, + bool keep_dims = true); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperationKernel _reduction_kernel; - NEFillBorderKernel _fill_border_kernel; - NEReshapeLayerKernel _reshape_kernel; - Tensor _output_internal; - size_t _window_split; - int _reduction_axis; - bool _is_reshape_required; + MemoryGroup _memory_group; + std::unique_ptr<NEReductionOperationKernel> _reduction_kernel; + NEReshapeLayer _reshape; + Tensor _output_internal; + size_t _window_split; + int _reduction_axis; + bool _is_reshape_required; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h deleted file mode 100644 index 05a7a8ffd6..0000000000 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEREMAP_H -#define ARM_COMPUTE_NEREMAP_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute remap. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NERemapKernel - */ -class NERemap : public INESimpleFunction -{ -public: - /** Initialise the function's sources, destination, interpolation policy and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. - * @param[in] border_mode Border mode to use on the input tensor. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, - InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEREMAP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReorderLayer.h b/arm_compute/runtime/NEON/functions/NEReorderLayer.h new file mode 100644 index 0000000000..e3fa7b9c16 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEReorderLayer.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__aarch64__) + +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; +class ITensorInfo; +class NEReorderKernel; +/** Function to compute blocked reorder. 
*/ +class NEReorderLayer : public IFunction +{ +public: + /** Default constructor */ + NEReorderLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorderLayer(const NEReorderLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorderLayer &operator=(const NEReorderLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReorderLayer(NEReorderLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReorderLayer &operator=(NEReorderLayer &&) = delete; + /** Default destructor */ + ~NEReorderLayer(); + /** Set the input and output tensors. + * + * Valid data layouts: + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * + * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW. + * @param[out] output Destination with the same dimensions, data type, data layout as @p input + * except last dimension of data layout which needs to be multiple of blocking parameter ksize + * @param[in] input_wf WeightFormat of input. + * @param[in] output_wf WeightFormat of output. + */ + void configure(const ITensor *input, + ITensor *output, + arm_compute::WeightFormat input_wf, + arm_compute::WeightFormat output_wf); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReorderLayer + * + * Similar to @ref NEReorderLayer::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + arm_compute::WeightFormat input_wf, + arm_compute::WeightFormat output_wf); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<NEReorderKernel> _reorder_kernel; /**< Reorder layer kernel */ +}; +} // namespace arm_compute +#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER */ + +#endif // defined(__aarch64__) diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index 8ef7f8a1b2..0a7d824d10 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReorgLayerKernel */ class NEReorgLayer : public INESimpleFunctionNoBorder @@ -38,6 +39,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index d6643842d9..3e6e33f797 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. 
+ * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,32 +25,61 @@ #define ARM_COMPUTE_NERESHAPELAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" +#include "arm_compute/runtime/Types.h" namespace arm_compute { // Forward declarations class ITensor; -/** Basic function to run @ref NEReshapeLayerKernel */ -class NEReshapeLayer : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuReshapeKernel */ +class NEReshapeLayer : public IFunction { public: + /** Default Constructor */ + NEReshapeLayer(); + /** Default Destructor */ + ~NEReshapeLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayer(const NEReshapeLayer &) = delete; + /** Default move constructor */ + NEReshapeLayer(NEReshapeLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayer &operator=(const NEReshapeLayer &) = delete; + /** Default move assignment operator */ + NEReshapeLayer &operator=(NEReshapeLayer &&); /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: All + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * + * @param[in] input Input tensor. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer * - * @param[in] input First tensor info. Data type supported: All + * @param[in] input Input tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index ab5a5d0869..e03e415068 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,16 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEREVERSE_H -#define ARM_COMPUTE_NEREVERSE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReverseKernel */ class NEReverse : public INESimpleFunctionNoBorder @@ -38,20 +38,39 @@ class NEReverse : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. 
Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |All |U32, S32 |All | + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32 + * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis + * + * @note The value of each axis should be between [-rank, rank) + * @note If there are duplicate values in the tensor, the subsequent axis values are ignored. e.g. an array of [2, 2] has the same effects as [2]. + * + * @deprecated Support for U32 in axis tensor will be removed in 24.02 release + * */ - void configure(const ITensor *input, ITensor *output, const ITensor *axis); + void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false); /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32 + * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *axis, + const bool use_inverted_axis = false); }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEREVERSE_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 5350d0646c..72dfa3bda4 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,65 +24,60 @@ #ifndef ARM_COMPUTE_NESCALEIMAGE_H #define ARM_COMPUTE_NESCALEIMAGE_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/Tensor.h" -#include <cstdint> +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEScaleKernel */ +/** Basic function to compute Scale */ class NEScale : public IFunction { public: - /** Constructor - * - * Initialize NEScale - */ + /** Constructor */ NEScale(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScale(const NEScale &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEScale(NEScale &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScale &operator=(const NEScale &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEScale &operator=(NEScale &&) = delete; + /** Destructor */ + ~NEScale(); /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER - * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true. - * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. - */ - void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(), - SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); - /** Initialize the function's source, destination, interpolation type and border_mode. + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |U8 |U8 | + * |S8 |S8 | + * |S16 |S16 | * - * @param[in, out] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
* @param[in] info @ref ScaleKernelInfo to be used for configuration - */ - void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEScale - * - * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER - * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true. - * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. * - * @return a status + * @note Using S8 data type only supports NHWC, @p border_mode Replicate, and @p policy Bilinear */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, - PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false); + void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEScale * - * @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info @ref ScaleKernelInfo to be used for validation * @@ -94,13 +89,8 @@ public: void run() override; private: - Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */ - Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */ - Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */ - NEScaleKernel _scale_kernel; /**< Kernel to perform the scaling */ - NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ - bool _use_padding; /**< Is padding used on the tensors */ - bool _align_corners; /**< Align corners of input and output */ + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NESCALEIMAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h deleted file mode 100644 index 6091121e03..0000000000 --- a/arm_compute/runtime/NEON/functions/NEScharr3x3.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3_H -#define ARM_COMPUTE_NESCHARR3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEScharr3x3Kernel - * - */ -class NEScharr3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESCHARR3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index 6ac328080d..c8e5a204dd 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,19 +25,28 @@ #define ARM_COMPUTE_NESELECT_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NESelect */ -class NESelect : public INESimpleFunction +class NESelect : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |U8 |All |All |All | + * * @param[in] c Condition input tensor. Data types supported: U8. * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x diff --git a/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h b/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h deleted file mode 100644 index a162b6513b..0000000000 --- a/arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H -#define ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H - -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" -#include "arm_compute/runtime/IFunction.h" - -#include <memory> - -namespace arm_compute -{ -/** Basic interface for functions which have a single NEON GEMM wrapper kernel to run */ -class NESimpleAssemblyFunction : public IFunction -{ -public: - /** Constructor */ - NESimpleAssemblyFunction(); - - /** Configure the function with the kernel to run - * - * @param[in] kernel GEMM Wrapper kernel configured and ready to run - * - * @note The kernel is expected to have a 1D window. The function will multi-thread this window across the X dimension. - */ - void configure(std::unique_ptr<INEGEMMWrapperKernel> kernel); - - // Inherited methods overridden: - void run() override final; - -protected: - std::unique_ptr<INEGEMMWrapperKernel> _kernel; /**< Kernel to run */ -}; -} //namespace arm_compute -#endif /*ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h index 834ec27a33..70a688d3b0 100644 --- a/arm_compute/runtime/NEON/functions/NESlice.h +++ b/arm_compute/runtime/NEON/functions/NESlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,8 @@ #ifndef ARM_COMPUTE_NE_SLICE_H #define ARM_COMPUTE_NE_SLICE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { @@ -32,11 +33,32 @@ namespace arm_compute class ITensor; /** Basic function to perform tensor slicing */ -class NESlice : public INESimpleFunctionNoBorder +class NESlice : public IFunction { public: + /** Default Constructor */ + NESlice(); + /** Default Destructor */ + ~NESlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESlice(const NESlice &) = delete; + /** Default move constructor */ + NESlice(NESlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESlice &operator=(const NESlice &) = delete; + /** Default move assignment operator */ + NESlice &operator=(NESlice &&); + /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * @note Start indices must be non-negative. 0 <= starts[i] * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. @@ -63,7 +85,54 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +namespace experimental +{ +/** Basic function to perform tensor slicing */ +class NESlice : public INEOperator +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + /** Static function to check if given info will lead to a valid configuration of @ref NESlice + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). 
+ * + * @return A status + */ + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NE_SLICE_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h deleted file mode 100644 index 0cd633ec3a..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel3x3.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3_H -#define ARM_COMPUTE_NESOBEL3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel3x3Kernel - * - */ -class NESobel3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESOBEL3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h deleted file mode 100644 index af52292359..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel5x5.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5_H -#define ARM_COMPUTE_NESOBEL5x5_H - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel5x5HorKernel - * -# @ref NESobel5x5VertKernel - * - */ -class NESobel5x5 : public IFunction -{ -public: - /** Default constructor */ - NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h deleted file mode 100644 index e9098880f5..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel7x7.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7_H -#define ARM_COMPUTE_NESOBEL7x7_H - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel7x7HorKernel - * -# @ref NESobel7x7VertKernel - * - */ -class NESobel7x7 : public IFunction -{ -public: - /** Default constructor */ - NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32. - * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32. 
- * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL7x7_H */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index b80ceaf25c..1787de6237 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,31 +24,18 @@ #ifndef ARM_COMPUTE_NESOFTMAXLAYER_H #define ARM_COMPUTE_NESOFTMAXLAYER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. - * - * Softmax is calculated by : - * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f] - * - * Log Softmax is calculated by : - * @f[ out = (x - max(x)) - \sum{e^{x - max(x)}} @f] - * - * This function runs the following kernels: - * -# @ref NEFillBorderKernel - * -# @ref NELogits1DMaxKernel - * -# @ref NELogits1DSoftmaxKernel - */ +/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. */ template <bool IS_LOG = false> class NESoftmaxLayerGeneric : public IFunction { @@ -58,69 +45,53 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NESoftmaxLayerGeneric(const NESoftmaxLayerGeneric &) = delete; /** Default move constructor */ - NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&) = default; + NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete; /** Default move assignment operator */ - NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default; + NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&); + /** Default destructor */ + ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
If the width is not a - * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the + * multiple of the internal processing block size, @ref NEFillBorder replicates the * last value of each row to the nearest multiple. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ - void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = -1); + void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = -1); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = 0); // Inherited methods overridden: void run() override; private: - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. 
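[Reviewer note, not part of the patch] For context on the new axis semantics documented above, here is a minimal usage sketch of NESoftmaxLayer. The tensor shapes and the surrounding setup are assumptions following the library's usual Tensor/TensorAllocator workflow; only the configure(input, output, beta, axis) signature is taken from this hunk.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // 4x5x6 input; with axis = 0 the softmax is applied to 5x6 = 30 vectors of size 4.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 5U, 6U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(4U, 5U, 6U), 1, DataType::F32));

    NESoftmaxLayer softmax;
    softmax.configure(&src, &dst, /*beta=*/1.0f, /*axis=*/0);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with data, then:
    softmax.run();
    return 0;
}

Note that the default axis changes from -1 (old negative-index reduction semantics) to 0 (the dimension along which the function is applied), so callers relying on the previous default should review their configure() calls.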
- */ - void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis); - - MemoryGroup _memory_group; - NELogits1DMaxKernel _max_kernel; - NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel; - std::unique_ptr<INEKernel> _flat_or_reshape_kernel_ptr; - NEFillBorderKernel _fill_border_kernel; - NEReshapeLayerKernel _reshape_kernel; - Tensor _max; - Tensor _tmp; - Tensor _input_flattened; - Tensor _output_flattened; - bool _needs_flattening; + struct Impl; + std::unique_ptr<Impl> _impl; }; using NESoftmaxLayer = NESoftmaxLayerGeneric<false>; diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index 75fa50c1b0..5dee61a4a8 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,21 @@ #ifndef ARM_COMPUTE_NESPACETOBATCHLAYER_H #define ARM_COMPUTE_NESPACETOBATCHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToBatchLayerKernel; +class NEFill; -/** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions: +/** Basic function to spatial divide a tensor. This function calls the following kernels/functions: * - * -# @ref NEMemsetKernel + * -# @ref NEFill * -# @ref NESpaceToBatchLayerKernel */ class NESpaceToBatchLayer : public IFunction @@ -53,12 +55,21 @@ public: /** Allow instances of this class to be moved */ NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToBatchLayer() = default; + ~NESpaceToBatchLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:---------|:---------|:---------|:---------| + * |All |S32 |S32 |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 + * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); @@ -67,41 +78,54 @@ public: * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. + * @param[in] padding_left The padding at the beginning of every dimension of the output tensor. + * @param[in] padding_right The padding at the end of every dimension of the output tensor. * @param[out] output Tensor output. 
Data types supported: same as @p input */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); + void configure(const ITensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 - * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32 + * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32 + * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32 * @param[in] output Tensor output info. Data types supported: same as @p input * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const ITensorInfo *block_shape, + const ITensorInfo *paddings, + const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings) * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. + * @param[in] padding_left The padding at the beginning of every dimension of the output tensor. + * @param[in] padding_right The padding at the end of every dimension of the output tensor. * @param[in] output Tensor output info. Data types supported: same as @p input * * @return a status */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + const ITensorInfo *output); // Inherited methods overridden: void run() override; private: - NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - NEMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ + std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ + std::unique_ptr<NEFill> _fill_f; /**< Fill function to run */ + bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index 6a7a9c83a1..1820cb8f6b 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
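[Reviewer note, not part of the patch] A brief usage sketch for the static-block-shape NESpaceToBatchLayer overload documented above. The NCHW shapes are assumptions chosen so that a 2x2 block with zero padding turns one 4x4 plane into four 2x2 batches; only the configure() signature is taken from the hunk.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NCHW input [W=4, H=4, C=1, N=1]; with a 2x2 block and no padding the output is [2, 2, 1, 4].
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 1U, 4U), 1, DataType::F32));

    NESpaceToBatchLayer s2b;
    s2b.configure(&src, /*block_shape_x=*/2, /*block_shape_y=*/2, Size2D(0, 0), Size2D(0, 0), &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    s2b.run();
    return 0;
}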
* * SPDX-License-Identifier: MIT * @@ -24,20 +24,18 @@ #ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H #define ARM_COMPUTE_NESPACETODEPTHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToDepthLayerKernel; -/** This function calls the following NEON kernels/functions: - * - * -# @ref NESpaceToDepthLayerKernel - */ +/** Basic function to run @ref NESpaceToDepthLayerKernel. */ class NESpaceToDepthLayer : public IFunction { public: @@ -52,9 +50,18 @@ public: /** Allow instances of this class to be moved */ NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToDepthLayer() = default; + ~NESpaceToDepthLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value @@ -74,7 +81,7 @@ public: void run() override; private: - NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ + std::unique_ptr<NESpaceToDepthLayerKernel> _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h index 69aef793d5..36358a7094 100644 --- a/arm_compute/runtime/NEON/functions/NESplit.h +++ b/arm_compute/runtime/NEON/functions/NESplit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" - #include "arm_compute/runtime/CPP/functions/CPPSplit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NESlice.h" @@ -40,6 +39,18 @@ namespace arm_compute class NESplit : public CPPSplit<NESlice> { public: + /** NESplit + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * + */ + // Inherited methods overridden: void run() override; }; diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index 9288035060..98dacde0c1 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,20 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
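[Reviewer note, not part of the patch] NESpaceToDepthLayer above follows the same pattern; a sketch assuming the usual configure(input, output, block_shape) signature. The full signature is outside this hunk, so both the call and the expected output shape are assumptions.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NCHW input [W=4, H=4, C=1, N=1]; block_shape = 2 is expected to give [2, 2, 4, 1].
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 4U, 1U), 1, DataType::F32));

    NESpaceToDepthLayer s2d;
    s2d.configure(&src, &dst, /*block_shape=*/2); // assumed signature

    src.allocator()->allocate();
    dst.allocator()->allocate();
    s2d.run();
    return 0;
}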
*/ -#ifndef ARM_COMPUTE_NESTACKLAYER_H -#define ARM_COMPUTE_NESTACKLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" - #include <memory> #include <vector> namespace arm_compute { class ITensor; +class ITensorInfo; +class NEStackLayerKernel; /** Basic function to stack tensors along an axis. This function calls the following kernel: * @@ -46,8 +46,26 @@ class NEStackLayer : public IFunction public: /** Default constructor */ NEStackLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer(const NEStackLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer &operator=(const NEStackLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer(NEStackLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer &operator=(NEStackLayer &&) = delete; + /** Default destructor */ + ~NEStackLayer(); /** Initialise the kernel's inputs vector and output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @note Supported input tensor rank: up to 4 * * @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All @@ -73,9 +91,8 @@ public: void run() override; private: - std::vector<ITensor *> _input; - std::vector<NEStackLayerKernel> _stack_kernels; - unsigned int _num_inputs; + std::unique_ptr<NEStackLayerKernel> _stack_kernel; + bool _is_prepared; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h index 6d5e6392f5..fa1113ffec 100644 --- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h +++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. 
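[Reviewer note, not part of the patch] For the NEStackLayer semantics above, a usage sketch. The configure(input_vector, axis, output) argument order is assumed from the library's established NEStackLayer API, since the full signature is outside this hunk; shapes are illustrative.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEStackLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // Stack two 3x2 tensors along axis 0: a new dimension of size 2 is inserted,
    // so the expected output shape is [2, 3, 2].
    Tensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(2U, 3U, 2U), 1, DataType::F32));

    std::vector<ITensor *> inputs = {&in0, &in1};

    NEStackLayer stack;
    stack.configure(inputs, /*axis=*/0, &out); // assumed argument order

    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate();
    stack.run();
    return 0;
}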
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,8 @@ #ifndef ARM_COMPUTE_NE_STRIDED_SLICE_H #define ARM_COMPUTE_NE_STRIDED_SLICE_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { @@ -32,11 +33,32 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEStridedSliceKernel */ -class NEStridedSlice : public INESimpleFunction +class NEStridedSlice : public IFunction { public: + /** Default Constructor */ + NEStridedSlice(); + /** Default Destructor */ + ~NEStridedSlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSlice(const NEStridedSlice &) = delete; + /** Default move constructor */ + NEStridedSlice(NEStridedSlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSlice &operator=(const NEStridedSlice &) = delete; + /** Default move assignment operator */ + NEStridedSlice &operator=(NEStridedSlice &&); + /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * * @param[in] input Source tensor. Data type supported: All @@ -49,9 +71,74 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - void configure(const ITensor *input, ITensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + void configure(const ITensor *input, + ITensor *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); + + /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
+ */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +namespace experimental +{ +/** Basic function to run @ref NEStridedSliceKernel */ +class NEStridedSlice : public INEOperator +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. + * A slice of size 1 starting from starts[i] in the dimension must be preserved. + */ + void configure(const ITensorInfo *input, + ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice * @@ -67,9 +154,15 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NE_STRIDED_SLICE_H */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h deleted file mode 100644 index b0685afd5b..0000000000 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
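[Reviewer note, not part of the patch] To make the starts/ends/strides convention concrete, a minimal sketch of the run-time NEStridedSlice configured to keep every other element along the x dimension. Shapes are illustrative; the configure()/validate() signatures are taken from the hunk above.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Slice a 4x2 tensor with starts=(0,0), ends=(4,2), strides=(2,1) -> 2x2 output.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(4U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32));

    // Optional up-front check mirroring the validate() documented above.
    const Status st = NEStridedSlice::validate(src.info(), dst.info(),
                                               Coordinates(0, 0), Coordinates(4, 2), BiStrides(2, 1));
    ARM_COMPUTE_ERROR_THROW_ON(st);

    NEStridedSlice slice;
    slice.configure(&src, &dst, Coordinates(0, 0), Coordinates(4, 2), BiStrides(2, 1));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    slice.run();
    return 0;
}

The experimental::NEStridedSlice variant shown in the same hunk is configured on ITensorInfo pointers instead of tensors, in line with the operator-style interfaces introduced elsewhere in this patch.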
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUP_H -#define ARM_COMPUTE_NETABLELOOKUP_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Basic function to run @ref NETableLookupKernel */ -class NETableLookup : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and output - * - * @param[in] input First tensor input. Data types supported: U8/S16 - * @param[in] lut Input lookup table. - * @param[out] output Output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETABLELOOKUP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h deleted file mode 100644 index c955283e9e..0000000000 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NETHRESHOLD_H -#define ARM_COMPUTE_NETHRESHOLD_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEThresholdKernel */ -class NEThreshold : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the function's source, destination, thresholds and threshold type - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold - * @param[in] false_value Value to assign when the condition is false - * @param[in] true_value value to assign when the condition is true - * @param[in] type Thresholding type. Can either be BINARY or RANGE. - * @param[in] upper Upper threshold. Only used with RANGE thresholding - */ - void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, - ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLD_H */ diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index 14d4f221f1..001a0a4128 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_NETILE_H #define ARM_COMPUTE_NETILE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NETileKernel */ class NETile : public INESimpleFunctionNoBorder @@ -38,6 +38,14 @@ class NETile : public INESimpleFunctionNoBorder public: /** Set the source, destination of the kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 03c90e5b28..5d2d1f1b01 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,22 +25,42 @@ #define ARM_COMPUTE_NETRANSPOSE_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { +// Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to transpose a matrix on NEON. 
This function calls the following NEON kernel: - * - * -# @ref NETransposeKernel - * - */ -class NETranspose : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuTransposeKernel */ +class NETranspose : public IFunction { public: + /** Default Constructor */ + NETranspose(); + /** Default Destructor */ + ~NETranspose(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETranspose(const NETranspose &) = delete; + /** Default move constructor */ + NETranspose(NETranspose &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETranspose &operator=(const NETranspose &) = delete; + /** Default move assignment operator */ + NETranspose &operator=(NETranspose &&) = default; /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ @@ -53,7 +73,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute - #endif /* ARM_COMPUTE_NETRANSPOSE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index dbb04f08e5..e1af96d08d 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include <memory> @@ -45,10 +44,28 @@ class NEUnstack : public IFunction public: /** Default constructor */ NEUnstack(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack(const NEUnstack &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack &operator=(const NEUnstack &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack(NEUnstack &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack &operator=(NEUnstack &&) = delete; + /** Default destructor */ + ~NEUnstack() = default; /** Set the input, output and unstacking axis. * - * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * + * @param[in] input A tensor to be unstacked. Data type supported: All. + * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input. * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis. * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around. 
* @@ -56,8 +73,8 @@ public: void configure(const ITensor *input, const std::vector<ITensor *> &output_vector, int axis); /** Static function to check if given info will lead to a valid configuration of @ref NEUnstack * - * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input. + * @param[in] input Input tensor info. Data type supported: All. + * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input. * @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h deleted file mode 100644 index ff465e54a0..0000000000 --- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H -#define ARM_COMPUTE_NEUPSAMPLELAYER_H - -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -class ITensor; - -/** Function to run upsample layer */ -class NEUpsampleLayer : public IFunction -{ -public: - /** Constructor */ - NEUpsampleLayer(); - /** Set the input output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. - * - */ - void configure(const ITensor *input, ITensor *output, const Size2D &info, - const InterpolationPolicy &policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayer - * - * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[out] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. 
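[Reviewer note, not part of the patch] A minimal sketch of the NEUnstack usage documented just above; the number of output tensors in the vector determines how many slices are taken along the chosen axis. Shapes are illustrative; the configure(input, output_vector, axis) signature is taken from the hunk.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEUnstack.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // Unstack a 4x3 tensor along axis 1 into three 1-D tensors of 4 elements each.
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));

    const TensorInfo slice_info(TensorShape(4U), 1, DataType::F32);
    Tensor out0, out1, out2;
    out0.allocator()->init(slice_info);
    out1.allocator()->init(slice_info);
    out2.allocator()->init(slice_info);
    std::vector<ITensor *> outputs = {&out0, &out1, &out2};

    NEUnstack unstack;
    unstack.configure(&src, outputs, /*axis=*/1);

    src.allocator()->allocate();
    out0.allocator()->allocate();
    out1.allocator()->allocate();
    out2.allocator()->allocate();
    unstack.run();
    return 0;
}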
- * @param[in] policy Defines the policy to fill the intermediate pixels. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, - const InterpolationPolicy &policy); - - // Inherited methods overridden: - void run() override; - -private: - NEUpsampleLayerKernel _kernel; - DataLayout _data_layout; -}; -} // arm_compute -#endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h deleted file mode 100644 index 768ef0c6d3..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPAFFINE_H -#define ARM_COMPUTE_NEWARPAFFINE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpAffineKernel */ -class NEWarpAffine : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 2x3 of type float. - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPAFFINE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h deleted file mode 100644 index 66fb9acc3f..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPPERSPECTIVE_H -#define ARM_COMPUTE_NEWARPPERSPECTIVE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpPerspectiveKernel */ -class NEWarpPerspective : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPPERSPECTIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 4a8fe61614..6caa2aeb59 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,18 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H -#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CPP/functions/CPPPermute.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" - +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -42,11 +36,11 @@ namespace arm_compute // Forward declarations class ITensor; -/** Basic function to simulate a convolution layer. This function calls the following NEON kernels: - * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) - * -# @ref NEWinogradLayerTransformInputKernel - * -# @ref NEWinogradLayerTransformOutputKernel - * -# @ref NEGEMMAssemblyDispatch +/** Basic function to simulate a convolution layer. This function calls the following kernels: + * + * -# @ref cpu::CpuWinogradConv2dTransformInputKernel + * -# @ref cpu::CpuWinogradConv2dTransformOutputKernel + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref CPPPermute (three times: weights, input and output) * * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true @@ -56,14 +50,35 @@ class NEWinogradConvolutionLayer : public IFunction public: /** Constructor */ NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete; + /** Default move constructor */ + NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; + /** Default move assignment operator */ + NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = default; + /** Destructor */ + ~NEWinogradConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. - * Currently only 3x3 and 5x5 kernels are supported. + * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32 + * -> 3x3 for Fp16 * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. 
* Data types supported: Same as @p input. @@ -72,62 +87,35 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - bool enable_fast_math = false); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; void prepare() override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer + /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * Currently only 3x3 and 5x5 kernels are supported. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. - * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. 
Default is false + * Similar to @ref NEWinogradConvolutionLayer::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); private: - MemoryGroup _memory_group; - NEGEMM _gemm_function; - std::unique_ptr<INEKernel> _transform_input_kernel; - std::unique_ptr<INEKernel> _transform_output_kernel; - std::unique_ptr<INEKernel> _transform_weights_kernel; - NEActivationLayer _activationlayer_function; - - CPPPermute _permute_input; - CPPPermute _permute_weights; - CPPPermute _permute_output; - Tensor _input_transformed; - Tensor _output_transformed; - Tensor _input_workspace; - Tensor _output_workspace; - Tensor _kernel_storage; - Tensor _input_nhwc; - Tensor _output_nhwc; - Tensor _weights_hwio; - const ITensor *_input; - const ITensor *_weights; - ITensor *_output; - bool _is_prepared; - bool _is_activationlayer_enabled; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h deleted file mode 100644 index 5e0c34b9b1..0000000000 --- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
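[Reviewer note, not part of the patch] To tie the NEWinogradConvolutionLayer configure()/validate() documentation together, a minimal F32 3x3 usage sketch. The NCHW shapes, stride and padding are assumptions chosen to satisfy the unit-stride requirement; the configure()/validate() signatures and the [kernel_x, kernel_y, IFM, OFM] weights layout are taken from the hunk.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NCHW: 8x8 input with 3 channels, 4 output feature maps, 3x3 kernel, stride 1, pad 1.
    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U, 1U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32)); // [kernel_x, kernel_y, IFM, OFM]
    biases.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U, 1U), 1, DataType::F32));

    const PadStrideInfo conv_info(1, 1, 1, 1); // stride_x, stride_y, pad_x, pad_y

    ARM_COMPUTE_ERROR_THROW_ON(NEWinogradConvolutionLayer::validate(
        src.info(), weights.info(), biases.info(), dst.info(), conv_info));

    NEWinogradConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, conv_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src/weights/biases, then:
    conv.run();
    return 0;
}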
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
deleted file mode 100644
index 5e0c34b9b1..0000000000
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEYOLOLAYER_H
-#define ARM_COMPUTE_NEYOLOLAYER_H
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEYOLOLayerKernel */
-class NEYOLOLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Set the input and output tensor.
-     *
-     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
-     *
-     * @param[in, out] input       Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                             of the activation function. Data types supported: F16/F32.
-     * @param[out]     output      Destination tensor. Data type supported: same as @p input
-     * @param[in]      act_info    Activation layer parameters.
-     * @param[in]      num_classes Number of classes to activate (must be submultiple of @p input channels)
-     */
-    void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayer
-     *
-     * @param[in] input       Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
-     *                        of the activation function. Data types supported: F16/F32.
-     * @param[in] output      Destination tensor info. Data type supported: same as @p input
-     * @param[in] act_info    Activation layer information.
-     * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEYOLOLAYER_H */
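
For reference, callers on releases that still ship this header drove the removed class roughly as below. This is a hedged sketch only; the names src and dst, the activation choice and the num_classes value are assumptions that depend on the actual tensor shapes, and nothing here compiles once this deletion lands.

    // Pre-removal NEYOLOLayer usage; the header no longer exists after this patch.
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <cstdint>

    using namespace arm_compute;

    void run_yolo(Tensor &src, Tensor &dst)
    {
        const ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::LOGISTIC);
        const int32_t             num_classes = 20; // must be a submultiple of the input channel count

        if(NEYOLOLayer::validate(src.info(), dst.info(), act, num_classes).error_code() == ErrorCode::OK)
        {
            NEYOLOLayer yolo;
            yolo.configure(&src, &dst, act, num_classes); // runs in place if output is nullptr or equal to input
            yolo.run();
        }
    }
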
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
deleted file mode 100644
index f16bb46d35..0000000000
--- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/** Depthwise convolution assembly kernel glue */
-class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
-{
-public:
-    /** Default constructor
-     *
-     * @param[in,out] memory_manager Memory manager to use
-     */
-    NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
-    /** Default move constructor */
-    NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
-    /** Default move assignment operator */
-    NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
-    /** Default destructor */
-    ~NEDepthwiseConvolutionAssemblyDispatch();
-    /** Initialize the function's source, destination, kernels and border_size.
-     *
-     * @note Supports only NHWC format
-     *
-     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
-     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
-     * @param[in]  bias             (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                              Data type supported: Same as @p input.
-     * @param[out] output           Destination tensor. Data type supported: same as @p input.
-     * @param[in]  conv_info        Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     */
-    void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
-                   const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
-                   const Size2D &dilation = Size2D(1, 1));
-    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
-     *
-     * @note Supports only NHWC format
-     *
-     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
-     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
-     * @param[in]  bias             (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                              Data type supported: Same as @p input.
-     * @param[out] output           Destination tensor. Data type supported: same as @p input.
-     * @param[in]  conv_info        Padding and stride information to use for the convolution.
-     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
-     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     *
-     * @return An error status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
-                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
-                           const Size2D &dilation = Size2D(1, 1));
-    /** Check if the optimized kernel can be used for the given kernel sizes and strides
-     *
-     * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
-     *
-     * @param[in] input            Input tensor info.
-     * @param[in] weights          Weights tensor info.
-     * @param[in] conv_info        Convolution layer metadata.
-     * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
-     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
-     *
-     * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
-     */
-    static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));
-
-    // Inherited methods overridden:
-    void run() override;
-    void prepare() override;
-
-private:
-    struct LocalImpl;
-
-private:
-    MemoryGroup                _memory_group;
-    const ITensor             *_input;
-    const ITensor             *_weights;
-    const ITensor             *_bias;
-    ITensor                   *_output;
-    Tensor                     _packed_weights;
-    Tensor                     _workspace;
-    bool                       _is_prepared;
-    std::unique_ptr<LocalImpl> _pImpl;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */
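
For reference, the removed assembly dispatcher was typically guarded by its support query before being configured, as its own documentation above describes (NHWC only, limited kernel sizes and strides). A minimal sketch under those assumptions; the names run_assembly_depthwise, src, weights, bias and dst, and the stride/padding values are illustrative, and this no longer compiles once the header is deleted.

    // Pre-removal NEDepthwiseConvolutionAssemblyDispatch usage; deleted by this patch.
    #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void run_assembly_depthwise(Tensor &src, Tensor &weights, Tensor &bias, Tensor &dst)
    {
        const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1, padding 1

        // Query support first; otherwise fall back to the generic depthwise convolution function.
        if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(src.info(), weights.info(), conv_info))
        {
            return;
        }

        NEDepthwiseConvolutionAssemblyDispatch dwc;
        dwc.configure(&src, &weights, &bias, &dst, conv_info, /* depth_multiplier */ 1);
        dwc.prepare(); // packs the weights once
        dwc.run();
    }
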