diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions')
137 files changed, 3263 insertions, 6689 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h deleted file mode 100644 index f00b144475..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAbsoluteDifferenceKernel - * - * @note The image data type for the inputs must be U8 or S16 - * @note The function calculates the absolute difference also when the 2 inputs have different image data types - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEAbsoluteDifference : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAbsoluteDifference() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifference(const NEAbsoluteDifference &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAbsoluteDifference(NEAbsoluteDifference &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete; - /** Default destructor */ - ~NEAbsoluteDifference(); - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); -}; -} -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h deleted file mode 100644 index 1881411880..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEACCUMULATE_H -#define ARM_COMPUTE_NEACCUMULATE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAccumulateKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulate : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulate() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulate(const NEAccumulate &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulate &operator=(const NEAccumulate &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulate(NEAccumulate &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulate &operator=(NEAccumulate &&) = delete; - /** Default destructor */ - ~NEAccumulate(); - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. 
- */ - void configure(const ITensor *input, ITensor *output); -}; - -/** Basic function to run @ref NEAccumulateWeightedKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulateWeighted : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulateWeighted() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeighted(const NEAccumulateWeighted &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateWeighted(NEAccumulateWeighted &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete; - /** Default destructor */ - ~NEAccumulateWeighted(); - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha The input scalar value with a value input the range of [0, 1.0] - * @param[in,out] output Accumulated tensor. Data type supported: U8. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. 
- */ - void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); -}; - -/** Basic function to run @ref NEAccumulateSquaredKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulateSquared : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulateSquared() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquared(const NEAccumulateSquared &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateSquared(NEAccumulateSquared &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete; - /** Default destructor */ - ~NEAccumulateSquared(); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift The input with a value input the range of [0, 15] - * @param[in,out] output Accumulated tensor. Data type supported: S16. - */ - void configure(const ITensor *input, uint32_t shift, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 3f410fcd8c..5584fdc783 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,17 +24,20 @@ #ifndef ARM_COMPUTE_NEACTIVATIONLAYER_H #define ARM_COMPUTE_NEACTIVATIONLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INEOperator.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IRuntimeContext.h" + +#include <memory> namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEActivationLayerKernel +/** Basic function to run @ref cpu::kernels::CpuActivationKernel * * @note The function simulates an activation layer with the specified activation function. */ @@ -59,6 +62,18 @@ public: /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 | + * |F16 |F16 | + * |F32 |F32 | + * * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. 
In case of @p output tensor = nullptr, this tensor will store the result @@ -86,43 +101,5 @@ private: struct Impl; std::unique_ptr<Impl> _impl; }; - -namespace experimental -{ -/** Basic function to run @ref NEActivationLayerKernel */ -class NEActivationLayer : public INEOperator -{ -public: - /** Constructor */ - NEActivationLayer() = default; - /** Prevent instances of this class from being copied */ - NEActivationLayer(const NEActivationLayer &) = delete; - /** Default move constructor */ - NEActivationLayer(NEActivationLayer &&) = default; - /** Prevent instances of this class from being copied */ - NEActivationLayer &operator=(const NEActivationLayer &) = delete; - /** Default move assignment operator */ - NEActivationLayer &operator=(NEActivationLayer &&) = default; - /** Destructor */ - ~NEActivationLayer(); - - /** Set the input and output tensor. - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor info. Data type supported: same as @p input - * @param[in] activation_info Activation layer parameters. - */ - void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); -}; -} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAddMulAdd.h b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h new file mode 100644 index 0000000000..6c65c055dd --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAddMulAdd.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD +#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; +class ITensorInfo; +class ActivationLayerInfo; + +/** Function to compute Add+Mul+Add fused operation */ +class NEAddMulAdd : public IFunction +{ +public: + /** Constructor */ + NEAddMulAdd(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAddMulAdd(const NEAddMulAdd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAddMulAdd(NEAddMulAdd &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAddMulAdd &operator=(const NEAddMulAdd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAddMulAdd &operator=(NEAddMulAdd &&) = delete; + /** Destructor */ + ~NEAddMulAdd(); + /** Initialize the function's inputs and outputs. + * + * Valid data layouts: + * - Any + * + * Valid data type configurations: + * |input1 |input2 |bn_mul |bn_add |add_output |final_output | + * |:--------------|:--------------|:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | + * + * This is what this composite function (tailored for add followed by a batch norm operation) does: + * add_output <- input1 + input2 (add) + * final_output <- add_output * bn_mul + bn_add (batch norm = mul+add) + * + * @param[in] input1 First tensor input. 
Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[in] bn_mul The multiplication coefficient on the feature dimension. Data types supported: Same as @p input1. + * It's a one-dimensional tensor with size equal to the feature maps [FM] + * @param[in] bn_add The addition coefficient on the feature dimension. Data types supported: Same as @p input1. + * It's a one-dimensional tensor with size equal to the feature maps [FM] + * @param[out] add_output Output of the first add. Data type supported: Same as @p input1. + * @param[out] final_output Output of the add+mul+add+act composite operation. Data type supported: Same as @p input1. + * @param[in] policy Policy to handle overflow + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * + */ + void configure(ITensor *input1, + ITensor *input2, + ITensor *bn_mul, + ITensor *bn_add, + ITensor *add_output, + ITensor *final_output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEAddMulAdd + * + * Similar to @ref NEAddMulAdd::configure() except the arguments are @ref ITensorInfo * instead of @ref ITensor * + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *bn_mul, + const ITensorInfo *bn_add, + const ITensorInfo *add_output, + const ITensorInfo *final_output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEADDMULADD */ diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index 4b13d1f44e..3bb50a0f90 100644 --- 
a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NEARGMINMAXLAYER_H #define ARM_COMPUTE_NEARGMINMAXLAYER_H -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" @@ -33,11 +31,10 @@ namespace arm_compute { class ITensor; - /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. * - * This function calls the following NEON kernels: + * This function calls the following kernels: * * -# @ref NEReductionOperationKernel * -# @ref NEFillBorderKernel @@ -64,6 +61,18 @@ public: ~NEArgMinMaxLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |QASYMM8 |U32, S32 | + * |QASYMM8_SIGNED |U32, S32 | + * |S32 |U32, S32, S64 | + * |F16 |U32, S32 | + * |F32 |U32, S32 | + * * @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. * @param[out] output Output source tensor. Data types supported: U32/S32. @@ -74,7 +83,7 @@ public: * * @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. - * @param[in] output Output source tensor info. Data types supported: U32/S32. + * @param[in] output Output source tensor info. Data types supported: U32/S32/S64. 
* @param[in] op Operation to perform: min or max * * @return a status @@ -85,7 +94,8 @@ public: void run() override; private: - std::unique_ptr<NEReductionOperation> _reduction_function; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEARGMINMAXLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 6aaa5ff4f7..73a43dbc44 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,69 +25,17 @@ #define ARM_COMPUTE_NEARITHMETICADDITION_H #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/INEOperator.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -namespace experimental -{ -/** Basic function to run @ref NEArithmeticAdditionKernel */ -class NEArithmeticAddition : public INEOperator -{ -public: - /** Constructor */ - NEArithmeticAddition() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAddition(const NEArithmeticAddition &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEArithmeticAddition(NEArithmeticAddition &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete; - /** Default destructor */ - ~NEArithmeticAddition(); - 
/** Initialise the kernel's inputs, output and conversion policy. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] output Output tensor info. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] policy Policy to use to handle overflow - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
- * * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace experimental - -/** Basic function to run @ref NEArithmeticAdditionKernel */ +/** Basic function to run @ref cpu::kernels::CpuAddKernel */ class NEArithmeticAddition : public IFunction { public: @@ -105,19 +53,21 @@ public: NEArithmeticAddition &operator=(NEArithmeticAddition &&); /** Initialise the kernel's inputs, output and conversion policy. * - * Valid configurations (Input1,Input2) -> Output : + * Valid data layouts: + * - All * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 @@ -125,7 +75,11 @@ public: * @param[in] policy Policy to use to handle overflow. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
*/ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 @@ -136,7 +90,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -146,4 +104,4 @@ private: std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /*ARM_COMPUTE_NEARITHMETICADDITION_H */ +#endif /* ARM_COMPUTE_NEARITHMETICADDITION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index 5d2475b3a4..3e4f6356c5 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_NEARITHMETICSUBTRACTION_H #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INEOperator.h" @@ -32,75 +33,13 @@ namespace arm_compute { class ITensor; -namespace experimental -{ -/** Basic function to run @ref NEArithmeticSubtractionKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function performs an arithmetic subtraction between two tensors. - * - * This function calls the following kernels: - * -# @ref NEArithmeticSubtractionKernel - */ -class NEArithmeticSubtraction : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[out] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
- */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 - * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 - * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace experimental - -/** Basic function to run @ref NEArithmeticSubtractionKernel +/** Basic function to run @ref cpu::kernels::CpuSubKernel * * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. * @note The function performs an arithmetic subtraction between two tensors. 
* * This function calls the following kernels: - * -# @ref NEArithmeticSubtractionKernel + * -# @ref cpu::kernels::CpuSubKernel */ class NEArithmeticSubtraction : public IFunction { @@ -119,13 +58,33 @@ public: NEArithmeticSubtraction &operator=(NEArithmeticSubtraction &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |S16 |S16 |S16 | + * |S32 |S32 |S32 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction * * @param[in] input1 First tensor input. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32 @@ -136,7 +95,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ConvertPolicy policy, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 6d56a267a7..99e2dcadbb 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,16 @@ public: ~NEBatchNormalizationLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | + * * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. @@ -71,7 +81,13 @@ public: * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
*/ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, + void configure(ITensor *input, + ITensor *output, + const ITensor *mean, + const ITensor *var, + const ITensor *beta = nullptr, + const ITensor *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer * @@ -88,10 +104,14 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *var, + const ITensorInfo *beta = nullptr, + const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -99,5 +119,5 @@ public: private: std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index c2fd26d34c..ebed0bea29 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEBATCHTOSPACELAYER_H #define ARM_COMPUTE_NEBATCHTOSPACELAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute @@ -52,10 +51,22 @@ public: ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:---------|:---------|:----------| + * |All |s32 |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input + * + * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release */ + ARM_COMPUTE_DEPRECATED_REL(23.05) void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Set the input and output tensors. (Static block shape). * @@ -63,8 +74,13 @@ public: * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input + * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed */ - void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output); + void configure(const ITensor *input, + int32_t block_shape_x, + int32_t block_shape_y, + ITensor *output, + const CropInfo &crop_info = CropInfo{}); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. @@ -72,7 +88,9 @@ public: * @param[out] output Tensor output info. 
Data types supported: same as @p input * * @return a status + * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release */ + ARM_COMPUTE_DEPRECATED_REL(23.05) static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape). * @@ -80,10 +98,15 @@ public: * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output info. Data types supported: same as @p input + * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed * * @return a status */ - static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + int32_t block_shape_x, + int32_t block_shape_y, + const ITensorInfo *output, + const CropInfo &crop_info = CropInfo{}); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEBATCHTOSPACELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index 3203d2b9a7..1f95f193d3 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,14 @@ public: ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. 
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index 9fa0d38caf..c66bebf7cc 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder public: /** Initialise the kernel's input and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index fba6b784de..183df212e4 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index c6cb584284..126aaa6ddd 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. 
+ * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index de8dfef4ed..aa41fc0df2 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,16 +32,23 @@ namespace arm_compute class ITensor; class ITensorInfo; -/** Basic function to run @ref NEBoundingBoxTransformKernel. - * - * This function calls the following Neon kernels: - * -# @ref NEBoundingBoxTransformKernel - */ +/** Basic function to run @ref NEBoundingBoxTransformKernel. */ class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM16 |QASYMM8 |QASYMM16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. 
Data types supported: Same as @p input * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. @@ -50,7 +57,8 @@ public: * * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. */ - void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); + void + configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransform * @@ -64,7 +72,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + static Status validate(const ITensorInfo *boxes, + const ITensorInfo *pred_boxes, + const ITensorInfo *deltas, + const BoundingBoxTransformInfo &info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEBOUNDINGBOXTRANSFORM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h deleted file mode 100644 index d65c2be885..0000000000 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBOX3x3_H -#define ARM_COMPUTE_NEBOX3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute box filter 3x3. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEBox3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEBox3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @note The border handler is run on the input tensor. - * - * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); -}; -} -#endif /*ARM_COMPUTE_NEBOX3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h deleted file mode 100644 index 7cdb8ee38e..0000000000 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGE_H -#define ARM_COMPUTE_NECANNYEDGE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEGradientKernel; -class NEFillBorderKernel; -class NEEdgeNonMaxSuppressionKernel; -class NEEdgeTraceKernel; - -/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: - * - * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) - * -# @ref NESobel3x3 (if gradient_size == 3) or - * @ref NESobel5x5 (if gradient_size == 5) or - * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEGradientKernel - * -# @ref NEEdgeNonMaxSuppressionKernel - * -# @ref NEEdgeTraceKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NECannyEdge : public IFunction -{ -public: - /** Constructor - * - * Initialize Sobel kernel to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. 
- */ - NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge(const NECannyEdge &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge &operator=(const NECannyEdge &) = delete; - /** Default destructor */ - ~NECannyEdge(); - /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] upper_thr Upper threhold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis. - * @param[in] gradient_size Gradient size (3, 5 or 7) - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr<NEGradientKernel> _gradient; /**< Gradient kernel */ - std::unique_ptr<NEEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel */ - std::unique_ptr<NEEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel */ - std::unique_ptr<NEFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - std::unique_ptr<NEFillBorderKernel> _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ -}; -} -#endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index e536317660..43cae777f6 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,33 +25,51 @@ #define ARM_COMPUTE_NECAST_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -/** Basic function to run @ref NEDepthConvertLayerKernel. +/** Basic function to run @ref cpu::kernels::CpuCastKernel. 
* This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values. */ -class NECast : public INESimpleFunctionNoBorder +class NECast : public IFunction { public: + /** Constructor */ + NECast(); + /** Destructor */ + ~NECast(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECast(const NECast &) = delete; + /** Default move constructor */ + NECast(NECast &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECast &operator=(const NECast &) = delete; + /** Default move assignment operator */ + NECast &operator=(NECast &&); /** Initialize the function's source, destination * - * Input data type must be different than output data type. + * Valid data layouts: + * - All * - * Valid conversions Input -> Output : + * Valid data type configurations: + * |src |dst | + * |:--------------|:-----------------------------------------------| + * |QASYMM8_SIGNED | S16, S32, F32, F16 | + * |QASYMM8 | U16, S16, S32, F32, F16 | + * |U8 | U16, S16, S32, F32, F16 | + * |U16 | U8, U32 | + * |S16 | QASYMM8_SIGNED, U8, S32 | + * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 | + * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 | + * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8| * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. 
@@ -66,7 +84,14 @@ public: * * @return a status */ - static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NECAST_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h deleted file mode 100644 index c4ead73343..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINE_H -#define ARM_COMPUTE_NECHANNELCOMBINE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEChannelCombine : public INESimpleFunctionNoBorder -{ -public: - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output image. 
Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELCOMBINE_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h deleted file mode 100644 index 99522d2d74..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACT_H -#define ARM_COMPUTE_NECHANNELEXTRACT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEChannelExtract : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input The input tensor to extract the channel from. Formats supported: Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELEXTRACT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index aa11396c20..bc19e1a4af 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,6 +44,15 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder public: /** Initialize the function * + * Valid data layouts: + * - NCHW + * - NHWC + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h deleted file mode 100644 index 69459a83c1..0000000000 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOL2IM_H -#define ARM_COMPUTE_NECOL2IM_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NECol2Im */ -class NECol2Im : public INESimpleFunctionNoBorder -{ -public: - /** Configure the col2im NEON kernel - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - */ - void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); - /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECOL2IM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h deleted file mode 100644 index 8974aa63a1..0000000000 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOLORCONVERT_H -#define ARM_COMPUTE_NECOLORCONVERT_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class IMultiImage; -using IImage = ITensor; - -/**Basic function to run @ref NEColorConvertKernel to perform color conversion - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEColorConvert : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. 
Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h deleted file mode 100644 index b63243fec6..0000000000 --- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H -#define ARM_COMPUTE_NECOMPUTEALLANCHORS_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEComputeAllAnchorsKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEComputeAllAnchorsKernel - */ -class NEComputeAllAnchors : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. 
Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECOMPUTEALLANCHORS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index fd35d0bc46..1600f85488 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,10 @@ #ifndef ARM_COMPUTE_NECONCATENATELAYER_H #define ARM_COMPUTE_NECONCATENATELAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INEOperator.h" -#include "support/Requires.h" +#include "arm_compute/runtime/IFunction.h" #include <memory> -#include <vector> namespace arm_compute { @@ -40,13 +36,7 @@ class ITensor; class ITensorInfo; class Status; -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). - * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). - * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). - */ +/** Basic function to execute concatenate tensors along a given axis */ class NEConcatenateLayer : public IFunction { public: @@ -64,8 +54,20 @@ public: NEConcatenateLayer &operator=(NEConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel, + * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel. * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. @@ -75,7 +77,8 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel, + * @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. 
@@ -83,7 +86,8 @@ public: * * @return a status */ - static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); + static Status + validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; @@ -92,62 +96,5 @@ private: struct Impl; std::unique_ptr<Impl> _impl; }; - -namespace experimental -{ -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). - * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). - * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). - */ -class NEConcatenation : public INEOperator -{ -public: - /** Constructor */ - NEConcatenation(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConcatenation(const NEConcatenation &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConcatenation &operator=(const NEConcatenation &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConcatenation(NEConcatenation &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConcatenation &operator=(NEConcatenation &&) = delete; - /** Default destructor */ - ~NEConcatenation() = default; - /** Initialise the kernel's inputs vector and output. - * - * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. 
- * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. - */ - void configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis); - /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer - * - * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. - * - * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. 
- * - * @return a status - */ - static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::vector<std::unique_ptr<ICPPKernel>> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; -}; -} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NECONCATENATELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConv3D.h b/arm_compute/runtime/NEON/functions/NEConv3D.h new file mode 100644 index 0000000000..525f37f3e7 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConv3D.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_NECONV3D_H +#define ARM_COMPUTE_NECONV3D_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Basic function to simulate a 3d convolution. This function calls one of the following functions: + * -# @ref cpu::CpuDirectConv3d + * + */ +class NEConv3D : public IFunction +{ +public: + /** Constructor */ + NEConv3D(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConv3D(const NEConv3D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConv3D &operator=(const NEConv3D &) = delete; + /** Default move constructor */ + NEConv3D(NEConv3D &&) = default; + /** Default move assignment operator */ + NEConv3D &operator=(NEConv3D &&) = default; + /** Default destructor */ + ~NEConv3D(); + /** Set the input and output tensors. + * + * Valid data layouts: + * - NDHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. 4 lower dimensions represent a single input [IFM, width, height, depth], + * while every optional dimension from 5 and above represent a batch of inputs. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [OFM, IFM, kernel_x, kernel_y, kernel_z]. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * @param[out] output Destination tensor.
4 lower dimensions represent a single output [OFM, width, height, depth], while the rest represent batch of outputs. + * @param[in] conv_info Contains padding, stride, activation information described in @ref Conv3dInfo. + */ + void configure( + ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv3dInfo &conv_info); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to NEConv3D::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const Conv3dInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_NECONV3D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 984e8d68c0..dc6b22d717 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,16 @@ #ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/ITransformWeights.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" -#include <memory> namespace arm_compute { // Forward declarations class ITensor; -class NEConvertFullyConnectedWeightsKernel; +class ITensorInfo; -/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */ +/** Basic function to run @ref cpu::kernels::CpuConvertFullyConnectedWeightsKernel.
*/ class NEConvertFullyConnectedWeights : public IFunction { public: @@ -54,12 +51,22 @@ public: ~NEConvertFullyConnectedWeights(); /** Initialize the function. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. */ - void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); + void + configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights * * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. @@ -69,53 +76,17 @@ public: * * @return A Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const TensorShape &original_input_shape, + DataLayout data_layout); // Inherited methods overriden: void run() override; private: - std::unique_ptr<NEConvertFullyConnectedWeightsKernel> _kernel; -}; - -namespace weights_transformations -{ -/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. 
*/ -class NEConvertFullyConnectedWeightsManaged : public ITransformWeights -{ -public: - void run() override - { - _output.allocator()->allocate(); - _func.run(); - _reshape_run = true; - } - - void release() override - { - _output.allocator()->free(); - } - - ITensor *get_weights() override - { - return &_output; - } - - uint32_t uid() override - { - return _uid; - } - - void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout) - { - _func.configure(input, &_output, original_input_shape, data_layout); - } - -private: - static constexpr uint32_t _uid = 0x4; - Tensor _output{}; - NEConvertFullyConnectedWeights _func{}; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} // namespace weights_transformations } // namespace arm_compute #endif /* ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h deleted file mode 100644 index afd654a595..0000000000 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVOLUTION_H -#define ARM_COMPUTE_NECONVOLUTION_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEFillBorderKernel; -template <unsigned int matrix_size> -class NEConvolutionKernel; -template <unsigned int matrix_size> -class NESeparableConvolutionHorKernel; -template <unsigned int matrix_size> -class NESeparableConvolutionVertKernel; - -/** Basic function to execute convolution of size 3x3. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolution3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEConvolution3x3 : public INESimpleFunction -{ -public: - /** Constructor */ - NEConvolution3x3() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolution3x3(const NEConvolution3x3 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolution3x3(NEConvolution3x3 &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete; - /** Default destructor */ - ~NEConvolution3x3(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8/S16. - * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); -}; - -/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolutionKernel or<br/> - * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable) - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -template <unsigned int matrix_size> -class NEConvolutionSquare : public IFunction -{ -public: - /** Default constructor */ - NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionSquare(const NEConvolutionSquare &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionSquare(NEConvolutionSquare &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete; - /** Default destructor */ - ~NEConvolutionSquare(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8 or S16. - * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. - * @param[in] scale Scale of the convolution matrix. 
If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function memory group */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - std::unique_ptr<NESeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - std::unique_ptr<NESeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */ - std::unique_ptr<NEConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/ - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel for border handling */ -}; - -/** Basic function to run 5x5 convolution. */ -using NEConvolution5x5 = NEConvolutionSquare<5>; -/** Basic function to run 7x7 convolution. */ -using NEConvolution7x7 = NEConvolutionSquare<7>; -/** Basic function to run 9x9 convolution. */ -using NEConvolution9x9 = NEConvolutionSquare<9>; - -/** Basic function to execute non-square convolution. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEConvolutionRectangleKernel or<br/> - * - * @note Convolution rectangle should have dimensions of 3, 5, 7, 9 - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEConvolutionRectangle : public INESimpleFunction -{ -public: - /** Constructor */ - NEConvolutionRectangle() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangle(const NEConvolutionRectangle &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionRectangle(NEConvolutionRectangle &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete; - /** Default destructor */ - ~NEConvolutionRectangle(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8 or S16. - * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. - * @param[in] rows Rows of convolution kernel. - * @param[in] cols Columns of convolution kernel. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_mode Strategy to use for borders. 
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NECONVOLUTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index a061dc7b04..2d07980ade 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NECONVOLUTIONLAYER_H -#define ARM_COMPUTE_NECONVOLUTIONLAYER_H - -#include "arm_compute/runtime/IFunction.h" +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include <memory> @@ -37,10 +37,10 @@ namespace arm_compute // Forward declarations class ITensor; -/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions: - * -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation) - * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation) - * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation) +/** Basic function to simulate a convolution layer. 
This function calls one of the following functions: + * -# @ref cpu::CpuGemmConv2d (executed only in case GEMM is required for the operation) + * -# @ref cpu::CpuWinogradConv2d (executed only in case Winograd is required for the operation) + * -# @ref cpu::CpuDirectConv2d (executed only in case Direct Convolution is required for the operation) * -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation) * * @@ -78,46 +78,70 @@ public: NEConvolutionLayer(const NEConvolutionLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete; + /** Default move constructor */ + NEConvolutionLayer(NEConvolutionLayer &&) = default; /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayer(NEConvolutionLayer &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete; + NEConvolutionLayer &operator=(NEConvolutionLayer &&) = default; /** Default destructor */ - ~NEConvolutionLayer() = default; + ~NEConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. 
* Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation @@ -126,20 +150,28 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, - unsigned int num_groups = 1); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. 
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. 
In case this flag were set, the function could dispatch the fastest implementation @@ -147,15 +179,21 @@ public: * * @return the Convolution Method Hint */ - static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + static ConvolutionMethod get_convolution_method(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; void prepare() override; private: - std::shared_ptr<IMemoryManager> _memory_manager; - std::unique_ptr<IFunction> _function; /**< Function to run */ + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
\ No newline at end of file +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NECONVOLUTIONLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index a58ac9e620..840c03e968 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,31 +25,41 @@ #define ARM_COMPUTE_NECOPY_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -/** Basic function to run @ref NECopyKernel */ -class NECopy : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuCopyKernel */ +class NECopy : public IFunction { public: - /** Constructor */ - NECopy() = default; + /** Default Constructor */ + NECopy(); + /** Default Destructor */ + ~NECopy(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NECopy(const NECopy &) = delete; + /** Default move constructor */ + NECopy(NECopy &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NECopy &operator=(const NECopy &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NECopy(NECopy &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NECopy &operator=(NECopy &&) = delete; - /** Default destructor */ - ~NECopy(); + /** Default move assignment operator */ + NECopy &operator=(NECopy &&); /** Initialise the function's source and destination. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. * @@ -63,6 +73,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NECOPY_H */ diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 5c3733f8ee..f806762158 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { // Forward Declarations +class Tensor; class ITensor; class NECropKernel; @@ -53,6 +54,14 @@ public: /** Configure kernel * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------|:--------|:------|:--------| + * |All |F32 |F32 |F32 | + * * @note Supported tensor rank: up to 4 * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. * @note Start and end indices of boxes are inclusive. @@ -66,8 +75,13 @@ public: * @param[in] method The policy to be used when resizing image. Default is bilinear. * @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0. 
*/ - void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size, - InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0); + void configure(const ITensor *input, + const ITensor *boxes, + const ITensor *box_ind, + ITensor *output, + Coordinates2D crop_size, + InterpolationPolicy method = InterpolationPolicy::BILINEAR, + float extrapolation_value = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESlice * @@ -87,8 +101,13 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output, - Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value); + static Status validate(const ITensorInfo *input, + const ITensorInfo *boxes, + const ITensorInfo *box_ind, + const ITensorInfo *output, + Coordinates2D crop_size, + InterpolationPolicy method, + float extrapolation_value); void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 97b1a47f64..aabe42f928 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,15 +24,14 @@ #ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H -#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEReverse.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReverse.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -64,11 +63,10 @@ namespace arm_compute * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. 
* - * This function calls the following NEON kernels/functions: + * This function calls the following kernels/functions: * * -# @ref CPPUpsample * -# @ref NEConvolutionLayer - * -# @ref NEPermute * -# @ref NEReverse * */ @@ -77,39 +75,77 @@ class NEDeconvolutionLayer : public IFunction public: /** Constructor */ NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete; + /** Default move constructor */ + NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEDeconvolutionLayer(NEDeconvolutionLayer &&) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = delete; + /** Default move assignment operator */ + NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default; /** Default destructor */ - virtual ~NEDeconvolutionLayer() = default; + ~NEDeconvolutionLayer() = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[out] output Output tensor. 
The output has the same number of dimensions as the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. + * Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for + * the GEMM convolution. 
* */ - void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *bias, + ITensor *output, + const PadStrideInfo &info, + bool enable_fast_math = false, + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. 
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] weights_info (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for + * the GEMM convolution. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *bias, + const ITensorInfo *output, + const PadStrideInfo &info, + bool enable_fast_math = false, + const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: void run() override; @@ -127,6 +163,7 @@ private: ITensor *_input; PadStrideInfo _info; bool _is_prepared; + bool _do_upsampling; }; -} // arm_compute +} // namespace arm_compute #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index c9817a63c1..7bfdfbd13d 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,38 +25,48 @@ #define ARM_COMPUTE_NEDEPTHCONVERT_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" -#include <cstdint> +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -/**Basic function to run @ref NEDepthConvertLayerKernel */ -class NEDepthConvertLayer : public INESimpleFunctionNoBorder +/**Basic function to run @ref cpu::kernels::CpuCastKernel */ +class NEDepthConvertLayer : public IFunction { public: - /* Contructor */ - NEDepthConvertLayer() = default; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ + /** Constructor */ + NEDepthConvertLayer(); + /** Destructor */ + ~NEDepthConvertLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; - /** Default destructor */ - ~NEDepthConvertLayer() = default; + /** Default move constructor */ + NEDepthConvertLayer(NEDepthConvertLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default move assignment operator */ + NEDepthConvertLayer &operator=(NEDepthConvertLayer &&); /** Initialize the function's source, destination * - * Valid conversions Input -> Output : + * Valid data layouts: + * - All * - * - QASYMM8 -> F16, F32 - * - U8 -> U16, S16, S32 - * - U16 -> U8, U32 - * - S16 -> U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8, F32 - * - F32 -> QASYMM8, F16, BFLOAT16 + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------------------| + * |QASYMM8 | F16, F32 | + * |U8 | U16, S16, 
S32 | + * |U16 | U8, U32 | + * |S16 | U8, S32 | + * |BFLOAT16 | F32 | + * |F16 | QASYMM8, F32 | + * |F32 | QASYMM8, F16, BFLOAT16 | + * + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. @@ -73,7 +83,15 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEDEPTHCONVERT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 51f7ff7770..d27369670e 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,26 +21,27 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYER_H -#define ARM_COMPUTE_NEDEPTHTOSPACELAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include <memory> namespace arm_compute { // Forward declarations class ITensor; class ITensorInfo; +class NEDepthToSpaceLayerKernel; /** Basic function to run @ref NEDepthToSpaceLayerKernel. */ -class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder +class NEDepthToSpaceLayer : public IFunction { public: /** Constructor */ - NEDepthToSpaceLayer() = default; + NEDepthToSpaceLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -50,9 +51,18 @@ public: /** Prevent instances of this class from being moved (As this class contains non movable objects) */ NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete; /** Default destructor */ - ~NEDepthToSpaceLayer() = default; + ~NEDepthToSpaceLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. 
@@ -67,6 +77,11 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + void run() override; + +private: + std::unique_ptr<NEDepthToSpaceLayerKernel> _kernel; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEDEPTHTOSPACELAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index dc70aec7ff..6ad5aa7bfa 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,11 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" + #include <memory> namespace arm_compute @@ -54,6 +56,20 @@ public: ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * * @param[in, out] input Source tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. @@ -65,8 +81,14 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer * @@ -83,40 +105,27 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; void prepare() override; private: - /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer - * - * @param[in] 
input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a Depthwise Convolution Function - */ - static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); - - /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels: + /** Basic function to execute optimized depthwise convolution routines. 
This function calls the following kernels: * * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported * * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present - * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present + * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required * -# @ref NEActivationLayer if fused activation is required * @@ -131,9 +140,11 @@ private: /** Default move constructor */ NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; + NEDepthwiseConvolutionLayerOptimizedInternal & + operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; /** Default move assignment operator */ - NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + NEDepthwiseConvolutionLayerOptimizedInternal & + operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; /** Default destructor */ ~NEDepthwiseConvolutionLayerOptimizedInternal() = default; /** Initialize the function's source, destination, kernels and border_size. @@ -148,8 +159,14 @@ private: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3 * @@ -165,34 +182,26 @@ private: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr<Impl> 
_impl; }; - /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel: + /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: * * -# @ref NEDepthwiseConvolutionLayerNativeKernel * @@ -225,8 +234,14 @@ private: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric * @@ -243,31 +258,25 @@ private: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + unsigned int depth_multiplier = 1, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overriden: void run() override; - void prepare() override; private: - std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> 
_depthwise_conv_kernel; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + struct Impl; + std::unique_ptr<Impl> _impl; }; - - DepthwiseConvolutionFunction _depth_conv_func; - NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized; - NEDepthwiseConvolutionLayerGeneric _func_generic; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
\ No newline at end of file +#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index f52d709c74..8b49930ef5 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H #define ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { @@ -34,12 +35,36 @@ namespace arm_compute class ITensor; class ITensorInfo; -/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ -class NEDequantizationLayer : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::CpuDequantize that dequantizes an input tensor */ +class NEDequantizationLayer : public IFunction { public: + /** Default Constructor */ + NEDequantizationLayer(); + /** Default Destructor */ + ~NEDequantizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer(const NEDequantizationLayer &) = delete; + /** Default move constructor */ + NEDequantizationLayer(NEDequantizationLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer &operator=(const NEDequantizationLayer &) = delete; + /** Default move assignment operator */ + NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default; /** Configure the kernel. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:-----------| + * |QASYMM8 |F16, F32 | + * |QASYMM8_SIGNED |F16, F32 | + * |QSYMM8_PER_CHANNEL |F16, F32 | + * |QSYMM8 |F16, F32 | + * |QSYMM16 |F16, F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. */ @@ -52,6 +77,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h deleted file mode 100644 index b14e38a23a..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDERIVATIVE_H -#define ARM_COMPUTE_NEDERIVATIVE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEDerivativeKernel; -class NEFillBorderKernel; - -/** Basic function to execute first order derivative operator. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDerivativeKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEDerivative : public IFunction -{ -public: - /** Default constructor */ - NEDerivative(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivative(const NEDerivative &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivative &operator=(const NEDerivative &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEDerivative(NEDerivative &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEDerivative &operator=(NEDerivative &&) = delete; - /** Default destructor */ - ~NEDerivative(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. 
Data type supported: S16. - * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEDerivativeKernel> _kernel; /**< Derivative kernel */ - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index d5c1f0ab6f..7a94833d10 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,12 @@ #ifndef ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H #define ARM_COMPUTE_NE_DETECTION_POSTPROCESS_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/runtime/Tensor.h" #include <map> @@ -57,6 +56,16 @@ public: ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src2 |dst0 - dst3 | + * |:--------------|:--------------| + * |QASYMM8 |F32 | + * |QASYMM8_SIGNED |F32 | + * |F32 |F32 | + * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. * @param[in] input_score The class prediction input tensor. Data types supported: same as @p input_box_encoding. * @param[in] input_anchors The anchors input tensor. Data types supported: same as @p input_box_encoding. @@ -68,8 +77,14 @@ public: * * @note Output contains all the detections. Of those, only the ones selected by the valid region are valid. 
*/ - void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors, - ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); + void configure(const ITensor *input_box_encoding, + const ITensor *input_score, + const ITensor *input_anchors, + ITensor *output_boxes, + ITensor *output_classes, + ITensor *output_scores, + ITensor *num_detection, + DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDetectionPostProcessLayer * * @param[in] input_box_encoding The bounding box input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. @@ -83,8 +98,13 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors, - ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection, + static Status validate(const ITensorInfo *input_box_encoding, + const ITensorInfo *input_class_score, + const ITensorInfo *input_anchors, + ITensorInfo *output_boxes, + ITensorInfo *output_classes, + ITensorInfo *output_scores, + ITensorInfo *num_detection, DetectionPostProcessLayerInfo info = DetectionPostProcessLayerInfo()); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h deleted file mode 100644 index 1f2bcb50ea..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATE_H -#define ARM_COMPUTE_NEDILATE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute dilate. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDilateKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEDilate : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode. - * - * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. 
- * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEDILATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index 5b6ed55be2..3ae3b2a15c 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,27 +25,22 @@ #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { -class NEDirectConvolutionLayerOutputStageKernel; -class NEDirectConvolutionLayerKernel; -class NEFillBorderKernel; - +class ITensor; +class ITensorInfo; /** Function to run the direct convolution. * - * This function calls the following NEON kernels: + * This function calls the following: * - * -# @ref NEFillBorderKernel for the input - * -# @ref NEDirectConvolutionLayerOutputStageKernel - * -# @ref NEDirectConvolutionLayerKernel + * -# @ref cpu::CpuDirectConv2d */ class NEDirectConvolutionLayer : public IFunction { @@ -64,6 +59,16 @@ public: ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @note: DirectConvolution only works in the following configurations: * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 @@ -80,7 +85,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *bias, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer * * @note: DirectConvolution only works in the following configurations: @@ -101,23 +111,20 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *bias, + const ITensorInfo *output, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - std::unique_ptr<NEDirectConvolutionLayerOutputStageKernel> _output_stage_kernel; - std::unique_ptr<NEDirectConvolutionLayerKernel> _conv_kernel; - std::unique_ptr<NEFillBorderKernel> _input_border_handler; - NEActivationLayer 
_activationlayer_function; - Tensor _accumulator; - bool _has_bias; - bool _is_activationlayer_enabled; - unsigned int _dim_split; - bool _is_padding_required; + struct Impl; + std::shared_ptr<IMemoryManager> _memory_manager; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h index 5c755e96ac..ebf2277d1f 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INEOperator.h" @@ -32,7 +33,7 @@ namespace arm_compute { class ITensor; -/** Basic function to run @ref NEArithmeticOperationKernel for max +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a max operation between two tensors. @@ -54,13 +55,29 @@ public: NEElementwiseMax &operator=(NEElementwiseMax &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. 
* @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -69,7 +86,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -79,7 +99,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for min +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a min operation between two tensors. @@ -101,13 +121,29 @@ public: NEElementwiseMin &operator=(NEElementwiseMin &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. 
@@ -116,7 +152,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -126,7 +165,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for squared difference +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference * * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 @@ -148,13 +187,29 @@ public: NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
*/ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference * * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -163,7 +218,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -173,7 +231,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for division +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division * * @note The tensor data type for the inputs must be F16/F32. * @note The function performs a squared different operation between two tensors (i.e., out[i] = in1[i] / in2[i]) @@ -195,13 +253,25 @@ public: NEElementwiseDivision &operator=(NEElementwiseDivision &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. 
Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division * * @param[in] input1 First tensor input info. Data types supported: F16/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -210,7 +280,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -220,7 +293,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEArithmeticOperationKernel for power +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power * * @note The tensor data type for the inputs must be F16/F32. * @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) @@ -243,13 +316,25 @@ public: NEElementwisePower &operator=(NEElementwisePower &&); /** Initialise the kernel's inputs, output and conversion policy. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @param[in, out] input1 First tensor input. Data types supported: F16/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: Same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power * * @param[in] input1 First tensor input info. Data types supported: F16/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -258,7 +343,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -268,7 +356,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComparisonOperationKernel. +/** Basic function to run @ref cpu::kernels::CpuComparisonKernel. * * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. 
* @note The function performs a comparison operation between two tensors. @@ -290,13 +378,27 @@ public: NEElementwiseComparison &operator=(NEElementwiseComparison &&); /** Initialise the kernel's inputs, output and conversion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:-----| + * |QASYMM8 |QASYMM8 |U8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |U8 | + * |S32 |S32 |U8 | + * |U8 |U8 |U8 | + * |S16 |S16 |U8 | + * |F16 |F16 |U8 | + * |F32 |F32 |U8 | + * * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. * @param[out] output Output tensor. Data types supported: U8. * @param[in] op Comparison Operation to be performed. */ void configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. 
@@ -305,7 +407,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); + static Status + validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); // Inherited methods overridden: void run() override; @@ -315,7 +418,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComparisonOperationKernel +/** Basic function to run @ref cpu::kernels::CpuComparisonKernel * * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @note The function performs a comparison operation between two tensors. @@ -343,7 +446,7 @@ public: * @param[out] output Output tensor. Data types supported: U16/U32. */ void configure(ITensor *input1, ITensor *input2, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel + /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel * * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. @@ -374,206 +477,5 @@ using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>; /** Basic function to run less-equal comparison. */ using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>; -namespace experimental -{ -/** Basic function to run @ref NEArithmeticOperationKernel for max - * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @note The function performs a max operation between two tensors. - */ -class NEElementwiseMax : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. 
- * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run @ref NEArithmeticOperationKernel for min - * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @note The function performs a min operation between two tensors. - */ -class NEElementwiseMin : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. 
Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run @ref NEArithmeticOperationKernel for squared difference - * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2 - */ -class NEElementwiseSquaredDiff : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run @ref NEArithmeticOperationKernel for division - * - * @note The tensor data type for the inputs must be S32/F16/F32. 
- * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i]) - */ -class NEElementwiseDivision : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division - * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run @ref NEArithmeticOperationKernel for power - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs a elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) - * @note For an exponent that is a float, this function will only work with a positive base. - */ -class NEElementwisePower : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: F16/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. 
- */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run @ref NEComparisonOperationKernel. - * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @note The function performs a comparison operation between two tensors. - */ -class NEElementwiseComparison : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U16/U32. - * @param[in] op Comparison Operation to be performed. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. - * @param[in] op Comparison Operation to be performed. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op); -}; - -/** Basic function to run @ref NEComparisonOperationKernel - * - * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @note The function performs a comparison operation between two tensors. - */ -template <ComparisonOperation op> -class NEElementwiseComparisonStatic : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U16/U32. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; - -/** Basic function to run equal comparison. */ -using NEEqual = NEElementwiseComparisonStatic<ComparisonOperation::Equal>; -/** Basic function to run not equal comparison. */ -using NENotEqual = NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>; -/** Basic function to run greater comparison. */ -using NEGreater = NEElementwiseComparisonStatic<ComparisonOperation::Greater>; -/** Basic function to run greater-equal comparison. 
*/ -using NEGreaterEqual = NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>; -/** Basic function to run less comparison. */ -using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>; -/** Basic function to run less-equal comparison. */ -using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>; -} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 46a7316705..63e47b8377 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,151 +25,72 @@ #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H #include "arm_compute/core/Error.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; - -/** Basic function to perform inverse square root on an input tensor. */ -class NERsqrtLayer : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to perform exponential on an input tensor. */ -class NEExpLayer : public INESimpleFunctionNoBorder +/** Basic function to perform unary elementwise operations */ +template <ElementWiseUnary op> +class NEElementwiseUnaryLayer : public IFunction { public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEExpLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; + /** Default Constructor */ + NEElementwiseUnaryLayer(); + /** Default Destructor */ + ~NEElementwiseUnaryLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryLayer(const NEElementwiseUnaryLayer &) = delete; + /** Default move constructor */ + NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryLayer &operator=(const NEElementwiseUnaryLayer &) = delete; + /** Default move assignment operator */ + NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&); -/** Basic function to negate an input tensor. */ -class NENegLayer : public INESimpleFunctionNoBorder -{ -public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. 
- */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NENegLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. + * Valid data layouts: + * - All * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to compute the natural logarithm of an input tensor. */ -class NELogLayer : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. + * @param[in] input Input tensor. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. + * @param[out] output Output tensor. Data types supported: Same as @p input. */ void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input First tensor input info. Data types supported: F16/F32. + * @param[in] input Input tensor info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations. * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; + // Inherited methods overridden: + void run() override; -/** Basic function to compute the absolute value of an input tensor. */ -class NEAbsLayer : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. 
Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEAbsLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -/** Basic function to compute the round value elementwise of an input tensor. */ -class NERoundLayer : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; +using NERsqrtLayer = NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>; +using NEExpLayer = NEElementwiseUnaryLayer<ElementWiseUnary::EXP>; +using NENegLayer = NEElementwiseUnaryLayer<ElementWiseUnary::NEG>; +using NELogLayer = NEElementwiseUnaryLayer<ElementWiseUnary::LOG>; +using NEAbsLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ABS>; +using NERoundLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>; +using NESinLayer = NEElementwiseUnaryLayer<ElementWiseUnary::SIN>; -/** Basic function to compute the sine of an input tensor. 
*/ -class NESinLayer : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESinLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; } // namespace arm_compute #endif /* ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h deleted file mode 100644 index e81b4ce33a..0000000000 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H - -#include "arm_compute/runtime/Distribution1D.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/Lut.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -class NEHistogramKernel; -class NECumulativeDistributionKernel; -class NETableLookupKernel; -using IImage = ITensor; - -/** Basic function to execute histogram equalization. This function calls the following NEON kernels: - * - * -# @ref NEHistogramKernel - * -# @ref NECumulativeDistributionKernel - * -# @ref NETableLookupKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEEqualizeHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEEqualizeHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEqualizeHistogram(const NEEqualizeHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEEqualizeHistogram(NEEqualizeHistogram &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete; - /** Default destructor */ - ~NEEqualizeHistogram(); - /** Initialise the kernel's inputs. - * - * @note Currently the width of the input image must be a multiple of 16. - * - * @param[in] input Input image. Data type supported: U8. 
- * @param[out] output Output image. Data type supported: same as @p input - */ - void configure(const IImage *input, IImage *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */ - std::unique_ptr<NECumulativeDistributionKernel> _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution - and creates the relevant LookupTable. */ - std::unique_ptr<NETableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ -}; -} -#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h deleted file mode 100644 index b81da4e5b6..0000000000 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODE_H -#define ARM_COMPUTE_NEERODE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute erode. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEErodeKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEErode : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode - * - * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. 
- * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEERODE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index 4b6cc3fd18..99c6fd4eb4 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEFFT1D_H #define ARM_COMPUTE_NEFFT1D_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" @@ -40,7 +39,7 @@ class NEFFTDigitReverseKernel; class NEFFTRadixStageKernel; class NEFFTScaleKernel; -/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels: +/** Basic function to execute one dimensional FFT. This function calls the following kernels: * * -# @ref NEFFTDigitReverseKernel Performs digit reverse * -# @ref NEFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition @@ -63,6 +62,14 @@ public: ~NEFFT1D(); /** Initialise the function's source and destinations. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. 
* Number of channels supported: 1 (real tensor) or 2 (complex tensor).If @p input is real, @p output must be complex. diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index 18e72c1a2f..cefd3df17a 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEFFT2D_H #define ARM_COMPUTE_NEFFT2D_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/FunctionDescriptors.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEFFT1D.h" #include "arm_compute/runtime/Tensor.h" @@ -36,7 +35,7 @@ namespace arm_compute // Forward declaration class ITensor; -/** Basic function to execute two dimensional FFT. This function calls the following NEON kernels: +/** Basic function to execute two dimensional FFT. This function calls the following kernels: * * -# @ref NEFFT1D 1D FFT is performed on the first given axis * -# @ref NEFFT1D 1D FFT is performed on the second given axis @@ -58,6 +57,14 @@ public: ~NEFFT2D(); /** Initialise the function's source and destinations * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: F32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. 
* @param[in] config FFT related configuration diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index 37750e243b..84bfe6b02f 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEFFTCONVOLUTIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEFFT2D.h" @@ -43,7 +42,7 @@ namespace arm_compute // Forward declarations class ITensor; -/** Basic function to execute FFT-based convolution on NEON. This function calls the following NEON functions/kernels: +/** Basic function to execute FFT-based convolution on CPU. This function calls the following functions/kernels: * * -# @ref NEPermute Permute input if NHWC(only NCHW is supported). * -# @ref NEPadLayer Pad input. @@ -73,38 +72,58 @@ public: ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. 
Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend. 
*/ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); /** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input - * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. 
Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h deleted file mode 100644 index e86a87eb7e..0000000000 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFASTCORNERS_H -#define ARM_COMPUTE_NEFASTCORNERS_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NENonMaximaSuppression3x3Kernel; -class NEFastCornersKernel; -class NEFillBorderKernel; -class NEFillArrayKernel; -using IImage = ITensor; - -/** Basic function to execute fast corners. 
This function call the following NEON kernels: - * - * -# @ref NEFastCornersKernel - * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) - * -# @ref NEFillArrayKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEFastCorners : public IFunction -{ -public: - /** Constructor */ - NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCorners(const NEFastCorners &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCorners &operator=(const NEFastCorners &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFastCorners(NEFastCorners &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFastCorners &operator=(NEFastCorners &&) = delete; - /** Default destructor */ - ~NEFastCorners(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::unique_ptr<NEFastCornersKernel> _fast_corners_kernel; - std::unique_ptr<NEFillBorderKernel> _border_handler; - std::unique_ptr<NENonMaximaSuppression3x3Kernel> _nonmax_kernel; - std::unique_ptr<NEFillArrayKernel> _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; -}; -} -#endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index 14d690f419..1829c71fef 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,22 +26,51 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; -/** Basic function to run @ref NEMemsetKernel */ -class NEFill : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuFillKernel */ +class NEFill : public IFunction { public: + /** Default Constructor */ + NEFill(); + /** Default Destructor */ + ~NEFill(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFill(const NEFill &) = delete; + /** Default move constructor */ + NEFill(NEFill &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFill &operator=(const NEFill &) = delete; + /** Default move assignment operator */ + NEFill &operator=(NEFill &&); /** Initialize the function * + * Valid data layouts: + * - All + * + * 
Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in,out] tensor Source tensor. Data types supported: All * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ITensor *tensor, PixelValue constant_value); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_FILL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index e9a08ef7ec..44b1d4a62b 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" + #include <memory> namespace arm_compute @@ -39,8 +40,17 @@ class NEFillBorderKernel; class NEFillBorder : public IFunction { public: + NEFillBorder(); /** Initialize the function's source, destination and border_mode. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @note This function fills the borders within the XY-planes. * * @param[in, out] input Source tensor. Data type supported: All @@ -48,7 +58,10 @@ public: * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
*/ - void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + void configure(ITensor *input, + unsigned int border_width, + BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 9f0d5226de..3e92143824 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,9 @@ #define ARM_COMPUTE_NEFLATTENLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { @@ -33,11 +35,30 @@ class ITensor; class ITensorInfo; /** Basic function to execute flatten layer kernel. */ -class NEFlattenLayer : public INESimpleFunctionNoBorder +class NEFlattenLayer : public IFunction { public: + NEFlattenLayer(); + /** Destructor */ + ~NEFlattenLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayer(const NEFlattenLayer &) = delete; + /** Default move constructor */ + NEFlattenLayer(NEFlattenLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFlattenLayer &operator=(const NEFlattenLayer &) = delete; + /** Default move assignment operator */ + NEFlattenLayer &operator=(NEFlattenLayer &&); /** Initialise the kernel's input and output. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input @@ -54,6 +75,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 7f4248eadb..77ac484bab 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,21 +24,44 @@ #ifndef ARM_COMPUTE_NEFLOOR_H #define ARM_COMPUTE_NEFLOOR_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { +// Forward declarations class ITensor; class ITensorInfo; -/** Basic function to run @ref NEFloorKernel */ -class NEFloor : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuFloorKernel */ +class NEFloor : public IFunction { public: + /** Constructor */ + NEFloor(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloor(const NEFloor &) = delete; + /** Default move constructor */ + NEFloor(NEFloor &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFloor &operator=(const NEFloor &) = delete; + /** Default move assignment operator */ + NEFloor &operator=(NEFloor &&); + /** Destructor */ + ~NEFloor(); /** Set the source, destination of the kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input Source tensor. Data type supported: F16/F32. * @param[out] output Destination tensor. 
Same as @p input */ @@ -51,6 +74,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEFLOOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 0a7748a94b..885f8430cf 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,57 +24,20 @@ #ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H #define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H +#include "arm_compute/function_info/FullyConnectedLayerInfo.h" #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" -#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/Tensor.h" -namespace arm_compute -{ -class NEFlattenLayerKernel; +#include <memory> -/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
- */ -class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder +namespace arm_compute { -public: - /** Constructor */ - NEFullyConnectedLayerReshapeWeights() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete; - /** Default destructor */ - ~NEFullyConnectedLayerReshapeWeights() = default; - /** Set the input and output tensors. - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights - * - * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - namespace weights_transformations { -/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */ +/** Basic function to manage the reshape weights generated from @ref NETranspose */ class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights { public: @@ -106,17 +69,17 @@ public: } private: - static constexpr uint32_t _uid = 0x0; - Tensor _output{}; - NEFullyConnectedLayerReshapeWeights _func{}; + static constexpr uint32_t _uid = 0x0; + Tensor _output{}; + NETranspose _func{}; }; } // namespace weights_transformations -/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) - * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) - * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr) +/** Basic function to compute a Fully Connected layer. This function calls the following kernels: + * -# @ref cpu::kernels::CpuIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref NETranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) + * -# @ref NEGEMM or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) + * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel or @ref NEGEMMLowpOutputStage (if quantized asymmetric) (if @p biases is not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
*/ @@ -124,7 +87,8 @@ class NEFullyConnectedLayer : public IFunction { public: /** Constructor */ - NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete; /** Prevent instances of this class from being moved (As this class contains pointers) */ @@ -137,66 +101,77 @@ public: ~NEFullyConnectedLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. + * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. + * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. + * Data type supported: Same as @p input. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: + * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer + * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. + * Data type supported: Same as @p input. 
+ * @param[in] fc_info (Optional) Fully connected layer additional info + * @param[in] weights_info (Optional) Stores neccessary compute information when weights are already reshaped */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), + const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info + * Similar to @ref NEFullyConnectedLayer::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), + const WeightsInfo &weights_info = WeightsInfo()); + + /** Static function that queries whether fixed-format kernel exists for a given problem description + * + * @param[out] expected_weight_format Format in which weights should be for found fixed format kernel + * @param[in] input Source tensor + * @param[in] weights Weights tensor. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. 
+ * @param[in] output Destination tensor + * @param[in] fc_info Fully connected layer additional info + * @param[in] weights_info Describes weights shape + * + * @return a status + */ + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const FullyConnectedLayerInfo &fc_info, + const WeightsInfo &weights_info); //Inherited methods override void run() override; void prepare() override; private: - void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); - - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel; - NEConvertFullyConnectedWeights _convert_weights; - weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; - NEFullyConnectedLayerReshapeWeights _reshape_weights_function; - weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - Tensor _flatten_output; - Tensor _converted_weights_output; - Tensor _reshape_weights_output; - const ITensor *_original_weights; - bool _are_weights_converted; - bool _are_weights_reshaped; - bool _is_fc_after_conv; - bool _is_quantized_asymmetric; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h 
b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index 5dc804e240..f53b3de7f6 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,16 @@ public: ~NEFuseBatchNormalization(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights @@ -65,9 +75,16 @@ public: * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to Convolution. 
*/ - void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + void configure(const ITensor *input_weights, + const ITensor *bn_mean, + const ITensor *bn_var, + ITensor *fused_weights, + ITensor *fused_bias, + const ITensor *input_bias = nullptr, + const ITensor *bn_beta = nullptr, + const ITensor *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization * * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC @@ -85,10 +102,16 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + static Status validate(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, + const ITensorInfo *bn_beta = nullptr, + const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h 
b/arm_compute/runtime/NEON/functions/NEGEMM.h index 645ab56417..29650a5eca 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,38 +24,18 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H +#include "arm_compute/function_info/GEMMInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { -class NEGEMMInterleave4x4Kernel; -class NEGEMMMatrixAdditionKernel; -class NEGEMMMatrixMultiplyKernel; -class NEGEMMTranspose1xWKernel; -/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: +/** Basic function to execute GEMM. 
This function calls the following kernels: * - * If optimized assembly is available: - * -# @ref NEGEMMAssemblyDispatch - * -# @ref NEActivationLayer (if alpha != 1.0) - * Else: - * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) - * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) - * -# @ref NEGEMMMatrixMultiplyKernel - * In both cases: - * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) - * Else: - * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place) - * - * -# @ref NEActivationLayer (if activation is specified in GEMMInfo) + * -# @ref cpu::CpuGemm */ class NEGEMM : public IFunction { @@ -74,9 +54,21 @@ public: ~NEGEMM(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. * + * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around + * * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. 
Data type supported: same as @p a @@ -86,49 +78,49 @@ public: * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and * if the reshape of matrix B should happen only for the first run */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + void configure(const ITensor *a, + const ITensor *b, + const ITensor *c, + ITensor *d, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMM. * - * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32 - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor info. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * Similar to @ref NEGEMM::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); + + /** Static function that queries whether there exists fixed-format kernel and if it exists it will return in the first argument in what format + * weights are expected to be reshaped as defined by WeightFormat class. Apart from the first argument the rest of the arguments are the same + * as in @ref NEGEMM::validate() except that all arguments are required. 
* * @return a status */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; - - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _tmp_d; - const ITensor *_original_b; - bool _run_vector_matrix_multiplication; - bool _run_alpha_scale; - bool _run_addition; - bool _run_bias_addition; - bool _run_activation; - bool _reshape_b_only_on_first_run; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h deleted file mode 100644 index 8f9498d0f5..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/* Convolution method supported by the assembly gemm interface */ -enum class AsmConvMethod -{ - Im2Col, - Indirect, - Conv -}; - -struct AsmGemmInfo -{ - AsmConvMethod method{ AsmConvMethod::Im2Col }; - PadStrideInfo ps_info{}; - ActivationLayerInfo activation_info{}; - GEMMLowpOutputStageInfo output_stage{}; - bool negated_offsets{ true }; - bool reinterpret_input_as_3d{ false }; - bool depth_output_gemm3d{ false }; - int64_t padding_top{ 0 }; - int64_t padding_left{ 0 }; - float padding_value{ 0.f }; -}; - -/** Assembly kernel glue */ -class NEGEMMAssemblyDispatch : public IFunction -{ -public: - /** Constructor */ - NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); - /** Prevent instances of this class from being copy constructed */ - NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; - NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; - NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; - ~NEGEMMAssemblyDispatch() = default; - - class IFallback - { - public: - virtual void run() = 0; - virtual void prepare() = 0; - virtual bool is_configured() const = 0; - virtual ~IFallback() = default; - }; - -public: - /** If supported create a Compute Library function else fallback to the arm_gemm function. 
- * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations - * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info); - - /** Indicates whether or not this function can be used to process the given parameters. - * - * @param[in] a Input tensor info (Matrix A) - * @param[in] b Input tensor info (Matrix B) - * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations - * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] info GEMM meta-data - * - * @return a status. - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info); - /** Checks if activation is supported by the gemm assembly dispatcher - * - * @param[in] activation Activation to check - * - * @return True if activation is supported else false - */ - static bool is_activation_supported(const ActivationLayerInfo &activation); - /** Was the function successfully configured ? 
- * - * @return True if the function is configured and ready to run - */ - bool is_configured() const; - - // Inherited methods overridden: - void prepare() override; - void run() override; - -private: - std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */ - MemoryGroup _memory_group; /**< Function memory group */ - IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index 7cae39397f..d1c5a1c9b3 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,21 +27,20 @@ #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> + namespace arm_compute { // Forward declarations class ITensor; -/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions: +class ITensorInfo; + +/** Basic function to compute the convolution layer. 
This function calls the following kernels/functions: * * Supports only NHWC data layout * - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch * * Weights are transformed from OHWI to HWIO format using the following kernels: @@ -60,8 +59,22 @@ public: NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete; /** Default move assignment operator */ NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default; + /** Destructor */ + ~NEGEMMConv2d(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. @@ -73,7 +86,8 @@ public: * Data types supported: Same as @p input. * @param[in] info Convolution layer descriptor */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info); + void + configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConv2d * * @param[in] input Source tensor info. 
3 lower dimensions represent a single input [width, height, IFM], @@ -89,20 +103,19 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const Conv2dInfo &info); // Inherited methods overridden: void run() override; void prepare() override; private: - NEGEMMAssemblyDispatch _gemm_asm_func; - NEActivationLayer _activation_func; - NEPermute _weights_permute_func; - const ITensor *_original_weights; - Tensor _permuted_weights; - bool _is_prepared; - bool _run_activation; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 59d83ed68d..3e84c3e2cf 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,147 +24,31 @@ #ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { class ITensor; -class NECol2ImKernel; -class NEIm2ColKernel; -class NEWeightsReshapeKernel; - -/** Function to reshape the weights. This function calls the following kernel: - * -# @ref NEWeightsReshapeKernel - */ -class NEConvolutionLayerReshapeWeights : public IFunction -{ -public: - /** Constructor */ - NEConvolutionLayerReshapeWeights(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = delete; - /** Default destructor */ - ~NEConvolutionLayerReshapeWeights(); - /** Set 
the input and output tensors. - * - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: All. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: same as @p weights. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output Destination tensor. Data types supported: same as @p weights. - */ - void configure(const ITensor *weights, const ITensor *biases, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights - * - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: All. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: same as @p weights. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[in] output Destination tensor. Data types supported: same as @p weights. 
- * - * @return an error status - */ - static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel; -}; - -namespace weights_transformations -{ -/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */ -class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights -{ -public: - /** Constructor */ - NEConvolutionLayerReshapeWeightsTransform() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete; - /** Default destructor */ - ~NEConvolutionLayerReshapeWeightsTransform() = default; - void configure(const ITensor *input, const ITensor *biases) - { - _bias_bit = (biases != nullptr) ? 
1 : 0; - _func.configure(input, biases, &_output); - } - - void run() override - { - _output.allocator()->allocate(); - _func.run(); - _reshape_run = true; - } - - ITensor *get_weights() override - { - return &_output; - } - - void release() override - { - _output.allocator()->free(); - } +class ITensorInfo; - uint32_t uid() override - { - return ((0x8) | (_bias_bit << 7)); - } - - bool is_reshape_run() - { - return _reshape_run; - } - -private: - Tensor _output{}; - NEConvolutionLayerReshapeWeights _func{}; - int32_t _bias_bit{ 0 }; -}; -} // namespace weights_transformations - -/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions: +/** Basic function to compute the convolution layer. This function calls the following kernels/functions: * - * -# @ref NEIm2ColKernel - * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32) - * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED) - * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout) - * -# @ref NECol2ImKernel (if NCHW data layout) + * -# @ref cpu::CpuGemmConv2d * */ class NEGEMMConvolutionLayer : public IFunction { public: /** Constructor */ - NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete; /** Prevent instances of this class from being moved (As this class contains non movable objects) */ @@ -177,116 +61,154 @@ public: ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. 
* - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer * - * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Destination tensor info. 
3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported + * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. + * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. + * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - /** Configures the appropriate matrix multiply routine + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false, + unsigned int num_groups = 1); + + /** Static function to check if there is an optimized version of + * GEMM available for the input parameters. * - * @param[in] input Input tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) - */ - void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines + * The method is intended to be used to find out the optimal + * memory layout to be used for the weights tensor when running + * variable weights execution. * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Output tensor info. Data types supported: Same as @p input, - * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type. 
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) - * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false) + * The user can query the database of optimised kernels in + * arm_gemm by specifying one of the enumerations of + * arm_compute::WeightFormat in the weight_format field of the input + * parameter weights_info. In case of success, the method + * writes the expected format in the output parameter + * expected_weight_format. The expected_weight_format can then be + * used in the configure method of the class for retrieving the + * optimal kernel. * - * @return a status - */ - static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - int gemm_3d_depth = 1, bool skip_im2col = false); - /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore + * Use case one - query for a specific format: * - * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32. - * @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * @param[in] gemm_3d_depth Depth of GEMM 3D - * @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout + * WeightsInfo weights_info(..., arm_compute::WeightFormat::OHWIo4, ...); // Set the value of the input query.
+ * arm_compute::WeightFormat expected_wf; + * if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...)) + * { + * auto conv = std::make_unique<NEGEMMConvolutionLayer>(); + * conv->configure(..., weights_info, ...); // uses the same WeightFormat the user wanted originally, OHWIo4. + * conv->run(...); + * } * - * @return a status + * Use case two - query for any format that would be optimal for the GEMM to execute: + * + * WeightsInfo weights_info(..., arm_compute::WeightFormat::ANY, ...); // Set the value of the input query. + * arm_compute::WeightFormat expected_wf; + * if (NEGEMMConvolutionLayer::has_opt_impl(expected_wf, ...., weights_info, ...)) + * { + * auto conv = std::make_unique<NEGEMMConvolutionLayer>(); + * // ... code to convert the layout of the weights tensor to the layout returned by has_opt_impl + * WeightsInfo new_weights_info(..., expected_wf, ...); // Set the value of the WeightFormat returned by has_opt_impl. + * conv->configure(..., new_weights_info, ...); + * conv->run(...); + * } + * + * Notice that a GEMM configured with a WeightFormat other than + * UNSPECIFIED will run GEMM in variable weights mode. + * + * @param[out] expected_weight_format The arm_compute::WeightFormat expected by the kernel. + * @param[in] src Source tensor info. + * @param[in] weights Weights tensor info. + * @param[in] biases Biases tensor info. Shared biases supported. + * @param[in] dst Destination tensor info. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info (Optional) Specifies additional configuration parameters for the weights of the GEMM computation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. And no activation (i.e. Linear) which is the default value.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation + * available which may introduce a drop of accuracy as well. Default is false + * + * @return a Status */ - static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col); + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U), + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); + // Inherited methods overridden: + void run() override; + void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEConvolutionLayerReshapeWeights _reshape_weights; - weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - std::unique_ptr<NEIm2ColKernel> _im2col_kernel; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - std::unique_ptr<NECol2ImKernel> _col2im_kernel; - NEReshapeLayer _reshape_layer; - - const ITensor *_original_weights; - - Tensor _im2col_output; - Tensor _weights_reshaped; - Tensor _gemm_output; - Tensor _tmp_output; - - DataLayout _data_layout; - - bool _skip_im2col; - bool _skip_col2im; - bool _is_quantized; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H */ +#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h deleted file mode 100644 index 7195c71063..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - *
Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H -#define ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel: - * - * -# @ref NEGEMMInterleave4x4Kernel - * - */ -class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output - * - * @param[in] input First input tensor. Data types supported: All - * @param[out] output Output tensor. 
Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index cb1d6bd782..6d07675d3d 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,49 +21,34 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H -#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H -#include "NEActivationLayer.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/GEMMInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/IWeightsManager.h" #include <memory> namespace arm_compute { class ITensor; -class NEConvertQuantizedSignednessKernel; -class NEConvertQuantizedSignednessKernel; -class NEGEMMInterleave4x4Kernel; -class NEGEMMLowpMatrixMultiplyKernel; -class NEGEMMLowpOffsetContributionKernel; -class NEGEMMLowpOffsetContributionOutputStageKernel; -class NEGEMMLowpMatrixAReductionKernel; -class NEGEMMLowpMatrixBReductionKernel; -class NEGEMMTranspose1xWKernel; +class ITensorInfo; -/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. 
This function calls the following NEON kernels if the DOT product instruction is not available: +/** Function to run Gemm on quantized types. * - * -# @ref NEGEMMInterleave4x4Kernel - * -# @ref NEGEMMTranspose1xWKernel - * -# @ref NEGEMMLowpMatrixMultiplyKernel - * -# @ref NEGEMMLowpOffsetContributionKernel - * -# @ref NEActivationLayer + * This function calls the following: * - * otherwise if the DOT product instruction is available: - * - * -# @ref NEGEMMLowpOffsetContributionKernel - * -*/ + * -# @ref cpu::CpuGemmLowpMatrixMultiplyCore + */ class NEGEMMLowpMatrixMultiplyCore : public IFunction { public: /** Constructor */ - NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, + IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move constructor */ @@ -76,6 +61,27 @@ public: ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QASYMM8 |S32 |S32 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8 |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 | + * * @note 
GEMM_LOWP: low precision GEMM kernel * This kernel performs the following computations: * @@ -83,69 +89,36 @@ public: * -# Convert b values from QASYMM8 to int32 add b_offset to each of them. * -# Compute the matrix product of the resulting a * b in int32. * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise + * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise * * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. * @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 - * @param[out] output Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED + * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32/F32 + * @param[out] output Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and * if the reshape of matrix B should be executed only for the first run */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo()); + void configure( + const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor info (Matrix B).
Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32 - * @param[in] output Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should be executed only for the first run + * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure() * * @return a status */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + const GEMMInfo &gemm_info = GEMMInfo()); // Inherited methods overridden void run() override; void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel; - std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel; - std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel; - std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel; - NEActivationLayer _activation_func; - std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm; - std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm; - - Tensor _vector_sum_col; - Tensor _vector_sum_row; - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _mm_result_s32; - Tensor _signed_a; - Tensor _signed_output; - const ITensor *_original_b; - int32_t 
_a_offset; - int32_t _b_offset; - - bool _run_vector_matrix_multiplication; - bool _assembly_path; - bool _fused_assembly_path; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - bool _fuse_output_stage; - bool _run_activation; - bool _flip_signedness; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index 6977d27cb6..0d932bb4af 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" -/** This file contains all available output stages for GEMMLowp on NEON. +/** This file contains all available output stages for GEMMLowp. * * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore), * and processes it to obtain the final ASYMM8 value. @@ -39,237 +39,17 @@ namespace arm_compute { class ITensor; class ITensorInfo; - -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON. 
- * - * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters: - * - * result_fixedpoint_multiplier, result_shift, result_offset_after_shift - * - * The final result is: - * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift -*/ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; - /** Prevent 
instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; - /** Default destructor */ - ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(); - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - * - * @param[in] input Input tensor. 
It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON. 
- * - * NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters: - * - * result_fixedpoint_multiplier, result_shift, result_offset_after_shift - * - * The final result is: - * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift -*/ -class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; - /** Prevent instances 
of this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; - /** Default destructor */ - ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(); - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint - * - * @param[in] input Input tensor. 
It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; -/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON. - * - * NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters: +/** Basic function to execute GEMMLowpQuantizeDown kernels. 
* - * result_fixedpoint_multiplier, result_shift + * This function calls the following operators: * - * The final result is: - * - * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68 - * - * In case the bias tensor is provided, the final result is: - * - * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift - * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions - * after the result is shifted right by result_shift + * -# @ref cpu::CpuGemmLowpOutputStage */ -class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder +class NEGEMMLowpOutputStage : public IFunction { public: /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; - /** Prevent instances of 
this class from being moved (As this class contains non movable objects) */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; - /** Default destructor */ - ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(); - /** Initialise the kernel's inputs, output - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(), - int max = std::numeric_limits<int32_t>::max()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - * - * @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32 - * @param[in] bias Biases tensor info. 
Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max()); -}; - -/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON. 
- * - * This function calls the following NEON kernels: - * - * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel -*/ -class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEGEMMLowpOutputStage() = default; + NEGEMMLowpOutputStage(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -282,6 +62,16 @@ public: ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:-------------| + * |S32 |S32 |QASYMM8 | + * |S32 |S32 |QASYMM8_SIGNED| + * |S32 |S32 |QSYMM16 | + * * @param[in] input Input tensor. Data type supported: S32 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. 
@@ -299,7 +89,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const GEMMLowpOutputStageInfo &info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h deleted file mode 100644 index 723a638d76..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H -#define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; -class ITensorInfo; - -/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels: - * - * -# @ref NEGEMMTranspose1xWKernel - * - */ -class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEGEMMTranspose1xW() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete; - /** Default destructor */ - ~NEGEMMTranspose1xW() = default; - /** Initialise the kernel's inputs, output - * - * @param[in] input First input tensor. Data type supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW - * - * @param[in] input First input tensor. Data type supported: All - * @param[in] output Output tensor. 
Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index a5e0461227..9c7ae0134d 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,19 +40,26 @@ class NEGather : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. + * @note The "axis" must be in the range [0, input.rank -1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor. * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * */ void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input Source tensor info. 
Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * Similar to @ref NEGather::configure() * * @return a status */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h deleted file mode 100644 index 9341c76d85..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3_H -#define ARM_COMPUTE_NEGAUSSIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h deleted file mode 100644 index 51ebee3e8e..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H -#define ARM_COMPUTE_NEGAUSSIAN5x5_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEGaussian5x5HorKernel; -class NEGaussian5x5VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute gaussian filter 5x5. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5HorKernel - * -# @ref NEGaussian5x5VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussian5x5 : public IFunction -{ -public: - /** Default constructor - */ - NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5(const NEGaussian5x5 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete; - /** Allow instances of this class to be moved */ - NEGaussian5x5(NEGaussian5x5 &&) = default; - /** Allow instances of this class to be moved */ - NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default; - /** Default destructor */ - ~NEGaussian5x5(); - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr<NEGaussian5x5HorKernel> _kernel_hor; /**< kernel for horizontal pass */ - std::unique_ptr<NEGaussian5x5VertKernel> _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h deleted file mode 100644 index f5a1272b53..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMID_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H - -#include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEGaussianPyramidHorKernel; -class NEGaussianPyramidVertKernel; -class NEFillBorderKernel; - -/** Common interface for all Gaussian pyramid functions - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussianPyramid : public IFunction -{ -public: - /** Default constructor */ - NEGaussianPyramid(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid(const NEGaussianPyramid &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramid(NEGaussianPyramid &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default; - /** Default destructor */ - virtual ~NEGaussianPyramid() = default; - - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: U8. 
- * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0; - -protected: - const ITensor *_input; - IPyramid *_pyramid; - Pyramid _tmp; -}; - -/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussianPyramidHorKernel - * -# @ref NEGaussianPyramidVertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NEGaussianPyramidHalf : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidHalf(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHalf(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector<std::unique_ptr<NEFillBorderKernel>> _horizontal_border_handler; - std::vector<std::unique_ptr<NEFillBorderKernel>> _vertical_border_handler; - std::vector<std::unique_ptr<NEGaussianPyramidHorKernel>> _horizontal_reduction; - 
std::vector<std::unique_ptr<NEGaussianPyramidVertKernel>> _vertical_reduction; -}; - -/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5 - * -# @ref NEScaleKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NEGaussianPyramidOrb : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidOrb(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default; - /** Default destructor */ - ~NEGaussianPyramidOrb(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector<NEGaussian5x5> _gaus5x5; - std::vector<NEScale> _scale_nearest; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index 613f0d1c47..0f294fde22 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" -#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPadLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" @@ -41,17 +40,18 @@ namespace arm_compute { class ITensor; +class NEComputeAllAnchorsKernel; /** Basic function to generate proposals for a RPN (Region Proposal Network) * - * This function calls the following Neon kernels: - * -# @ref NEComputeAllAnchors + * This function calls the following Arm(R) Neon(TM) layers/kernels: + * -# @ref NEComputeAllAnchorsKernel * -# @ref NEPermute x 2 * -# @ref NEReshapeLayer x 2 * -# @ref NEBoundingBoxTransform * -# @ref NEPadLayerKernel - * -# @ref NEDequantizationLayerKernel x 2 - * -# @ref NEQuantizationLayerKernel + * -# @ref NEDequantizationLayer x 2 + * -# @ref NEQuantizationLayer * And the following CPP kernels: * -# @ref CPPBoxWithNonMaximaSuppressionLimit */ @@ -72,6 +72,16 @@ public: /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 | + * * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores @@ -85,7 +95,12 @@ public: * @note Only single image prediction is supported. 
Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct. * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid. */ - void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals, + void configure(const ITensor *scores, + const ITensor *deltas, + const ITensor *anchors, + ITensor *proposals, + ITensor *scores_out, + ITensor *num_valid_proposals, const GenerateProposalsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer @@ -102,7 +117,11 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, + static Status validate(const ITensorInfo *scores, + const ITensorInfo *deltas, + const ITensorInfo *anchors, + const ITensorInfo *proposals, + const ITensorInfo *scores_out, const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info); @@ -113,17 +132,17 @@ private: // Memory group manager MemoryGroup _memory_group; - // Neon kernels - NEPermute _permute_deltas; - NEReshapeLayer _flatten_deltas; - NEPermute _permute_scores; - NEReshapeLayer _flatten_scores; - NEComputeAllAnchors _compute_anchors; - NEBoundingBoxTransform _bounding_box; - NEPadLayer _pad; - NEDequantizationLayer _dequantize_anchors; - NEDequantizationLayer _dequantize_deltas; - NEQuantizationLayer _quantize_all_proposals; + // kernels/layers + NEPermute _permute_deltas; + NEReshapeLayer _flatten_deltas; + NEPermute _permute_scores; + NEReshapeLayer _flatten_scores; + std::unique_ptr<NEComputeAllAnchorsKernel> _compute_anchors; + NEBoundingBoxTransform _bounding_box; + NEPadLayer _pad; + NEDequantizationLayer _dequantize_anchors; + NEDequantizationLayer _dequantize_deltas; + NEQuantizationLayer 
_quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h deleted file mode 100644 index a2d42fedf8..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H -#define ARM_COMPUTE_NEHOGDESCRIPTOR_H - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class IHOG; -class NEHOGOrientationBinningKernel; -class NEHOGBlockNormalizationKernel; - -/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGDescriptor : public IFunction -{ -public: - /** Default constructor */ - NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDescriptor(const NEHOGDescriptor &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDescriptor(NEHOGDescriptor &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete; - /** Default destructor */ - ~NEHOGDescriptor(); - /** Initialise the function's source, destination, HOG data-object and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. 
The number of channels is equal to the number of histogram bins per block - * @param[in] hog HOG data object which describes the HOG descriptor - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - std::unique_ptr<NEHOGOrientationBinningKernel> _orient_bin; - std::unique_ptr<NEHOGBlockNormalizationKernel> _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; -}; -} - -#endif /* ARM_COMPUTE_NEHOGDESCRIPTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h deleted file mode 100644 index 644851ee92..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDETECTOR_H -#define ARM_COMPUTE_NEHOGDETECTOR_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; -/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel: - * - * -# @ref NEHOGDetectorKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGDetector : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEHOGDetector() = default; - /** Prevent instances of this class from being copied */ - NEHOGDetector(const NEHOGDetector &) = delete; - /** Default move constructor */ - NEHOGDetector(NEHOGDetector &&) = default; - /** Prevent instances of this class from being copied */ - NEHOGDetector &operator=(const NEHOGDetector &) = delete; - /** Default move assignment operator */ - NEHOGDetector &operator=(NEHOGDetector &&) = default; - /** Destructor */ - ~NEHOGDetector(); - /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class - * - * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. - * - * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. 
Data type supported: F32 - * @param[in] hog HOG data-object that describes the HOG descriptor - * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the block stride stored in hog - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEHOGDETECTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h deleted file mode 100644 index 426bc4b23c..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGGRADIENT_H -#define ARM_COMPUTE_NEHOGGRADIENT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class ICPPKernel; - -/** Basic function to calculate the gradient for HOG. 
This function calls the following NEON kernels: - * - * -# @ref NEDerivative - * -# NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGGradient : public IFunction -{ -public: - /** Default constructor */ - NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGGradient(const NEHOGGradient &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGGradient &operator=(const NEHOGGradient &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGGradient(NEHOGGradient &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGGradient &operator=(NEHOGGradient &&) = delete; - /** Default destructor */ - ~NEHOGGradient(); - /** Initialise the function's source, destinations, phase type and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8. - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. - * @param[out] output_phase Output tensor.(phase). Format supported: U8 - * @param[in] phase_type Type of @ref PhaseType - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr<ICPPKernel> _mag_phase; - Tensor _gx; - Tensor _gy; -}; -} -#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h deleted file mode 100644 index f370dd29ab..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHOGMULTIDETECTION_H -#define ARM_COMPUTE_NEHOGMULTIDETECTION_H - -#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class NEHOGOrientationBinningKernel; -class NEHOGBlockNormalizationKernel; - -/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * -# @ref NEHOGDetector - * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) - * - * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: - * -# Phase type - -# Normalization type - -# L2 hysteresis threshold if the normalization type is L2HYS_NORM - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGMultiDetection : public IFunction -{ -public: - /** Default constructor */ - NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEHOGMultiDetection(NEHOGMultiDetection &&) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - 
NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = delete; - /** Default destructor */ - ~NEHOGMultiDetection(); - /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. - * This container should store the HOG data-objects in descending or ascending cell_size width order. - * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects - * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects - * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object - * The dimension of this array must be the same of multi_hog->num_models() - * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. 
- * True if the non-maxima suppression stage has to be computed - * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage - * - */ - void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, - uint8_t constant_border_value = 0, - float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient_kernel; - std::vector<NEHOGOrientationBinningKernel> _orient_bin_kernel; - std::vector<NEHOGBlockNormalizationKernel> _block_norm_kernel; - std::vector<NEHOGDetector> _hog_detect_kernel; - CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel; - std::vector<Tensor> _hog_space; - std::vector<Tensor> _hog_norm_space; - IDetectionWindowArray *_detection_windows; - Tensor _mag; - Tensor _phase; - bool _non_maxima_suppression; - size_t _num_orient_bin_kernel; - size_t _num_block_norm_kernel; - size_t _num_hog_detect_kernel; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGMULTIDETECTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h deleted file mode 100644 index 477b843aee..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHARRISCORNERS_H -#define ARM_COMPUTE_NEHARRISCORNERS_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEFillBorderKernel; -class INEHarrisScoreKernel; -using IImage = ITensor; - -/** Basic function to execute harris corners detection. 
This function calls the following NEON kernels and functions: - * - * -# @ref NESobel3x3 (if gradient_size == 3) or<br/> - * @ref NESobel5x5 (if gradient_size == 5) or<br/> - * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEFillBorderKernel - * -# NEHarrisScoreKernel<3> (if block_size == 3) or<br/> - * NEHarrisScoreKernel<5> (if block_size == 5) or<br/> - * NEHarrisScoreKernel<7> (if block_size == 7) - * -# @ref NENonMaximaSuppression3x3 - * -# @ref CPPCornerCandidatesKernel - * -# @ref CPPSortEuclideanDistanceKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHarrisCorners : public IFunction -{ -public: - /** Constructor - * - * Initialize _sobel, _harris_score and _corner_list to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHarrisCorners(const NEHarrisCorners &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHarrisCorners &operator=(const NEHarrisCorners &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHarrisCorners(NEHarrisCorners &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHarrisCorners &operator=(NEHarrisCorners &&) = delete; - /** Default destructor */ - ~NEHarrisCorners(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). 
- * @param[in] min_dist Radial Euclidean distance for the euclidean diatance stage - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 - * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(IImage *input, float threshold, float min_dist, float sensitivity, - int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr<IFunction> _sobel; /**< Sobel function */ - std::unique_ptr<INEHarrisScoreKernel> _harris_score; /**< Harris score kernel */ - NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ - CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ - CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - std::unique_ptr<NEFillBorderKernel> _border_gx; /**< Border handler before running harris score */ - std::unique_ptr<NEFillBorderKernel> _border_gy; /**< Border handler before running harris score */ - Image _gx; /**< Source image - Gx component */ - Image _gy; /**< Source image - Gy component */ - Image _score; /**< Source image - Harris score */ - Image _nonmax; /**< Source image - Non-Maxima suppressed image */ - std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. 
It stores the potential corner candidates */ - int32_t _num_corner_candidates; /**< Number of potential corner candidates */ -}; -} -#endif /*ARM_COMPUTE_NEHARRISCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h deleted file mode 100644 index d922ef1214..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHISTOGRAM_H -#define ARM_COMPUTE_NEHISTOGRAM_H - -#include "arm_compute/runtime/IFunction.h" - -#include <cstddef> -#include <cstdint> -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ITensor; -class IDistribution1D; -class NEHistogramKernel; -using IImage = ITensor; - -/** Basic function to run @ref NEHistogramKernel. 
- * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogram(const NEHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogram &operator=(const NEHistogram &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogram(NEHistogram &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogram &operator=(NEHistogram &&) = delete; - /** Default destructor */ - ~NEHistogram(); - /** Initialise the kernel's inputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] output Output distribution. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEHistogramKernel> _histogram_kernel; - std::vector<uint32_t> _local_hist; - std::vector<uint32_t> _window_lut; - size_t _local_hist_size; - /** 256 possible pixel values as we handle only U8 images */ - static constexpr unsigned int window_lut_default_size = 256; -}; -} -#endif /*ARM_COMPUTE_NEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h deleted file mode 100644 index 2f023f44fe..0000000000 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEIM2COL_H -#define ARM_COMPUTE_NEIM2COL_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" -#include <memory> - -namespace arm_compute -{ -// Forward declarations -class ITensor; -class ITensorInfo; -class NEIm2ColKernel; - -/** Basic function to run @ref NEIm2ColKernel */ -class NEIm2Col : public IFunction -{ -public: - /** Default constructor */ - NEIm2Col(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2Col(const NEIm2Col &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2Col &operator=(const NEIm2Col &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIm2Col(NEIm2Col &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIm2Col &operator=(NEIm2Col &&) = delete; - /** Default destructor */ - ~NEIm2Col(); - /** Configure the im2col NEON kernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEIm2ColKernel> _kernel; - unsigned int _y_dim; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEIM2COL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index 57165c94b4..0bc57be09e 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,16 @@ public: ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. * Data types supported: F16/F32. Data layout supported: NHWC, NCHW * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
@@ -79,7 +89,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + float gamma = 1.0f, + float beta = 0.0f, + float epsilon = 1e-12f); // Inherited methods overridden: void run() override; @@ -93,5 +107,5 @@ private: Tensor _permuted_input; Tensor _permuted_output; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h deleted file mode 100644 index 31c0ec9ebe..0000000000 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGE_H -#define ARM_COMPUTE_NEINTEGRALIMAGE_H - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run a @ref NEIntegralImageKernel - * -* @deprecated This function is deprecated and is intended to be removed in 21.05 release -* -*/ -class NEIntegralImage : public INESimpleFunction -{ -public: - /** Constructor */ - NEIntegralImage() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImage(const NEIntegralImage &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImage &operator=(const NEIntegralImage &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImage(NEIntegralImage &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImage &operator=(NEIntegralImage &&) = delete; - /** Default destructor */ - ~NEIntegralImage(); - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U32. 
- */ - void configure(const ITensor *input, ITensor *output); -}; -} -#endif /*ARM_COMPUTE_NEINTEGRALIMAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index 173b9d2141..8502cee5d2 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,6 +60,16 @@ public: ~NEL2NormalizeLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 @@ -87,5 +97,5 @@ private: std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel; Tensor _sumsq; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index ef8defb827..629c5d10a0 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,8 @@ #define ARM_COMPUTE_NELSTMLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/common/LSTMParams.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" @@ -35,7 +36,6 @@ #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute { @@ -60,6 +60,15 @@ public: ~NELSTMLayer(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src13 | dst0 - dst3 | + * |:------------|:------------| + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. @@ -95,13 +104,26 @@ public: * @param[in] projection_threshold The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. * If set to 0.0 then clipping is disabled. 
*/ - void configure(const ITensor *input, - const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - const ITensor *output_state_in, const ITensor *cell_state_in, - ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output, - const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + void configure(const ITensor *input, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + const ITensor *output_state_in, + const ITensor *cell_state_in, + ITensor *scratch_buffer, + ITensor *output_state_out, + ITensor *cell_state_out, + ITensor *output, + const LSTMParams<ITensor> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -142,13 +164,26 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo 
*output_gate_bias, - const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, - const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, - const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f); + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_in, + const ITensorInfo *scratch_buffer, + const ITensorInfo *output_state_out, + const ITensorInfo *cell_state_out, + const ITensorInfo *output, + const LSTMParams<ITensorInfo> &lstm_params, + const ActivationLayerInfo &activation_info, + float cell_threshold = 0.f, + float projection_threshold = 0.f); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index a354a4df7b..ae951669b3 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" @@ -38,8 +39,6 @@ #include "arm_compute/runtime/NEON/functions/NESlice.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "arm_compute/runtime/common/LSTMParams.h" - namespace arm_compute { // Forward declarations @@ -47,10 +46,10 @@ class ITensor; /** Basic function to run @ref NELSTMLayerQuantized * - * This function calls the following NEON functions/kernels: + * This function calls the following functions/kernels: * * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 + * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 * -# @ref NETranspose Matrix transpose * -# @ref NEConcatenateLayer Tensor concatenation * -# @ref NEActivationLayer Activation functions (tanh and logistic) @@ -77,6 +76,14 @@ public: ~NELSTMLayerQuantized(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | + * |:-----------|:------------|:-------|:------|:------|:------| + * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. 
Data type supported: Same as @p input. @@ -96,11 +103,22 @@ public: * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. */ void configure(const ITensor *input, - const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - ITensor *cell_state_in, const ITensor *output_state_in, - ITensor *cell_state_out, ITensor *output_state_out); + const ITensor *input_to_input_weights, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_input_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *input_gate_bias, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + ITensor *cell_state_in, + const ITensor *output_state_in, + ITensor *cell_state_out, + ITensor *output_state_out); /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer * @@ -125,11 +143,22 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - 
const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); + const ITensorInfo *input_to_input_weights, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_input_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *input_gate_bias, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out); // Inherited methods overridden: void run() override; @@ -139,30 +168,30 @@ private: MemoryGroup _memory_group; // Functions used - NEGEMMLowpMatrixMultiplyCore _gemmlowp; - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; - NETranspose _transpose_weights; - NEConcatenateLayer _concat_input_weights; - NEConcatenateLayer _concat_recurrent_weights; - NEConcatenateLayer _concat_weights; - NEConcatenateLayer _concat_inputs; - NEConcatenateLayer _concat_bias; - NEActivationLayer _sigmoid_forget_gate; - NEActivationLayer _sigmoid_input_gate; - NEActivationLayer _sigmoid_output_gate; - NEActivationLayer _tanh_modulation_gate; - NEActivationLayer _tanh_output_state; - NEArithmeticAddition _add1; - NEArithmeticAddition _add2; - NEPixelWiseMultiplication _mul1; - NEPixelWiseMultiplication _mul2; - NEPixelWiseMultiplication _mul3; - NESlice _slice_input_tensor; - NESlice _slice_forget_tensor; - NESlice _slice_cell_tensor; - NESlice _slice_output_tensor; - NEDequantizationLayer _dequantize; - NEQuantizationLayer 
_quantize; + NEGEMMLowpMatrixMultiplyCore _gemmlowp; + NEGEMMLowpOutputStage _output_stage; + NETranspose _transpose_weights; + NEConcatenateLayer _concat_input_weights; + NEConcatenateLayer _concat_recurrent_weights; + NEConcatenateLayer _concat_weights; + NEConcatenateLayer _concat_inputs; + NEConcatenateLayer _concat_bias; + NEActivationLayer _sigmoid_forget_gate; + NEActivationLayer _sigmoid_input_gate; + NEActivationLayer _sigmoid_output_gate; + NEActivationLayer _tanh_modulation_gate; + NEActivationLayer _tanh_output_state; + NEArithmeticAddition _add1; + NEArithmeticAddition _add2; + NEPixelWiseMultiplication _mul1; + NEPixelWiseMultiplication _mul2; + NEPixelWiseMultiplication _mul3; + NESlice _slice_input_tensor; + NESlice _slice_forget_tensor; + NESlice _slice_cell_tensor; + NESlice _slice_output_tensor; + NEDequantizationLayer _dequantize; + NEQuantizationLayer _quantize; // Tensor pointers const ITensor *_input_to_input_weights; diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h deleted file mode 100644 index 9ca30141a6..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELAPLACIANPYRAMID_H -#define ARM_COMPUTE_NELAPLACIANPYRAMID_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/Pyramid.h" - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions: - * - * -# @ref NEGaussianPyramidHalf - * -# @ref NEGaussian5x5 - * -# @ref NEArithmeticSubtraction - * - * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then - * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. - * L(i) = I(i) - Gaussian5x5(I(i)) - * Level 0 has always the same first two dimensions as the input tensor. 
- * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NELaplacianPyramid : public IFunction -{ -public: - /** Constructor */ - NELaplacianPyramid(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianPyramid(const NELaplacianPyramid &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianPyramid(NELaplacianPyramid &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete; - /** Default destructor */ - ~NELaplacianPyramid(); - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16. - * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16. - * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: - * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - size_t _num_levels; - NEGaussianPyramidHalf _gaussian_pyr_function; - std::vector<NEGaussian5x5> _convf; - std::vector<NEArithmeticSubtraction> _subf; - Pyramid _gauss_pyr; - Pyramid _conv_pyr; - NEDepthConvertLayer _depth_function; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h deleted file mode 100644 index 8e0a3efff0..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H -#define ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions: - * - * -# @ref NEArithmeticAddition - * -# @ref NEScale - * -# @ref NEDepthConvertLayer - * - * This function reconstructs the original image from a Laplacian Image Pyramid. - * - * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the - * resolution of the next pyramid level. - * - * I(n-2) = upsample( input + L(n-1) - * - * For each pyramid level i, except i=0 and i=n-1: - * I(i-1) = upsample(I(i) + L(i)) - * - * output = I(0) + L(0) - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NELaplacianReconstruct : public IFunction -{ -public: - /** Constructor */ - NELaplacianReconstruct(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianReconstruct(const NELaplacianReconstruct &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianReconstruct(NELaplacianReconstruct &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianReconstruct 
&operator=(NELaplacianReconstruct &&) = delete; - /** Default destructor */ - ~NELaplacianReconstruct(); - /** Initialise the function's source, destinations and border mode. - * - * The Output image must have the same size as the first level of the pyramid. - * The Input image must have the same size as the last level of the pyramid. - * - * The idea is to reconstuct the original hi-res image from a low-res representation of it and the laplacian pyramid. - * - * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16. - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - Pyramid _tmp_pyr; - std::vector<NEArithmeticAddition> _addf; - std::vector<NEScale> _scalef; - NEDepthConvertLayer _depthf; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H */ diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h deleted file mode 100644 index 86e6300130..0000000000 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H -#define ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NECol2Im.h" -#include "arm_compute/runtime/NEON/functions/NEIm2Col.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class INETensor; -class NEWeightsReshapeKernel; -class NELocallyConnectedMatrixMultiplyKernel; - -/** Basic function to compute the locally connected layer. 
This function calls the following NEON kernels: - * - * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) - * -# @ref NEIm2ColKernel - * -# @ref NELocallyConnectedMatrixMultiplyKernel - * -# @ref NECol2ImKernel - */ -class NELocallyConnectedLayer : public IFunction -{ -public: - /** Default constructor */ - NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NELocallyConnectedLayer(NELocallyConnectedLayer &&) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = delete; - /** Default destructor */ - ~NELocallyConnectedLayer(); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16, F32. - * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. 
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - ARM_COMPUTE_DEPRECATED_REL(20.11) - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedLayer - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16, F32. - * @param[in] weights Weights tensor info. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. - * @param[in] output Output tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEIm2Col _input_im2col; - std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel; - std::unique_ptr<NELocallyConnectedMatrixMultiplyKernel> _mm_kernel; - NECol2Im _output_col2im; - Tensor _input_im2col_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _is_prepared; - const ITensor *_original_weights; -}; -} -#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELogical.h b/arm_compute/runtime/NEON/functions/NELogical.h index 04ffce6221..0ad23200c6 100644 --- a/arm_compute/runtime/NEON/functions/NELogical.h +++ b/arm_compute/runtime/NEON/functions/NELogical.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/Macros.h" #include <memory> @@ -42,12 +41,27 @@ class NELogicalAnd : public IFunction public: /** Constructor */ NELogicalAnd(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalAnd(const NELogicalAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalAnd(NELogicalAnd &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalAnd &operator=(const NELogicalAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalAnd &operator=(NELogicalAnd &&) = delete; /** Destructor */ ~NELogicalAnd(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalAnd) /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. 
@@ -77,12 +91,27 @@ class NELogicalOr : public IFunction public: /** Constructor */ NELogicalOr(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalOr(const NELogicalOr &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalOr(NELogicalOr &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalOr &operator=(const NELogicalOr &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalOr &operator=(NELogicalOr &&) = delete; /** Destructor */ ~NELogicalOr(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalOr) /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:-------------|:------------| + * |U8 |U8 |U8 | + * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. 
@@ -112,12 +141,27 @@ class NELogicalNot : public IFunction public: /** Constructor */ NELogicalNot(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalNot(const NELogicalNot &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalNot(NELogicalNot &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogicalNot &operator=(const NELogicalNot &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELogicalNot &operator=(NELogicalNot &&) = delete; /** Destructor */ ~NELogicalNot(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE_INC(NELogicalNot) /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------| + * |U8 |U8 | + * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h deleted file mode 100644 index e100de2e08..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMAGNITUDE_H -#define ARM_COMPUTE_NEMAGNITUDE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMagnitude : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEMagnitude() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitude(const NEMagnitude &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitude &operator=(const NEMagnitude &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMagnitude(NEMagnitude &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMagnitude &operator=(NEMagnitude &&) = delete; - /** Default destructor */ - ~NEMagnitude(); - /** Initialise the kernel's inputs. - * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: S16. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMAGNITUDE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h new file mode 100644 index 0000000000..58dd7a6f6b --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMatMul.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Settings for MatMul Cpu implementation*/ +class CpuMatMulSettings +{ +public: + // get fast math flag + bool fast_math() const + { + return _fast_math; + } + // get fixed format flag + bool fixed_format() const + { + return _fixed_format; + } + // Set fast math flag + CpuMatMulSettings &fast_math(bool fmath) + { + _fast_math = fmath; + return *this; + } + // Set fixed format flag + CpuMatMulSettings &fixed_format(bool fixed_format) + { + _fixed_format = fixed_format; + return *this; + } + +private: + bool _fast_math{false}; + bool _fixed_format{false}; +}; + +// Forward declarations +class ITensor; +class ITensorInfo; +class MatMulInfo; +class Status; + +/** Basic function to run the following operators: + * + * -# @ref cpu::CpuMatMul + */ +class NEMatMul : public IFunction +{ +public: + /** Constructor */ + NEMatMul(); + /** Destructor */ + ~NEMatMul(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMatMul(const NEMatMul &) = delete; + /** Default move constructor */ + NEMatMul(NEMatMul &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMatMul &operator=(const NEMatMul &) = delete; + /** Default move assignment operator */ + NEMatMul &operator=(NEMatMul &&) = default; + /** Initialize + * + * Valid data layouts: + * - Any + * + * Valid data type configurations: + * |lhs |rhs |dst | + * |:--------------|:------------------|:--------------| + * |F32 |F32 |F32 | + * |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QASYMM8 |QASYMM8 |QASYMM8 | + * + * @param[in] lhs Left-hand side 
tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8. + * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs. + * @param[out] dst Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs. + * @param[in] info Contains MatMul operation information described in @ref MatMulInfo. + * @param[in] settings Contains flags for function level settings i.e fast math + * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions. + */ + void configure(ITensor *lhs, + ITensor *rhs, + ITensor *dst, + const MatMulInfo &info, + const CpuMatMulSettings &settings, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul + * + * @param[in] lhs Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8. + * @param[in] rhs Right-hand side tensor info. Data types supported: same as @p lhs. + * @param[out] dst Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs. + * @param[in] info Contains MatMul operation information described in @ref MatMulInfo. + * @param[in] settings Contains flags for function level settings i.e fast math + * @param[in] act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions. 
+ * + * @return Status + */ + static Status validate(const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ITensorInfo *dst, + const MatMulInfo &info, + const CpuMatMulSettings &settings, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; +}; +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h index 5b5bb5cb78..e00fc4544f 100644 --- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,19 +26,18 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" + #include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -class NEMemsetKernel; -class NEMaxUnpoolingLayerKernel; +class NEFill; -/** Function to perform MaxUnpooling. This function calls the following NEON kernels: +/** Function to perform MaxUnpooling. This function calls the following kernels: * - * -# @ref NEMemsetKernel - * -# @ref NEMaxUnpoolingLayerKernel + * -# @ref NEFill */ class NEMaxUnpoolingLayer : public IFunction { @@ -57,6 +56,18 @@ public: ~NEMaxUnpoolingLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Only supported pool size 2 * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
@@ -76,14 +87,18 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *indices, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info); // Inherited methods overridden: void run() override; private: - std::unique_ptr<NEMemsetKernel> _memset_kernel; - std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel; + std::unique_ptr<NEFill> _fill_func; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h deleted file mode 100644 index 875c3630c1..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEV_H -#define ARM_COMPUTE_NEMEANSTDDEV_H - -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include <memory> - -#include <cstdint> - -namespace arm_compute -{ -class NEMeanStdDevKernel; -class NEFillBorderKernel; - -/** Basic function to execute mean and std deviation. This function calls the following NEON kernels: - * - * @ref NEMeanStdDevKernel - * - */ -class NEMeanStdDev : public IFunction -{ -public: - /** Default Constructor. */ - NEMeanStdDev(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDev(const NEMeanStdDev &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDev &operator=(const NEMeanStdDev &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDev(NEMeanStdDev &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDev &operator=(NEMeanStdDev &&) = delete; - /** Default destructor */ - ~NEMeanStdDev(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) - * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional) Output standard deviation of pixel values. 
- */ - void configure(IImage *input, float *mean, float *stddev = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - std::unique_ptr<NEFillBorderKernel> _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ -}; -} -#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 31e376191c..41aa81946b 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,16 @@ public: ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | + * * @note If the output tensor is a nullptr, the normalization will be performed in-place. * * @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h deleted file mode 100644 index 7e1ec905c6..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3_H -#define ARM_COMPUTE_NEMEDIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute median filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEMedian3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMedian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEMEDIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h deleted file mode 100644 index 312d1cb668..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATION_H -#define ARM_COMPUTE_NEMINMAXLOCATION_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NEMinMaxKernel; -class NEMinMaxLocationKernel; -using IImage = ITensor; - -/** Basic function to execute min and max location. This function calls the following NEON kernels: - * - * -# NEMinMaxKernel - * -# NEMinMaxLocationKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMinMaxLocation : public IFunction -{ -public: - /** Constructor */ - NEMinMaxLocation(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocation(const NEMinMaxLocation &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLocation(NEMinMaxLocation &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete; - /** Default destructor */ - ~NEMinMaxLocation(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc (Optional) Array of minimum value locations. - * @param[out] max_loc (Optional) Array of maximum value locations. - * @param[out] min_count (Optional) Number of minimum value encounters. 
- * @param[out] max_count (Optional) Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<NEMinMaxKernel> _min_max; /**< Kernel that performs min/max */ - std::unique_ptr<NEMinMaxLocationKernel> _min_max_loc; /**< Kernel that extracts min/max locations */ -}; -} -#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h deleted file mode 100644 index 8642350736..0000000000 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONLINEARFILTER_H -#define ARM_COMPUTE_NENONLINEARFILTER_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute non linear filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NENonLinearFilterKernel - * - * @note Supported mask dimensions squares of sizes 3, 5 - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NENonLinearFilter : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, - uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NENONLINEARFILTER_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h deleted file mode 100644 index 5b71d52e3e..0000000000 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H -#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NENonMaximaSuppression3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NENonMaximaSuppression3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT - * The constant values used with CONSTANT border mode is 0 - * - * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input - * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT - * - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode); -}; -} -#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H */ diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index 6519f9b4e6..27e3fa674e 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NENORMALIZATIONLAYER_H #define ARM_COMPUTE_NENORMALIZATIONLAYER_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" @@ -39,7 +38,7 @@ namespace arm_compute class ITensor; class NENormalizationLayerKernel; -/** Basic function to compute a normalization layer. This function calls the following NEON kernels: +/** Basic function to compute a normalization layer. This function calls the following kernels: * * -# @ref NEPixelWiseMultiplication * -# @ref NEFillBorderKernel @@ -63,6 +62,16 @@ public: ~NENormalizationLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | + * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], * and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC. 
* @param[out] output Destination with the same dimensions, data type, data layout and number of channels of @p input @@ -78,16 +87,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h deleted file mode 100644 index d1624ec68a..0000000000 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEOPTICALFLOW_H -#define ARM_COMPUTE_NEOPTICALFLOW_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class Pyramid; -class NELKTrackerKernel; - -/** Array of LK Internel Keypoints */ -using LKInternalKeypointArray = Array<NELKInternalKeypoint>; -/** Basic function to execute optical flow. 
This function calls the following NEON kernels and functions: - * - * -# @ref NEScharr3x3 - * -# @ref NELKTrackerKernel - * - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEOpticalFlow : public IFunction -{ -public: - /** Constructor - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow(const NEOpticalFlow &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; - /** Default destructor */ - ~NEOpticalFlow(); - /** Initialise the function input and output - * - * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 - * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data type supported U8 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] border_mode The border mode applied at scharr kernel stage - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT - * - */ - void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, - IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, - bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector<NEScharr3x3> _func_scharr; - std::vector<std::unique_ptr<NELKTrackerKernel>> _kernel_tracker; - std::vector<Tensor> _scharr_gx; - std::vector<Tensor> _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index 358e633000..81d5fd162c 100644 --- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,42 +26,15 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/INEOperator.h" + +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -namespace experimental -{ -/** Basic function to run @ref NEArithmeticOperationKernel for PRELU - * - * @note The function implements an activation layer with the PRELU activation function. - */ -class NEPRelu : public INEOperator -{ -public: - /** Set the input and output tensor. - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] alpha Source alpha tensor info. Data types supported: same of @p input. - * @param[out] output Destination tensor info. Data type supported: same as @p input - */ - void configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] alpha Source alpha tensor info. Data types supported: same of @p input. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); -}; -} // namespace experimental - -/** Basic function to run @ref NEArithmeticOperationKernel for PRELU +/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU * * @note The function implements an activation layer with the PRELU activation function. */ @@ -82,6 +55,17 @@ public: NEPReluLayer &operator=(NEPReluLayer &&); /** Set the input and output tensor. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] alpha Source alpha tensor. Data types supported: same of @p input. * @param[out] output Destination tensor. Data type supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index 3fdbb0d73c..494b1c0641 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,26 +24,26 @@ #ifndef ARM_COMPUTE_NEPADLAYER_H #define ARM_COMPUTE_NEPADLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/SubTensor.h" - -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" + #include <memory> namespace arm_compute { -class NECopyKernel; class NEPadLayerKernel; -/** Basic function to pad a tensor. This function calls the following NEON functions/kernels: +/** Basic function to pad a tensor. 
This function calls the following functions/kernels: * * - For padding mode = PaddingMode::CONSTANT: * -# @ref NEPadLayerKernel * - Otherwise: - * -# @ref NECopyKernel + * -# @ref NECopy * -# @ref NEStridedSlice * -# @ref NEConcatenateLayer * @@ -65,6 +65,15 @@ public: ~NEPadLayer(); /** Initialize the function * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |All |All | + * * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] @@ -73,7 +82,11 @@ public: * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ - void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + void configure(ITensor *input, + ITensor *output, + const PaddingList &padding, + const PixelValue constant_value = PixelValue(), + const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * * @param[in] input Source tensor info. Data types supported: All. 
@@ -86,7 +99,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const PaddingList &padding, + const PixelValue constant_value = PixelValue(), + const PaddingMode mode = PaddingMode::CONSTANT); // Inherited methods overridden: void run() override; @@ -100,7 +117,10 @@ private: * specifies the front and the end padding in the i-th dimension. * @param[in] constant_value Constant value to be used for the padding */ - void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value); + void configure_constant_mode(ITensor *input, + ITensor *output, + const PaddingList &padding, + const PixelValue constant_value); /** Configure functions for when reflect or symmetric padding is used. * * @param[in] input Source tensor. Data types supported: All. @@ -109,7 +129,7 @@ private: void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); private: - std::unique_ptr<NECopyKernel> _copy_kernel; + NECopy _copy_function; std::unique_ptr<NEPadLayerKernel> _pad_kernel; PaddingMode _mode; PaddingList _padding; diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index ef8854b360..2cef64764d 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_NEPERMUTE_H #define ARM_COMPUTE_NEPERMUTE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { @@ -34,11 +35,32 @@ namespace arm_compute class ITensor; class ITensorInfo; -/** Basic function to run @ref NEPermuteKernel */ -class NEPermute : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuPermuteKernel */ +class NEPermute : public IFunction { public: - /** Configure the permute NEON kernel + /** Default Constructor */ + NEPermute(); + /** Default Destructor */ + ~NEPermute(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermute(const NEPermute &) = delete; + /** Default move constructor */ + NEPermute(NEPermute &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPermute &operator=(const NEPermute &) = delete; + /** Default move assignment operator */ + NEPermute &operator=(NEPermute &&) = default; + /** Configure the permute function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * @@ -58,6 +80,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEPERMUTE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h deleted file mode 100644 index 1202f1878d..0000000000 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ /dev/null 
@@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPHASE_H -#define ARM_COMPUTE_NEPHASE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEPhase : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPHASE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index 91cf44ff2e..3d81bf6087 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,115 +24,19 @@ #ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H #define ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H +#include "arm_compute/core/Rounding.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/INEOperator.h" + +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -namespace experimental -{ -/** Basic function to run @ref NEPixelWiseMultiplicationKernel */ -class NEPixelWiseMultiplication : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output and convertion policy. - * - * Valid configurations (Input1,Input2) -> Output : - * - * Support: Broadcast? Scale=1/255? - * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. 
- * - * @param[in, out] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication - * - * Valid configurations (Input1,Input2) -> Output : - * - * Support: Broadcast? Scale=1/255? 
- * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32/S32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */ -class NEComplexPixelWiseMultiplication : public INEOperator -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in, out] input1 An input tensor. 
Data types supported: F32. Number of channels supported: 2 (complex tensor). - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. 
- */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace experimental - -/** Basic function to run @ref NEPixelWiseMultiplicationKernel */ +/** Basic function to run @ref cpu::CpuMul */ class NEPixelWiseMultiplication : public IFunction { public: @@ -143,13 +47,31 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NEPixelWiseMultiplication(const NEPixelWiseMultiplication &) = delete; /** Default move constructor */ - NEPixelWiseMultiplication(NEPixelWiseMultiplication &&); + NEPixelWiseMultiplication(NEPixelWiseMultiplication &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEPixelWiseMultiplication &operator=(const NEPixelWiseMultiplication &) = delete; /** Default move assignment operator */ - NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&); + NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default; /** Initialise the kernel's inputs, output and convertion policy. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM8 |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |QSYMM16 |QSYMM16 |QSYMM16 | + * |QSYMM16 |QSYMM16 |S32 | + * |U8 |U8 |U8 | + * |U8 |U8 |S16 | + * |U8 |S16 |S16 | + * |S16 |U8 |S16 | + * |S16 |S16 |S16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * @@ -173,7 +95,12 @@ public: * @param[in] rounding_policy Rounding policy. * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Currently not supported. */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, + void configure(const ITensor *input1, + const ITensor *input2, + ITensor *output, + float scale, + ConvertPolicy overflow_policy, + RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication * @@ -200,7 +127,12 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + float scale, + ConvertPolicy overflow_policy, + RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: @@ -211,7 +143,7 @@ private: std::unique_ptr<Impl> _impl; }; -/** Basic function to run @ref NEComplexPixelWiseMultiplicationKernel. */ +/** Basic function to run @ref cpu::CpuComplexMul. 
*/ class NEComplexPixelWiseMultiplication : public IFunction { public: @@ -222,11 +154,11 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NEComplexPixelWiseMultiplication(const NEComplexPixelWiseMultiplication &) = delete; /** Default move constructor */ - NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&); + NEComplexPixelWiseMultiplication(NEComplexPixelWiseMultiplication &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEComplexPixelWiseMultiplication &operator=(const NEComplexPixelWiseMultiplication &) = delete; /** Default move assignment operator */ - NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&); + NEComplexPixelWiseMultiplication &operator=(NEComplexPixelWiseMultiplication &&) = default; /** Initialise the kernel's inputs, output. * * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). @@ -236,7 +168,10 @@ public: * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels: same as @p input1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + void configure(ITensor *input1, + ITensor *input2, + ITensor *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplication * * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). @@ -244,7 +179,10 @@ public: * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported. */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; @@ -253,5 +191,5 @@ private: struct Impl; std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h index 168845d203..09251f2a5f 100644 --- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPooling3dLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,65 +21,76 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H -#define ARM_COMPUTE_NEUPSAMPLELAYER_H +#ifndef ARM_COMPUTE_NEPOOLING3DLAYER_H +#define ARM_COMPUTE_NEPOOLING3DLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { +// Forward declarations class ITensor; -class NEUpsampleLayerKernel; - -/** Function to run upsample layer */ -class NEUpsampleLayer : public IFunction +class ITensorInfo; +class IMemoryManager; +/** Basic function to simulate a pooling 3d layer with the specified pooling operation. 
This function calls the following kernels: + * + * -# @ref cpu::CpuPool3d + */ +class NEPooling3dLayer : public IFunction { public: /** Constructor */ - NEUpsampleLayer(); + NEPooling3dLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayer(const NEUpsampleLayer &) = delete; + NEPooling3dLayer(const NEPooling3dLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete; + NEPooling3dLayer &operator=(const NEPooling3dLayer &) = delete; /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEUpsampleLayer(NEUpsampleLayer &&) = delete; + NEPooling3dLayer(NEPooling3dLayer &&) = delete; /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete; + NEPooling3dLayer &operator=(NEPooling3dLayer &&) = delete; /** Default destructor */ - ~NEUpsampleLayer(); - /** Set the input output tensors. + ~NEPooling3dLayer(); + /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. + * Valid data layouts: + * - NDHWC * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * + * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise + * + * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED. + * @param[out] output Destination tensor. 
+ * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo. */ - void configure(const ITensor *input, ITensor *output, const Size2D &info, - const InterpolationPolicy &policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayer + void configure(const ITensor *input, ITensor *output, const Pooling3dLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEPooling3dLayer + * * - * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[out] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. + * @param[in] input Source tensor info. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED. + * @param[in] output Destination tensor info. + * @param[in] pool_info Contains pooling operation information described in @ref Pooling3dLayerInfo. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, - const InterpolationPolicy &policy); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Pooling3dLayerInfo &pool_info); // Inherited methods overridden: void run() override; private: - std::unique_ptr<NEUpsampleLayerKernel> _kernel; - DataLayout _data_layout; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} // arm_compute -#endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */ +} // namespace arm_compute +#endif /* ARM_COMPUTE_NEPOOLING3DLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index b45290fb46..768ad0d818 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,28 +24,28 @@ #ifndef ARM_COMPUTE_NEPOOLINGLAYER_H #define ARM_COMPUTE_NEPOOLINGLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/core/Types.h" #include <memory> namespace arm_compute { +// Forward declarations class ITensor; class ITensorInfo; -class NEPoolingLayerKernel; -class NEFillBorderKernel; -/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels: +/** Basic function to simulate a pooling layer with the specified pooling operation. 
This function calls the following kernels: * - * -# @ref NEFillBorderKernel (executed if padding size is different from zero) - * -# @ref NEPoolingLayerKernel + * -# @ref cpu::CpuPool2d */ class NEPoolingLayer : public IFunction { public: /** Constructor */ - NEPoolingLayer(); + NEPoolingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEPoolingLayer(const NEPoolingLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -58,7 +58,21 @@ public: ~NEPoolingLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note F16 is supported for pool sizes 2 and 3 only + * @note Source tensor is padded with -inf for MAX pooling and 0 otherwise + * Cases where pooling region is completely outside input tensor are only supported for floating point data type * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. 
@@ -77,16 +91,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info, + const ITensorInfo *indices = nullptr); // Inherited methods overridden: void run() override; private: - std::unique_ptr<NEPoolingLayerKernel> _pooling_layer_kernel; - std::unique_ptr<NEFillBorderKernel> _border_handler; - bool _is_global_pooling_layer; - DataLayout _data_layout; + struct Impl; + std::unique_ptr<Impl> _impl; }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index 3cc79fa28e..858e3299af 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |F32 |F32 |F32 | + * * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. 
Data type supported: same as @p input @@ -53,7 +62,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEPRIORBOXLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index fcabc1d0c4..009a4e0911 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,17 +25,18 @@ #define ARM_COMPUTE_NEQLSTMLAYER_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/common/LSTMParams.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NECopy.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" -#include "support/MemorySupport.h" -#include "arm_compute/runtime/common/LSTMParams.h" #include <memory> namespace arm_compute @@ -44,20 +45,25 @@ namespace arm_compute class ITensor; class ITensorInfo; class NEQLSTMLayerNormalizationKernel; -class NEGEMMLowpMatrixAReductionKernel; - +namespace cpu +{ +namespace kernels +{ +class 
CpuGemmLowpMatrixAReductionKernel; +} // namespace kernels +} // namespace cpu /** Basic function to run @ref NEQLSTMLayer * - * This function calls the following NEON functions/kernels: + * This function calls the following kernels: * * -# @ref NEActivationLayer Activation functions (tanh and logistic) - * -# @ref NEArithmeticAddition Elementwise addition - * -# @ref NEArithmeticSubtractionKernel Elementwise subtraction - * -# @ref NECopyKernel Copy kernel for copying output_state_out to output + * -# @ref NEArithmeticAddition Elementwise addition + * -# @ref NEArithmeticSubtraction Elementwise subtraction + * -# @ref NECopy Copy kernel for copying output_state_out to output * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers - * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 - * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use - * -# @ref NEPixelWiseMultiplication Elementwise multiplication + * -# @ref NEGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 + * -# @ref cpu::kernels::CpuGemmLowpMatrixAReductionKernel For precomputing effective biases to use + * -# @ref NEPixelWiseMultiplication Elementwise multiplication * -# @ref NETranspose Transpose function for reshaping the weights * */ class NEQLSTMLayer : public IFunction @@ -77,6 +83,14 @@ public: ~NEQLSTMLayer(); /** Initialize function's tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 - src6 |src7 -src9 |src10 |src11 |dst0 |dst1 - dst2 | + * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------| + * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED | + * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. 
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. @@ -116,12 +130,21 @@ public: * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. */ - void configure(const ITensor *input, - const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, - const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, - const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, - const ITensor *cell_state_in, ITensor *output_state_in, - ITensor *cell_state_out, ITensor *output_state_out, ITensor *output, + void configure(const ITensor *input, + const ITensor *input_to_forget_weights, + const ITensor *input_to_cell_weights, + const ITensor *input_to_output_weights, + const ITensor *recurrent_to_forget_weights, + const ITensor *recurrent_to_cell_weights, + const ITensor *recurrent_to_output_weights, + const ITensor *forget_gate_bias, + const ITensor *cell_bias, + const ITensor *output_gate_bias, + const ITensor *cell_state_in, + ITensor *output_state_in, + ITensor *cell_state_out, + ITensor *output_state_out, + ITensor *output, const LSTMParams<ITensor> &lstm_params); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer @@ -166,12 +189,21 @@ public: * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. 
* @return a status */ - static Status validate(const ITensorInfo *input, - const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, - const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, - const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, - const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, - const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, + static Status validate(const ITensorInfo *input, + const ITensorInfo *input_to_forget_weights, + const ITensorInfo *input_to_cell_weights, + const ITensorInfo *input_to_output_weights, + const ITensorInfo *recurrent_to_forget_weights, + const ITensorInfo *recurrent_to_cell_weights, + const ITensorInfo *recurrent_to_output_weights, + const ITensorInfo *forget_gate_bias, + const ITensorInfo *cell_bias, + const ITensorInfo *output_gate_bias, + const ITensorInfo *cell_state_in, + const ITensorInfo *output_state_in, + const ITensorInfo *cell_state_out, + const ITensorInfo *output_state_out, + const ITensorInfo *output, const LSTMParams<ITensorInfo> &lstm_params); // Inherited methods overridden: @@ -204,10 +236,17 @@ private: * @param[in] mm_res_info Tensor info to be used to initialize output stage result tensor. 
* */ - void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info, - const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res, - Tensor *outstage_res, float gemmlowp_scale, - const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); + void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, + NEGEMMLowpOutputStage &outstage, + GEMMLowpOutputStageInfo &gemmlowp_info, + const ITensor *mm_input, + const ITensor *mm_weights, + const ITensor *bias, + Tensor *mm_res, + Tensor *outstage_res, + float gemmlowp_scale, + const TensorInfo &mm_res_info, + const TensorInfo &outstage_tensor_info); MemoryGroup _memory_group; @@ -216,8 +255,8 @@ private: { static constexpr uint32_t max_dimension_supported = 2; - ITensor *_src{ nullptr }; - ITensor *_dst{ nullptr }; + ITensor *_src{nullptr}; + ITensor *_dst{nullptr}; size_t _row_size{}; Window _window{}; @@ -243,70 +282,73 @@ private: }; // Functions used - NETranspose _transpose_input_to_forget_weights; - NETranspose _transpose_input_to_cell_weights; - NETranspose _transpose_input_to_output_weights; - NETranspose _transpose_input_to_input_weights; - NETranspose _transpose_recurrent_to_forget_weights; - NETranspose _transpose_recurrent_to_cell_weights; - NETranspose _transpose_recurrent_to_output_weights; - NETranspose _transpose_recurrent_to_input_weights; - NETranspose _transpose_projection_weights; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> 
_input_to_output_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction; - std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction; - NEArithmeticAddition _projection_bias_add; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; - NEGEMMLowpOutputStage _input_to_forget_outstage; - NEGEMMLowpOutputStage _recurrent_to_forget_outstage; - NEGEMMLowpOutputStage _cell_to_forget_outstage; - NEArithmeticAddition _accumulate_input_recurrent_forget; - NEArithmeticAddition _accumulate_cell_forget; - NEActivationLayer _forget_gate_sigmoid; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; - NEGEMMLowpOutputStage _input_to_cell_outstage; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; - NEGEMMLowpOutputStage _recurrent_to_cell_outstage; - NEArithmeticAddition _accumulate_input_recurrent_modulation; - NEActivationLayer _cell_gate_tanh; - NEArithmeticSubtraction _input_gate_sub; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; - NEGEMMLowpOutputStage _input_to_input_outstage; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; - NEGEMMLowpOutputStage _recurrent_to_input_outstage; - NEArithmeticAddition _accumulate_input_recurrent_input; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; - NEGEMMLowpOutputStage _cell_to_input_outstage; - NEArithmeticAddition _accumulate_cell_input; - NEActivationLayer _input_gate_sigmoid; - NEPixelWiseMultiplication _pixelwise_mul_forget_cell; - NEPixelWiseMultiplication _pixelwise_mul_input_cell; - NEArithmeticAddition _add_forget_cell; - NEActivationLayer _cell_clip; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; - NEGEMMLowpOutputStage _input_to_output_outstage; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; - NEGEMMLowpOutputStage _recurrent_to_output_outstage; - NEArithmeticAddition _accumulate_input_recurrent_output; - 
NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; - NEGEMMLowpOutputStage _cell_to_output_outstage; - NEArithmeticAddition _accumulate_cell_to_output; - NEActivationLayer _output_gate_sigmoid; - NEActivationLayer _hidden_tanh; - NEPixelWiseMultiplication _pixelwise_mul_hidden; - NEGEMMLowpOutputStage _hidden_outstage; - NEGEMMLowpMatrixMultiplyCore _mm_projection; - NEGEMMLowpOutputStage _projection_outstage; - NEArithmeticAddition _accumulate_projection; - NEActivationLayer _projection_clip; + + NEDequantizationLayer _dequantize_input_to_forget_weights; + NEQuantizationLayer _quantize_input_to_forget_weights; + NETranspose _transpose_input_to_forget_weights; + NETranspose _transpose_input_to_cell_weights; + NETranspose _transpose_input_to_output_weights; + NETranspose _transpose_input_to_input_weights; + NETranspose _transpose_recurrent_to_forget_weights; + NETranspose _transpose_recurrent_to_cell_weights; + NETranspose _transpose_recurrent_to_output_weights; + NETranspose _transpose_recurrent_to_input_weights; + NETranspose _transpose_projection_weights; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_input_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_forget_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_cell_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _input_to_output_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction; + std::unique_ptr<cpu::kernels::CpuGemmLowpMatrixAReductionKernel> _projection_reduction; + NEArithmeticAddition _projection_bias_add; + 
NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; + NEGEMMLowpOutputStage _input_to_forget_outstage; + NEGEMMLowpOutputStage _recurrent_to_forget_outstage; + NEGEMMLowpOutputStage _cell_to_forget_outstage; + NEArithmeticAddition _accumulate_input_recurrent_forget; + NEArithmeticAddition _accumulate_cell_forget; + NEActivationLayer _forget_gate_sigmoid; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; + NEGEMMLowpOutputStage _input_to_cell_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; + NEGEMMLowpOutputStage _recurrent_to_cell_outstage; + NEArithmeticAddition _accumulate_input_recurrent_modulation; + NEActivationLayer _cell_gate_tanh; + NEArithmeticSubtraction _input_gate_sub; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; + NEGEMMLowpOutputStage _input_to_input_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; + NEGEMMLowpOutputStage _recurrent_to_input_outstage; + NEArithmeticAddition _accumulate_input_recurrent_input; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; + NEGEMMLowpOutputStage _cell_to_input_outstage; + NEArithmeticAddition _accumulate_cell_input; + NEActivationLayer _input_gate_sigmoid; + NEPixelWiseMultiplication _pixelwise_mul_forget_cell; + NEPixelWiseMultiplication _pixelwise_mul_input_cell; + NEArithmeticAddition _add_forget_cell; + NEActivationLayer _cell_clip; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; + NEGEMMLowpOutputStage _input_to_output_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; + NEGEMMLowpOutputStage _recurrent_to_output_outstage; + NEArithmeticAddition _accumulate_input_recurrent_output; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; + NEGEMMLowpOutputStage _cell_to_output_outstage; + NEArithmeticAddition _accumulate_cell_to_output; + NEActivationLayer _output_gate_sigmoid; + NEActivationLayer _hidden_tanh; + 
NEPixelWiseMultiplication _pixelwise_mul_hidden; + NEGEMMLowpOutputStage _hidden_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_projection; + NEGEMMLowpOutputStage _projection_outstage; + NEArithmeticAddition _accumulate_projection; + NEActivationLayer _projection_clip; TensorCopyKernel _projection_bias_copy; TensorCopyKernel _projection_output_to_accumulate_copy; @@ -318,19 +360,16 @@ private: NECopy _copy_output; // Tensor pointers - const ITensor *_input_to_input_weights - { - nullptr - }; - const ITensor *_recurrent_to_input_weights{ nullptr }; - const ITensor *_projection_bias{ nullptr }; - const ITensor *_input_to_forget_weights{ nullptr }; - const ITensor *_input_to_cell_weights{ nullptr }; - const ITensor *_input_to_output_weights{ nullptr }; - const ITensor *_recurrent_to_forget_weights{ nullptr }; - const ITensor *_recurrent_to_cell_weights{ nullptr }; - const ITensor *_recurrent_to_output_weights{ nullptr }; - const ITensor *_projection_weights{ nullptr }; + const ITensor *_input_to_input_weights{nullptr}; + const ITensor *_recurrent_to_input_weights{nullptr}; + const ITensor *_projection_bias{nullptr}; + const ITensor *_input_to_forget_weights{nullptr}; + const ITensor *_input_to_cell_weights{nullptr}; + const ITensor *_input_to_output_weights{nullptr}; + const ITensor *_recurrent_to_forget_weights{nullptr}; + const ITensor *_recurrent_to_cell_weights{nullptr}; + const ITensor *_recurrent_to_output_weights{nullptr}; + const ITensor *_projection_weights{nullptr}; std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{}; std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{}; @@ -365,63 +404,66 @@ private: return _layer_norms[getGateIndex(g)]; } - void configure_layer_norm(LayerNormGate g, const ITensor *in); + void configure_layer_norm(LayerNormGate g, const ITensor *in); static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors - Tensor 
_input_to_forget_weights_transposed{ nullptr }; - Tensor _input_to_cell_weights_transposed{ nullptr }; - Tensor _input_to_output_weights_transposed{ nullptr }; - Tensor _input_to_input_weights_transposed{ nullptr }; - Tensor _recurrent_to_forget_weights_transposed{ nullptr }; - Tensor _recurrent_to_cell_weights_transposed{ nullptr }; - Tensor _recurrent_to_output_weights_transposed{ nullptr }; - Tensor _recurrent_to_input_weights_transposed{ nullptr }; - Tensor _projection_weights_transposed{ nullptr }; - Tensor _input_to_input_eff_bias{ nullptr }; - Tensor _recurrent_to_input_eff_bias{ nullptr }; - Tensor _input_to_forget_eff_bias{ nullptr }; - Tensor _recurrent_to_forget_eff_bias{ nullptr }; - Tensor _input_to_cell_eff_bias{ nullptr }; - Tensor _recurrent_to_cell_eff_bias{ nullptr }; - Tensor _input_to_output_eff_bias{ nullptr }; - Tensor _recurrent_to_output_eff_bias{ nullptr }; - Tensor _projection_reduction_res{ nullptr }; - Tensor _projection_eff_bias{ nullptr }; - Tensor _mm_input_to_forget_res{ nullptr }; - Tensor _mm_recurrent_to_forget_res{ nullptr }; - Tensor _mul_cell_to_forget_res{ nullptr }; - Tensor _input_to_forget_outstage_res{ nullptr }; - Tensor _cell_to_forget_outstage_res{ nullptr }; - Tensor _recurrent_to_forget_outstage_res{ nullptr }; - Tensor _forget_gate{ nullptr }; - Tensor _mm_input_to_cell_res{ nullptr }; - Tensor _input_to_cell_outstage_res{ nullptr }; - Tensor _mm_recurrent_to_cell_res{ nullptr }; - Tensor _recurrent_to_cell_outstage_res{ nullptr }; - Tensor _cell_gate{ nullptr }; - Tensor _mul_input_cell_res{ nullptr }; - Tensor _mm_input_to_input_res{ nullptr }; - Tensor _input_to_input_outstage_res{ nullptr }; - Tensor _mm_recurrent_to_input_res{ nullptr }; - Tensor _mul_cell_to_input_res{ nullptr }; - Tensor _cell_to_input_outstage_res{ nullptr }; - Tensor _recurrent_to_input_outstage_res{ nullptr }; - Tensor _input_gate{ nullptr }; - Tensor _mm_input_to_output_res{ nullptr }; - Tensor _input_to_output_outstage_res{ nullptr }; - 
Tensor _mm_recurrent_to_output_res{ nullptr }; - Tensor _mul_cell_to_output_res{ nullptr }; - Tensor _cell_to_output_outstage_res{ nullptr }; - Tensor _recurrent_to_output_outstage_res{ nullptr }; - Tensor _output_gate{ nullptr }; - Tensor _hidden_mul_res{ nullptr }; - Tensor _hidden_gate{ nullptr }; - Tensor _mm_projection_res{ nullptr }; - Tensor _projection_outstage_res{ nullptr }; - Tensor _projection_out_res{ nullptr }; - Tensor _projection_accumulate_res{ nullptr }; - Tensor _ones{ nullptr }; + Tensor _input_to_forget_weights_f32{nullptr}; + Tensor _input_to_forget_weights_symm8{nullptr}; + + Tensor _input_to_forget_weights_transposed{nullptr}; + Tensor _input_to_cell_weights_transposed{nullptr}; + Tensor _input_to_output_weights_transposed{nullptr}; + Tensor _input_to_input_weights_transposed{nullptr}; + Tensor _recurrent_to_forget_weights_transposed{nullptr}; + Tensor _recurrent_to_cell_weights_transposed{nullptr}; + Tensor _recurrent_to_output_weights_transposed{nullptr}; + Tensor _recurrent_to_input_weights_transposed{nullptr}; + Tensor _projection_weights_transposed{nullptr}; + Tensor _input_to_input_eff_bias{nullptr}; + Tensor _recurrent_to_input_eff_bias{nullptr}; + Tensor _input_to_forget_eff_bias{nullptr}; + Tensor _recurrent_to_forget_eff_bias{nullptr}; + Tensor _input_to_cell_eff_bias{nullptr}; + Tensor _recurrent_to_cell_eff_bias{nullptr}; + Tensor _input_to_output_eff_bias{nullptr}; + Tensor _recurrent_to_output_eff_bias{nullptr}; + Tensor _projection_reduction_res{nullptr}; + Tensor _projection_eff_bias{nullptr}; + Tensor _mm_input_to_forget_res{nullptr}; + Tensor _mm_recurrent_to_forget_res{nullptr}; + Tensor _mul_cell_to_forget_res{nullptr}; + Tensor _input_to_forget_outstage_res{nullptr}; + Tensor _cell_to_forget_outstage_res{nullptr}; + Tensor _recurrent_to_forget_outstage_res{nullptr}; + Tensor _forget_gate{nullptr}; + Tensor _mm_input_to_cell_res{nullptr}; + Tensor _input_to_cell_outstage_res{nullptr}; + Tensor 
_mm_recurrent_to_cell_res{nullptr}; + Tensor _recurrent_to_cell_outstage_res{nullptr}; + Tensor _cell_gate{nullptr}; + Tensor _mul_input_cell_res{nullptr}; + Tensor _mm_input_to_input_res{nullptr}; + Tensor _input_to_input_outstage_res{nullptr}; + Tensor _mm_recurrent_to_input_res{nullptr}; + Tensor _mul_cell_to_input_res{nullptr}; + Tensor _cell_to_input_outstage_res{nullptr}; + Tensor _recurrent_to_input_outstage_res{nullptr}; + Tensor _input_gate{nullptr}; + Tensor _mm_input_to_output_res{nullptr}; + Tensor _input_to_output_outstage_res{nullptr}; + Tensor _mm_recurrent_to_output_res{nullptr}; + Tensor _mul_cell_to_output_res{nullptr}; + Tensor _cell_to_output_outstage_res{nullptr}; + Tensor _recurrent_to_output_outstage_res{nullptr}; + Tensor _output_gate{nullptr}; + Tensor _hidden_mul_res{nullptr}; + Tensor _hidden_gate{nullptr}; + Tensor _mm_projection_res{nullptr}; + Tensor _projection_outstage_res{nullptr}; + Tensor _projection_out_res{nullptr}; + Tensor _projection_accumulate_res{nullptr}; + Tensor _ones{nullptr}; std::array<Tensor, _layer_norm_count> _layer_norm_output{}; inline Tensor &get_layer_norm_output(LayerNormGate g) @@ -429,14 +471,15 @@ private: return _layer_norm_output[getGateIndex(g)]; } - bool _is_prepared{ false }; - bool _has_cifg{ false }; - bool _has_cell_clipping{ false }; - bool _has_projection{ false }; - bool _has_projection_clipping{ false }; - bool _has_peephole{ false }; - bool _has_layer_norm{ false }; - bool _projection_tensor_copy_required{ false }; + bool _is_prepared{false}; + bool _has_cifg{false}; + bool _has_cell_clipping{false}; + bool _has_projection{false}; + bool _has_projection_clipping{false}; + bool _has_peephole{false}; + bool _has_layer_norm{false}; + bool _projection_tensor_copy_required{false}; + bool _convert_input_to_forget_weights_to_qsymm8{false}; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEQLSTMLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h 
b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 36302f4741..7bf97e28a5 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,28 +24,45 @@ #ifndef ARM_COMPUTE_NEQUANTIZATIONLAYER_H #define ARM_COMPUTE_NEQUANTIZATIONLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IRuntimeContext.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; -/** Basic function to simulate a quantization layer. This function calls the following NEON kernels: - * - * - * -# @ref NEQuantizationLayerKernel - * - */ -class NEQuantizationLayer : public INESimpleFunctionNoBorder +/** Basic function to run a quantization layer using @ref cpu::CpuQuantize */ +class NEQuantizationLayer : public IFunction { public: + NEQuantizationLayer(); + /** Default Destructor */ + ~NEQuantizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayer(const NEQuantizationLayer &) = delete; + /** Default move constructor */ + NEQuantizationLayer(NEQuantizationLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEQuantizationLayer &operator=(const NEQuantizationLayer &) = delete; + /** Default move assignment operator */ + NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default; /** Set the input and output tensors. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------------------|:--------------------------------------| + * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 */ @@ -58,6 +75,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEQUANTIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index c42b303a89..af7f464ac9 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -34,7 +35,6 @@ namespace arm_compute { // Forward declarations class ITensor; -class NECopyKernel; /** Basic function to run @ref NERNNLayer */ class NERNNLayer : public IFunction @@ -54,6 +54,16 @@ public: ~NERNNLayer(); /** Initialize the function * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |src3 |dst0 |dst1 | + * |:------|:------|:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | + * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input @@ -62,7 +72,13 @@ public: * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input * @param[in] info Activation layer parameter. */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, const ITensor *bias, ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *recurrent_weights, + const ITensor *bias, + ITensor *hidden_state, + ITensor *output, + ActivationLayerInfo &info); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 @@ -75,7 +91,12 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state, const ITensorInfo *output, + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *recurrent_weights, + const ITensorInfo *bias, + const ITensorInfo *hidden_state, + const ITensorInfo *output, const ActivationLayerInfo &info); // Inherited methods overridden: @@ -83,16 +104,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAddition _add_f; - NEActivationLayer _activation; - NEFullyConnectedLayer _fully_connected; - std::unique_ptr<NECopyKernel> _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + NECopy _copy_f; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index ea3be18932..b06ebe899d 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,21 +32,27 @@ namespace arm_compute class ITensor; class ITensorInfo; -/** Basic function to run @ref NEROIAlignLayerKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEROIAlignLayerKernel - * - */ +/** Basic function to run @ref NEROIAlignLayerKernel. 
*/ class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * |QASYMM8 |QASYMM16 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED | + * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. * @@ -59,7 +65,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, * otherwise same as @p input * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
@@ -71,7 +77,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIALIGNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 0b9b4f75fc..929111ad4b 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,23 +24,19 @@ #ifndef ARM_COMPUTE_NEROIPOOLINGLAYER_H #define ARM_COMPUTE_NEROIPOOLINGLAYER_H +#include "arm_compute/core/IArray.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/IArray.h" #include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; class NEROIPoolingLayerKernel; class ROIPoolingLayerInfo; -/** Basic function to run @ref NEROIPoolingLayerKernel. - * - * This function calls the following NEON kernels: - * -# @ref NEROIPoolingLayerKernel - * - */ +/** Basic function to run @ref NEROIPoolingLayerKernel. */ class NEROIPoolingLayer : public IFunction { public: @@ -58,7 +54,16 @@ public: ~NEROIPoolingLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F32 |U16 |F32 | + * |QASYMM8 |U16 |QASYMM8 | + * + * @param[in] input Source tensor. 
Data types supported: QASYMM8/F32 * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 * @param[out] output Destination tensor. Data types supported: Same as @p input. @@ -69,11 +74,30 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); + void + configure(const ITensor *input, const ITensor *rois, const ITensor *output, const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: void run() override; + /** Static function to check if given info will lead to a valid configuration of @ref NEROIPoolingLayerKernel + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F32. + * @param[in] rois TensorInfo for rois tensor which is a 2D tensor of size [5,N] (where 5 is the number ROIs). Data types supported: U16 + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. + * + * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled + * width and pooled height. + * @note The z dimensions of @p output tensor and @p input tensor must be the same. + * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
+ * @return a Status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + const ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); + private: std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel; }; diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 28976001d7..609456a4ef 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" + #include <memory> namespace arm_compute @@ -57,6 +58,21 @@ public: ~NERange(); /** Initialize the kernel's start, end, step and output tensor. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |dst | + * |:---------| + * |U8 | + * |S8 | + * |U16 | + * |S16 | + * |U32 | + * |S32 | + * |F16 | + * |F32 | + * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] start The starting value of the sequence. * @param[in] end The ending (not including) value of the sequence. diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index 89cd09812b..5b8d8cdf2b 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,12 +24,9 @@ #ifndef ARM_COMPUTE_NEON_REDUCE_MEAN_H #define ARM_COMPUTE_NEON_REDUCE_MEAN_H -#include "arm_compute/runtime/IFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -54,6 +51,17 @@ public: ~NEReduceMean(); /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @note Supported tensor rank: up to 4 * * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32 @@ -72,7 +80,8 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output); + static Status + validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output); // Inherited methods overridden: void run() override; @@ -82,13 +91,8 @@ private: std::vector<NEReductionOperation> _reduction_kernels; std::vector<Tensor> _reduced_outs; NEReshapeLayer _reshape; - NEDequantizationLayer _dequant; - NEQuantizationLayer _requant; int _reduction_ops; bool _keep_dims; - bool _do_requant; - Tensor _input_no_quant; - Tensor _output_no_quant; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEON_REDUCE_MEAN_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index 8186e2e355..f5391a6d0e 100644 --- 
a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_NEREDUCTIONOPERATION_H #include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" + #include <memory> namespace arm_compute @@ -35,7 +35,7 @@ namespace arm_compute class ITensor; class NEReductionOperationKernel; -/** Basic function to simulate a reduction operation. This function calls the following NEON kernels: +/** Basic function to simulate a reduction operation. This function calls the following kernels: * * -# @ref NEReshapeLayer * -# @ref NEReductionOperationKernel @@ -58,7 +58,19 @@ public: ~NEReductionOperation(); /** Set the input and output tensors. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -68,7 +80,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. 
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -76,7 +88,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + unsigned int axis, + ReductionOperation op, + bool keep_dims = true); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h deleted file mode 100644 index 86f366a697..0000000000 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREMAP_H -#define ARM_COMPUTE_NEREMAP_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute remap. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NERemapKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NERemap : public INESimpleFunction -{ -public: - /** Initialise the function's sources, destination, interpolation policy and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. - * @param[in] border_mode Border mode to use on the input tensor. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, - InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEREMAP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReorderLayer.h b/arm_compute/runtime/NEON/functions/NEReorderLayer.h new file mode 100644 index 0000000000..e3fa7b9c16 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEReorderLayer.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#if defined(__aarch64__) + +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; +class ITensorInfo; +class NEReorderKernel; +/** Function to compute blocked reorder. */ +class NEReorderLayer : public IFunction +{ +public: + /** Default constructor */ + NEReorderLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorderLayer(const NEReorderLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReorderLayer &operator=(const NEReorderLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReorderLayer(NEReorderLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReorderLayer &operator=(NEReorderLayer &&) = delete; + /** Default destructor */ + ~NEReorderLayer(); + /** Set the input and output tensors. + * + * Valid data layouts: + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * + * @param[in] input Source tensor. Data type supported: F32. Data layouts supported: NCHW. + * @param[out] output Destination with the same dimensions, data type, data layout as @p input + * except last dimension of data layout which needs to be multiple of blocking parameter ksize + * @param[in] input_wf WeightFormat of input. + * @param[in] output_wf WeightFormat of output. 
+ */ + void configure(const ITensor *input, + ITensor *output, + arm_compute::WeightFormat input_wf, + arm_compute::WeightFormat output_wf); + + /** Static function to check if given info will lead to a valid configuration of @ref NEReorderLayer + * + * Similar to @ref NEReorderLayer::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + arm_compute::WeightFormat input_wf, + arm_compute::WeightFormat output_wf); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<NEReorderKernel> _reorder_kernel; /**< Reorder layer kernel */ +}; +} // namespace arm_compute +#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREORDERLAYER */ + +#endif // defined(__aarch64__) diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index f76d1d252c..0a7d824d10 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 641a96e0f9..3e6e33f797 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. 
+ * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,7 @@ namespace arm_compute // Forward declarations class ITensor; -/** Basic function to run @ref NEReshapeLayerKernel */ +/** Basic function to run @ref cpu::kernels::CpuReshapeKernel */ class NEReshapeLayer : public IFunction { public: @@ -52,6 +52,14 @@ public: NEReshapeLayer &operator=(NEReshapeLayer &&); /** Initialise the kernel's inputs and outputs * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in] input Input tensor. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ @@ -73,41 +81,5 @@ private: struct Impl; std::unique_ptr<Impl> _impl; }; - -namespace experimental -{ -/** Basic function to run @ref NEReshapeLayerKernel */ -class NEReshape : public INEOperator -{ -public: - /** Default Constructor */ - NEReshape() = default; - /** Default Destructor */ - ~NEReshape(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReshape(const NEReshape &) = delete; - /** Default move constructor */ - NEReshape(NEReshapeLayer &&); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReshape &operator=(const NEReshape &) = delete; - /** Default move assignment operator */ - NEReshape &operator=(NEReshape &&); - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Input tensor info. Data type supported: All - * @param[out] output Output info. Data type supported: Same as @p input - */ - void configure(const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer - * - * @param[in] input Input tensor info. Data type supported: All - * @param[in] output Output tensor info. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace experimental } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 2048dafcb5..e03e415068 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEREVERSE_H -#define ARM_COMPUTE_NEREVERSE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -39,20 +38,39 @@ class NEReverse : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |All |U32, S32 |All | + * + * @param[in] input Input tensor. Data types supported: All + * @param[out] output Output tensor. Data type supported: Same as @p input + * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. 
Data type supported: U32/S32 + * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis + * + * @note The value of each axis should be between [-rank, rank) + * @note If there are duplicate values in the tensor, the subsequent axis values are ignored. e.g. an array of [2, 2] has the same effects as [2]. + * + * @deprecated Support for U32 in axis tensor will be removed in 24.02 release + * */ - void configure(const ITensor *input, ITensor *output, const ITensor *axis); + void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false); /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 + * @param[in] input Input tensor info. Data types supported: All + * @param[in] output Output tensor info. Data type supported: Same as @p input + * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. 
Data type supported: U32/S32 + * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *axis, + const bool use_inverted_axis = false); }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEREVERSE_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEREVERSE_H diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index fceda83510..72dfa3bda4 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,32 +26,58 @@ #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to run @ref NEScaleKernel */ -class NEScale : public INESimpleFunctionNoBorder +/** Basic function to compute Scale */ +class NEScale : public IFunction { public: - /** Constructor - * - * Initialize NEScale - */ + /** Constructor */ NEScale(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScale(const NEScale &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEScale(NEScale &&) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScale &operator=(const NEScale &) = delete; + /** Prevent instances of this class from being moved (As 
this class contains non movable objects) */ + NEScale &operator=(NEScale &&) = delete; + /** Destructor */ + ~NEScale(); /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |U8 |U8 | + * |S8 |S8 | + * |S16 |S16 | + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info @ref ScaleKernelInfo to be used for configuration + * + * @note Using S8 data type only supports NHWC, @p border_mode Replicate, and @p policy Bilinear */ void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEScale * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
* @param[in] info @ref ScaleKernelInfo to be used for validation * @@ -59,10 +85,12 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info); + // Inherited methods overridden: + void run() override; + private: - Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */ - Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */ - Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */ + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NESCALEIMAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h deleted file mode 100644 index 8dd8a80287..0000000000 --- a/arm_compute/runtime/NEON/functions/NEScharr3x3.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3_H -#define ARM_COMPUTE_NESCHARR3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEScharr3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEScharr3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESCHARR3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index c66fbfa7d4..c8e5a204dd 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ class NESelect : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |U8 |All |All |All | + * * @param[in] c Condition input tensor. Data types supported: U8. * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h index 28628778cb..70a688d3b0 100644 --- a/arm_compute/runtime/NEON/functions/NESlice.h +++ b/arm_compute/runtime/NEON/functions/NESlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,25 +32,44 @@ namespace arm_compute // Forward Declarations class ITensor; -namespace experimental -{ /** Basic function to perform tensor slicing */ -class NESlice : public INEOperator +class NESlice : public IFunction { public: + /** Default Constructor */ + NESlice(); + /** Default Destructor */ + ~NESlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESlice(const NESlice &) = delete; + /** Default move constructor */ + NESlice(NESlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESlice &operator=(const NESlice &) = delete; + /** Default move assignment operator */ + NESlice &operator=(NESlice &&); + /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * @note Start indices must be non-negative. 0 <= starts[i] * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] input Source tensor. Data type supported: All + * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). 
*/ - void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends); /** Static function to check if given info will lead to a valid configuration of @ref NESlice * @@ -66,27 +85,23 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -} // namespace experimental +namespace experimental +{ /** Basic function to perform tensor slicing */ -class NESlice : public IFunction +class NESlice : public INEOperator { public: - /** Default Constructor */ - NESlice(); - /** Default Destructor */ - ~NESlice(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESlice(const NESlice &) = delete; - /** Default move constructor */ - NESlice(NESlice &&); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESlice &operator=(const NESlice &) = delete; - /** Default move assignment operator */ - NESlice &operator=(NESlice &&); - /** Configure kernel * * @note Supported tensor rank: up to 4 @@ -94,12 +109,12 @@ public: * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. 
Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). */ - void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends); + void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); /** Static function to check if given info will lead to a valid configuration of @ref NESlice * @@ -115,14 +130,9 @@ public: * * @return A status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); - - // Inherited methods overridden: - void run() override; - -private: - struct Impl; - std::unique_ptr<Impl> _impl; + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NE_SLICE_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h deleted file mode 100644 index 89a2e07570..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel3x3.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3_H -#define ARM_COMPUTE_NESOBEL3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESOBEL3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h deleted file mode 100644 index 79e653b395..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel5x5.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5_H -#define ARM_COMPUTE_NESOBEL5x5_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NESobel5x5HorKernel; -class NESobel5x5VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel5x5HorKernel - * -# @ref NESobel5x5VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel5x5 : public IFunction -{ -public: - /** Default constructor */ - NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5(const NESobel5x5 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5 &operator=(const NESobel5x5 &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel5x5(NESobel5x5 &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel5x5 &operator=(NESobel5x5 &&) = delete; - /** Default destructor */ - ~NESobel5x5(); - /** Initialise the function's source, destinations and border mode. 
- * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr<NESobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - std::unique_ptr<NESobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h deleted file mode 100644 index 7395bb0198..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel7x7.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7_H -#define ARM_COMPUTE_NESOBEL7x7_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <cstdint> -#include <memory> - -namespace arm_compute -{ -class ITensor; -class NESobel7x7HorKernel; -class NESobel7x7VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute sobel 7x7 filter. 
This function calls the following NEON kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel7x7HorKernel - * -# @ref NESobel7x7VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel7x7 : public IFunction -{ -public: - /** Default constructor */ - NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7(const NESobel7x7 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7 &operator=(const NESobel7x7 &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel7x7(NESobel7x7 &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel7x7 &operator=(NESobel7x7 &&) = delete; - /** Default destructor */ - ~NESobel7x7(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32. - * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr<NESobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - std::unique_ptr<NESobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL7x7_H */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 40fa38afde..1787de6237 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,35 +24,18 @@ #ifndef ARM_COMPUTE_NESOFTMAXLAYER_H #define ARM_COMPUTE_NESOFTMAXLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/IMemoryManager.h" + #include <memory> namespace arm_compute { class ITensor; -class NELogits1DMaxKernel; -template <bool IS_LOG> -class NELogits1DSoftmaxKernel; -class NEFillBorderKernel; +class ITensorInfo; -/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. 
- * - * Softmax is calculated by : - * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f] - * - * Log Softmax is calculated by : - * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] - * - * This function runs the following function/kernels: - * -# If axis is not 0: - * -# @ref NEPermute - * -# @ref NEFillBorderKernel - * -# @ref NELogits1DMaxKernel - * -# @ref NELogits1DSoftmaxKernel - */ +/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. */ template <bool IS_LOG = false> class NESoftmaxLayerGeneric : public IFunction { @@ -62,17 +45,28 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NESoftmaxLayerGeneric(const NESoftmaxLayerGeneric &) = delete; /** Default move constructor */ - NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&) = default; + NESoftmaxLayerGeneric(NESoftmaxLayerGeneric &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete; /** Default move assignment operator */ - NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default; + NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&); /** Default destructor */ ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a - * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the + * multiple of the internal processing block size, @ref NEFillBorder replicates the * last value of each row to the nearest multiple. * @param[out] output Destination tensor. Data types supported: same as @p input. 
* @param[in] beta (Optional) A scaling factor for the exponent. @@ -96,17 +90,8 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEPermute _permute_input; - NEPermute _permute_output; - std::unique_ptr<NELogits1DMaxKernel> _max_kernel; - std::unique_ptr<NELogits1DSoftmaxKernel<IS_LOG>> _softmax_kernel; - std::unique_ptr<NEFillBorderKernel> _fill_border_kernel; - Tensor _max; - Tensor _tmp; - Tensor _input_permuted; - Tensor _output_permuted; - bool _needs_permute; + struct Impl; + std::unique_ptr<Impl> _impl; }; using NESoftmaxLayer = NESoftmaxLayerGeneric<false>; diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index 62af092c40..5dee61a4a8 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,9 @@ #ifndef ARM_COMPUTE_NESPACETOBATCHLAYER_H #define ARM_COMPUTE_NESPACETOBATCHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/Types.h" #include <memory> namespace arm_compute @@ -34,11 +34,11 @@ namespace arm_compute class ITensor; class ITensorInfo; class NESpaceToBatchLayerKernel; -class NEMemsetKernel; +class NEFill; -/** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions: +/** Basic function to spatial divide a tensor. This function calls the following kernels/functions: * - * -# @ref NEMemsetKernel + * -# @ref NEFill * -# @ref NESpaceToBatchLayerKernel */ class NESpaceToBatchLayer : public IFunction @@ -58,6 +58,15 @@ public: ~NESpaceToBatchLayer(); /** Set the input and output tensors. 
* + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:---------|:---------|:---------|:---------| + * |All |S32 |S32 |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32 @@ -73,7 +82,12 @@ public: * @param[in] padding_right The padding at the end of every dimension of the output tensor. * @param[out] output Tensor output. Data types supported: same as @p input */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); + void configure(const ITensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. @@ -83,7 +97,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const ITensorInfo *block_shape, + const ITensorInfo *paddings, + const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings) * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. 
@@ -95,14 +112,19 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + const ITensorInfo *output); // Inherited methods overridden: void run() override; private: std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - std::unique_ptr<NEMemsetKernel> _memset_kernel; /**< Memset kernel to run */ + std::unique_ptr<NEFill> _fill_f; /**< Fill function to run */ bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index 1e7aae215d..1820cb8f6b 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,10 +35,7 @@ class ITensor; class ITensorInfo; class NESpaceToDepthLayerKernel; -/** This function calls the following NEON kernels/functions: - * - * -# @ref NESpaceToDepthLayerKernel - */ +/** Basic function to run @ref NESpaceToDepthLayerKernel. */ class NESpaceToDepthLayer : public IFunction { public: @@ -56,6 +53,15 @@ public: ~NESpaceToDepthLayer(); /** Set the input and output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. 
Data types supported: same as @p input * @param[in] block_shape Block shape value diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h index ede5ecf65a..36358a7094 100644 --- a/arm_compute/runtime/NEON/functions/NESplit.h +++ b/arm_compute/runtime/NEON/functions/NESplit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" - #include "arm_compute/runtime/CPP/functions/CPPSplit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NESlice.h" @@ -40,6 +39,18 @@ namespace arm_compute class NESplit : public CPPSplit<NESlice> { public: + /** NESplit + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * + */ + // Inherited methods overridden: void run() override; }; diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index f6fa4f2eb3..98dacde0c1 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NESTACKLAYER_H -#define ARM_COMPUTE_NESTACKLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -58,6 +58,14 @@ public: ~NEStackLayer(); /** Initialise the kernel's inputs vector and output. 
* + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @note Supported input tensor rank: up to 4 * * @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All @@ -83,9 +91,8 @@ public: void run() override; private: - std::vector<ITensor *> _input; - std::vector<std::unique_ptr<NEStackLayerKernel>> _stack_kernels; - unsigned int _num_inputs; + std::unique_ptr<NEStackLayerKernel> _stack_kernel; + bool _is_prepared; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h index f9c94f5301..fa1113ffec 100644 --- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h +++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,18 +32,37 @@ namespace arm_compute // Forward Declarations class ITensor; -namespace experimental -{ /** Basic function to run @ref NEStridedSliceKernel */ -class NEStridedSlice : public INEOperator +class NEStridedSlice : public IFunction { public: + /** Default Constructor */ + NEStridedSlice(); + /** Default Destructor */ + ~NEStridedSlice(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSlice(const NEStridedSlice &) = delete; + /** Default move constructor */ + NEStridedSlice(NEStridedSlice &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStridedSlice &operator=(const NEStridedSlice &) = delete; + /** Default move assignment operator */ + NEStridedSlice &operator=(NEStridedSlice &&); + /** Configure kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] input Source tensor. Data type supported: All + * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -52,9 +71,14 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
*/ - void configure(const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + void configure(const ITensor *input, + ITensor *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice * @@ -70,35 +94,35 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; -} // namespace experimental +namespace experimental +{ /** Basic function to run @ref NEStridedSliceKernel */ -class NEStridedSlice : public IFunction +class NEStridedSlice : public INEOperator { public: - /** Default Constructor */ - NEStridedSlice(); - /** Default Destructor */ - ~NEStridedSlice(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSlice(const NEStridedSlice &) = delete; - /** Default move constructor */ - NEStridedSlice(NEStridedSlice &&); - /** Prevent instances of this class from being copied 
(As this class contains pointers) */ - NEStridedSlice &operator=(const NEStridedSlice &) = delete; - /** Default move assignment operator */ - NEStridedSlice &operator=(NEStridedSlice &&); - /** Configure kernel * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] input Source tensor info. Data type supported: All + * @param[out] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -107,9 +131,14 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - void configure(const ITensor *input, ITensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + void configure(const ITensorInfo *input, + ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSlice * @@ -125,16 +154,15 @@ public: * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
*/ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); - - // Inherited methods overridden: - void run() override; - -private: - struct Impl; - std::unique_ptr<Impl> _impl; + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NE_STRIDED_SLICE_H */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h deleted file mode 100644 index 03674cd297..0000000000 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUP_H -#define ARM_COMPUTE_NETABLELOOKUP_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Basic function to run @ref NETableLookupKernel */ -class NETableLookup : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and output - * - * @param[in] input First tensor input. Data types supported: U8/S16 - * @param[in] lut Input lookup table. - * @param[out] output Output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETABLELOOKUP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h deleted file mode 100644 index 9860abf835..0000000000 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETHRESHOLD_H -#define ARM_COMPUTE_NETHRESHOLD_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <cstdint> - -namespace arm_compute -{ -// Forward declarations -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEThresholdKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEThreshold : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the function's source, destination, thresholds and threshold type - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold - * @param[in] false_value Value to assign when the condition is false - * @param[in] true_value value to assign when the condition is true - * @param[in] type Thresholding type. Can either be BINARY or RANGE. - * @param[in] upper Upper threshold. 
Only used with RANGE thresholding - */ - ARM_COMPUTE_DEPRECATED_REL(20.08) - void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, - ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); - /** Initialise the function's source, destination, thresholds and threshold type - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEThreshold - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[in] output Output tensor. Data type supported: U8. - * @param[in] info Threshold descriptor. - * - * @return A status, containing an error code in case of failure - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLD_H */ diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index d5ce76c9cf..001a0a4128 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_NETILE_H #define ARM_COMPUTE_NETILE_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -39,6 +38,14 @@ class NETile : public INESimpleFunctionNoBorder public: /** Set the source, destination of the kernel * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 2651bdd727..5d2d1f1b01 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,23 +25,42 @@ #define ARM_COMPUTE_NETRANSPOSE_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> namespace arm_compute { +// Forward declarations class ITensor; class ITensorInfo; -/** Basic function to transpose a matrix on NEON. 
This function calls the following NEON kernel: - * - * -# @ref NETransposeKernel - * - */ -class NETranspose : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::kernels::CpuTransposeKernel */ +class NETranspose : public IFunction { public: + /** Default Constructor */ + NETranspose(); + /** Default Destructor */ + ~NETranspose(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETranspose(const NETranspose &) = delete; + /** Default move constructor */ + NETranspose(NETranspose &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETranspose &operator=(const NETranspose &) = delete; + /** Default move assignment operator */ + NETranspose &operator=(NETranspose &&) = default; /** Initialise the kernel's inputs and output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:------|:------| + * |All |All | + * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ @@ -54,7 +73,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute - #endif /* ARM_COMPUTE_NETRANSPOSE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index c8e85115f7..e1af96d08d 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" - #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include <memory> @@ -57,6 +56,14 @@ public: ~NEUnstack() = default; /** Set the input, output and unstacking axis. * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * * @param[in] input A tensor to be unstacked. Data type supported: All. * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input. * Note: The number of elements of the vector will be used as the number of slices to be taken from the axis. diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h deleted file mode 100644 index 0aedb87aa2..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPAFFINE_H -#define ARM_COMPUTE_NEWARPAFFINE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpAffineKernel - * - * @deprecated This function is deprecated and will be removed in release 20.02 - * -*/ -class NEWarpAffine : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 2x3 of type float. - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPAFFINE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h deleted file mode 100644 index 31a1477dca..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPPERSPECTIVE_H -#define ARM_COMPUTE_NEWARPPERSPECTIVE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpPerspectiveKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEWarpPerspective : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. 
- * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPPERSPECTIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 6b61e7031b..6caa2aeb59 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,17 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H -#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H - -#include "arm_compute/runtime/IFunction.h" +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CPP/functions/CPPPermute.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" - +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -40,13 +35,12 @@ namespace arm_compute { // Forward declarations class ITensor; -class ICPPKernel; -/** Basic function to simulate a convolution layer. 
This function calls the following NEON kernels: - * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) - * -# @ref NEWinogradLayerTransformInputKernel - * -# @ref NEWinogradLayerTransformOutputKernel - * -# @ref NEGEMMAssemblyDispatch +/** Basic function to simulate a convolution layer. This function calls the following kernels: + * + * -# @ref cpu::CpuWinogradConv2dTransformInputKernel + * -# @ref cpu::CpuWinogradConv2dTransformOutputKernel + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref CPPPermute (three times: weights, input and output) * * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true @@ -56,20 +50,35 @@ class NEWinogradConvolutionLayer : public IFunction public: /** Constructor */ NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr); - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete; - /** Default destructor */ - ~NEWinogradConvolutionLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete; + /** Default move constructor */ + NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; + /** Default move assignment operator */ + NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = default; + /** Destructor */ + ~NEWinogradConvolutionLayer(); /** Set the input and 
output tensors. * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. - * Currently only 3x3 and 5x5 kernels are supported. + * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32 + * -> 3x3 for Fp16 * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. @@ -78,62 +87,35 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. 
Default is false */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - bool enable_fast_math = false); + void configure(const ITensor *input, + const ITensor *weights, + const ITensor *biases, + ITensor *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); // Inherited methods overridden: void run() override; void prepare() override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer + /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * Currently only 3x3 and 5x5 kernels are supported. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. - * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] enable_fast_math (Optional) Enable fast math computation. 
In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. Default is false + * Similar to @ref NEWinogradConvolutionLayer::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo(), + bool enable_fast_math = false); private: - MemoryGroup _memory_group; - NEGEMM _gemm_function; - std::unique_ptr<ICPPKernel> _transform_input_kernel; - std::unique_ptr<ICPPKernel> _transform_output_kernel; - std::unique_ptr<ICPPKernel> _transform_weights_kernel; - NEActivationLayer _activationlayer_function; - - CPPPermute _permute_input; - CPPPermute _permute_weights; - CPPPermute _permute_output; - Tensor _input_transformed; - Tensor _output_transformed; - Tensor _input_workspace; - Tensor _output_workspace; - Tensor _kernel_storage; - Tensor _input_nhwc; - Tensor _output_nhwc; - Tensor _weights_hwio; - const ITensor *_input; - const ITensor *_weights; - ITensor *_output; - bool _is_prepared; - bool _is_activationlayer_enabled; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */ +#endif // 
ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h deleted file mode 100644 index 4c9a5bf6e4..0000000000 --- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEYOLOLAYER_H -#define ARM_COMPUTE_NEYOLOLAYER_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEYOLOLayerKernel */ -class NEYOLOLayer : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensor. 
- * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer parameters. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayer - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEYOLOLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h deleted file mode 100644 index 7f63717b02..0000000000 --- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/** Depthwise convolution assembly kernel glue */ -class NEDepthwiseConvolutionAssemblyDispatch : public IFunction -{ -public: - /** Default constructor - * - * @param[in,out] memory_manager Memory manager to use - */ - NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move constructor */ - NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move assignment operator */ - NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default; - /** Default destructor */ - ~NEDepthwiseConvolutionAssemblyDispatch(); - /** Initialize the function's source, destination, kernels and border_size. - * - * @note Supports only NHWC format - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. 
- * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - const Size2D &dilation = Size2D(1, 1)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch - * - * @note Supports only NHWC format - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * - * @return An error status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - const Size2D &dilation = Size2D(1, 1)); - /** Check if the optimized kernel can be used for the given kernel sizes and strides - * - * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC - * - * @param[in] input Input tensor info. - * @param[in] weights Weights tensor info. - * @param[in] conv_info Convolution layer metadata. - * @param[in] depth_multiplier (Optional) Depth multiplier to be used. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed. - */ - static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1)); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - struct LocalImpl; - -private: - MemoryGroup _memory_group; - const ITensor *_input; - const ITensor *_weights; - const ITensor *_bias; - ITensor *_output; - Tensor _packed_weights; - Tensor _workspace; - bool _is_prepared; - std::unique_ptr<LocalImpl> _pImpl; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */ |