From 35ceeb2199c569810a1524a0a21c2df2a3f5f29e Mon Sep 17 00:00:00 2001
From: Diego Lopez Recas
Date: Mon, 4 Dec 2017 18:56:10 +0000
Subject: IVGCVSW-798 Add Softmax NEON support for QASYMM8

Change-Id: I4f2cca52caf210fdb7d6bb7e9436ac51cb5088b4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112398
Reviewed-by: Anthony Barbier
Tested-by: Jenkins
---
 .../core/NEON/kernels/NESoftmaxLayerKernel.h | 116 +++++++--------------
 1 file changed, 35 insertions(+), 81 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
index bd0e642d76..c30a4cd23d 100644
--- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
@@ -43,13 +43,13 @@ public:
     NELogits1DMaxKernel();
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
      * @param[out] output Destination tensor. Data types supported: same as @p input
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
      *
-     * @param[in] input  Source tensor. Data types supported: QS8/QS16/F16/F32
+     * @param[in] input  Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
      * @param[in] output Destination tensor. Data types supported: same as @p input
      *
      * @return a status
@@ -61,117 +61,71 @@ public:
     BorderSize border_size() const override;
 
 private:
-    using Logits1DMaxFunction = void(const ITensor *in, ITensor *out, const Window &window);
+    using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window);
 
 private:
     Logits1DMaxFunction *_func;
     BorderSize           _border_size;
 };
 
-/** Interface for shifting the logits values around the max value and exponentiating the result */
-class NELogits1DShiftExpSumKernel : public INEKernel
+/** Interface for softmax computation for QASYMM8 with pre-computed max. */
+class NELogits1DSoftmaxKernel : public INEKernel
 {
 public:
     const char *name() const override
     {
-        return "NELogits1DShiftExpSumKernel";
+        return "NELogits1DSoftmaxKernel";
     }
     /** Default constructor */
-    NELogits1DShiftExpSumKernel();
+    NELogits1DSoftmaxKernel();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NELogits1DShiftExpSumKernel(const NELogits1DShiftExpSumKernel &) = delete;
+    NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NELogits1DShiftExpSumKernel &operator=(const NELogits1DShiftExpSumKernel &) = delete;
+    NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete;
     /** Allow instances of this class to be moved */
-    NELogits1DShiftExpSumKernel(NELogits1DShiftExpSumKernel &&) = default;
+    NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default;
     /** Allow instances of this class to be moved */
-    NELogits1DShiftExpSumKernel &operator=(NELogits1DShiftExpSumKernel &&) = default;
+    NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default;
     /** Default destructor */
-    ~NELogits1DShiftExpSumKernel() = default;
+    ~NELogits1DSoftmaxKernel() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]  max    Max values tensor. Data types supported: same as @p input.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+     * @param[in]  max    Max values tensor. Same shape as input with dimension 0 set to 1.
+     *                    Data types supported: same as @p input.
      * @param[out] output Destination tensor. Data types supported: same as @p input.
-     * @param[out] sum    Sum of 1D logits tensor. Data types supported: same as @p input.
-     * @param[in]  beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
-     */
-    void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta = 1.0f);
-    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DShiftExpSumKernel
-     *
-     * @param[in] input  Source tensor. Data types supported: QS8/QS16/F16/F32
-     * @param[in] max    Max values tensor. Data types supported: same as @p input
-     * @param[in] output Destination tensor. Data types supported: same as @p input.
-     * @param[in] sum    Sum of 1D logits tensor. Data types supported: same as @p input.
-     * @param[in] beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta = 1.0f);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta);
-
-private:
-    Logits1DShiftExpSumFunction *_func;
-    const ITensor               *_input;
-    const ITensor               *_max;
-    ITensor                     *_output;
-    ITensor                     *_sum;
-    float                        _beta;
-};
-
-/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
-class NELogits1DNormKernel : public INEKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NELogits1DNormKernel";
-    }
-    /** Default constructor */
-    NELogits1DNormKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NELogits1DNormKernel(const NELogits1DNormKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NELogits1DNormKernel &operator=(const NELogits1DNormKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NELogits1DNormKernel(NELogits1DNormKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NELogits1DNormKernel &operator=(NELogits1DNormKernel &&) = default;
-    /** Default destructor */
-    ~NELogits1DNormKernel() = default;
-    /** Set the input and output tensors.
+     * @param[in]  beta   A scaling factor for the exponent.
      *
-     * @param[in]  input  Source tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]  sum    Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input.
-     * @param[out] output Destination tensor. Data types supported: same as @p input.
+     * @param      tmp    Auxiliary tensor. Must be type F32 and same shape as the input.
      */
-    void configure(const ITensor *input, const ITensor *sum, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DNormKernel
+    void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp);
+    /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel
      *
-     * @param[in] input  Source tensor. Data types supported: QS8/QS16/S32/F16/F32
-     * @param[in] sum    Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input.
-     * @param[in] output Destination tensor. Data types supported: same as @p input.
+     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QS8/QS16/F16/F32.
+     * @param[in] max    Max values tensor info. Same shape as input with dimension 0 set to 1.
+     *                   Data types supported: same as @p input.
+     * @param[in] output Destination tensor info. Data types supported: same as @p input.
+     * @param[in] beta   A scaling factor for the exponent.
+     * @param[in] tmp    Tensor info of auxiliary. Must be type F32 and same shape as the input.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *max,
+                           const ITensorInfo *output, const float beta, const ITensorInfo *tmp);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    using Logits1DNormFunction = void(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window);
+    using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta,
+                                       const Window &window);
 
-private:
-    Logits1DNormFunction *_func;
-    const ITensor        *_input;
-    const ITensor        *_sum;
-    ITensor              *_output;
+    LogitsSoftmaxFunction *_func;
+    const ITensor         *_input;
+    const ITensor         *_max;
+    ITensor               *_output;
+    float                  _beta;
+    ITensor               *_tmp; //Temporary. Used internally
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ */
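
Usage sketch (editor's note, not part of the patch): after this change the softmax sequence is a max kernel followed by a single softmax kernel, instead of the old shift/exp/sum and norm pair. The snippet below drives the two kernels directly under stated assumptions: the tensor shapes and QuantizationInfo values are illustrative, the 1/256 output scale is only a common convention for quantized softmax, and border filling for the max kernel (which reports a border_size()) is omitted. In practice the public NESoftmaxLayer function is the intended entry point and is expected to wire these kernels up itself.

#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input, max, output, tmp;

    // 32 rows of 256 logits each, quantized to QASYMM8 (scale/offset assumed).
    const TensorShape      shape(256U, 32U);
    const QuantizationInfo qinfo(1.f / 255.f, 0);
    input.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, qinfo));
    // Per-row maxima: same shape as the input with dimension 0 set to 1.
    max.allocator()->init(TensorInfo(TensorShape(1U, 32U), 1, DataType::QASYMM8, qinfo));
    // Assumed output quantization: scale 1/256, offset 0.
    output.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(1.f / 256.f, 0)));
    // Auxiliary tensor: must be F32 and the same shape as the input.
    tmp.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NELogits1DMaxKernel     max_kernel;
    NELogits1DSoftmaxKernel softmax_kernel;
    max_kernel.configure(&input, &max);
    softmax_kernel.configure(&input, &max, &output, 1.0f /* beta */, &tmp);

    input.allocator()->allocate();
    max.allocator()->allocate();
    output.allocator()->allocate();
    tmp.allocator()->allocate();
    // ... fill input ...

    NEScheduler::get().schedule(&max_kernel, Window::DimY);
    NEScheduler::get().schedule(&softmax_kernel, Window::DimY);
    return 0;
}

The new static validate() overloads allow the same configuration to be checked up front, e.g. NELogits1DSoftmaxKernel::validate(input.info(), max.info(), output.info(), 1.0f, tmp.info()).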
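
Why the F32 tmp tensor (editor's note): folding the old two-stage pipeline into one kernel means the 8-bit path must keep the intermediate exponentials and their running sum at full precision before requantizing. A scalar model of one row follows; it is a sketch of the arithmetic, not the NEON implementation, and the 1/256 output scale with zero offset is an assumed convention.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of one row of the QASYMM8 softmax path. in_scale is the input's
// quantization scale; beta matches the kernel's configure() parameter.
std::vector<std::uint8_t> softmax_row_qasymm8(const std::vector<std::uint8_t> &in,
                                              float in_scale, float beta)
{
    if(in.empty())
    {
        return {};
    }
    const std::uint8_t max_val = *std::max_element(in.begin(), in.end());

    std::vector<float> tmp(in.size()); // plays the role of the F32 tmp tensor
    float              sum = 0.f;
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        // Quantization offsets cancel in the difference, and (x - max) <= 0
        // keeps exp() from overflowing.
        tmp[i] = std::exp(beta * in_scale * (float(in[i]) - float(max_val)));
        sum += tmp[i];
    }

    // Normalize and requantize: with scale 1/256 and offset 0, probabilities in
    // [0, 1] map onto [0, 256), so the top of the range clamps to 255.
    std::vector<std::uint8_t> out(in.size());
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        out[i] = static_cast<std::uint8_t>(std::min(255.f, 256.f * tmp[i] / sum));
    }
    return out;
}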