From 678d83a5c3ec1b19ddb9df07a990262ce4bd65e1 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Mon, 7 Jan 2019 16:05:36 +0000 Subject: COMPMID-1838: Add 4D softmax support for NEON and achieve parity with CL Change-Id: I15c4a747cde2536b1caba2baf4ded9ca76e6dae2 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/487 Tested-by: Arm Jenkins Reviewed-by: VidhyaSudhan Loganathan --- .../runtime/NEON/functions/NESoftmaxLayer.h | 62 +++++++++++++++------- 1 file changed, 44 insertions(+), 18 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 3f5ec8e820..4932aeff5a 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #define __ARM_COMPUTE_NESOFTMAXLAYER_H__ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -49,6 +51,14 @@ class NESoftmaxLayer : public IFunction public: /** Constructor */ NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESoftmaxLayer(const NESoftmaxLayer &) = delete; + /** Default move constructor */ + NESoftmaxLayer(NESoftmaxLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESoftmaxLayer &operator=(const NESoftmaxLayer &) = delete; + /** Default move assignment operator */ + NESoftmaxLayer &operator=(NESoftmaxLayer &&) = default; /** Set the 
input and output tensors. * * @param[in,out] input Source tensor. Data types supported: QASYMM8/F16/F32. If the width is not a @@ -56,24 +66,20 @@ public: * last value of each row to the nearest multiple. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, + * @param[in] axis (Optional) Reduction axis. Defaults to 1. Must be in range [1, input_num_dimensions). + * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. - * - * @note The value of @p axis must be always 1 for NEON */ void configure(ITensor *input, ITensor *output, float beta = 1.0f, size_t axis = 1); /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, + * @param[in] axis (Optional) Reduction axis. Defaults to 1. Must be in range [1, input_num_dimensions). + * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. 
* - * @note The value of @p axis must be always 1 for NEON - * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 1); @@ -82,12 +88,32 @@ public: void run() override; private: - MemoryGroup _memory_group; - NELogits1DMaxKernel _max_kernel; - NELogits1DSoftmaxKernel _softmax_kernel; - NEFillBorderKernel _fill_border_kernel; - Tensor _max; - Tensor _tmp; + /** Utility method to configure the kernels needed to flatten the input + * tensor. + * + * @note This function changes the internal state of this class. In particular, + * it initializes the kernel @p _flat_or_reshape_kernel_ptr and the tensors @p _input_flattened and + * @p _output_flattened + * + * @param[in] input Original source tensor. + * @param[in] output Original destination tensor. + * @param[in] axis Reduction axis. Must be in range [1, input_num_dimensions). + * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, + * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + */ + void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, size_t axis); + + MemoryGroup _memory_group; + NELogits1DMaxKernel _max_kernel; + NELogits1DSoftmaxKernel _softmax_kernel; + std::unique_ptr<INEKernel> _flat_or_reshape_kernel_ptr; + NEFillBorderKernel _fill_border_kernel; + NEReshapeLayerKernel _reshape_kernel; + Tensor _max; + Tensor _tmp; + Tensor _input_flattened; + Tensor _output_flattened; + bool _needs_flattening; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NESOFTMAXLAYER_H__ */ -- cgit v1.2.1