From 678d83a5c3ec1b19ddb9df07a990262ce4bd65e1 Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Mon, 7 Jan 2019 16:05:36 +0000
Subject: COMPMID-1838: Add 4D softmax support for NEON and achieve parity with
 CL

Change-Id: I15c4a747cde2536b1caba2baf4ded9ca76e6dae2
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/487
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: VidhyaSudhan Loganathan <vidhyasudhan.loganathan@arm.com>
---
 .../runtime/NEON/functions/NESoftmaxLayer.h        | 62 +++++++++++++++-------
 1 file changed, 44 insertions(+), 18 deletions(-)

(limited to 'arm_compute')
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 3f5ec8e820..4932aeff5a 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 #define __ARM_COMPUTE_NESOFTMAXLAYER_H__
 
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/MemoryGroup.h"
@@ -49,6 +51,14 @@ class NESoftmaxLayer : public IFunction
 public:
     /** Constructor */
     NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESoftmaxLayer(const NESoftmaxLayer &) = delete;
+    /** Default move constructor */
+    NESoftmaxLayer(NESoftmaxLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESoftmaxLayer &operator=(const NESoftmaxLayer &) = delete;
+    /** Default move assignment operator */
+    NESoftmaxLayer &operator=(NESoftmaxLayer &&) = default;
     /** Set the input and output tensors.
      *
      * @param[in,out] input  Source tensor. Data types supported: QASYMM8/F16/F32. If the width is not a
@@ -56,24 +66,20 @@ public:
      *                       last value of each row to the nearest multiple.
      * @param[out]    output Destination tensor. Data types supported: same as @p input.
      * @param[in]     beta   (Optional) A scaling factor for the exponent.
-     * @param[in]     axis   (Optional) Reduction axis. It has the purpose of squashing the first @p axis
-     *                       dimensions together. For instance, given a [4x4x4x4] image,
+     * @param[in]     axis   (Optional) Reduction axis. Defaults to 1. Must be in range [1, input_num_dimensions).
+     *                       It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
      *                       when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
-     *
-     * @note The value of @p axis must be always 1 for NEON
      */
     void configure(ITensor *input, ITensor *output, float beta = 1.0f, size_t axis = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer
      *
-     * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32.
-     * @param[in] output Destination tensor. Data types supported: same as @p input
+     * @param[in] input  Source tensor info. Data types supported: QASYMM8/F16/F32.
+     * @param[in] output Destination tensor info. Data types supported: same as @p input
      * @param[in] beta   (Optional) A scaling factor for the exponent.
-     * @param[in] axis   (Optional) Reduction axis. It has the purpose of squashing the first @p axis
-     *                   dimensions together. For instance, given a [4x4x4x4] image,
+     * @param[in] axis   (Optional) Reduction axis. Defaults to 1. Must be in range [1, input_num_dimensions).
+     *                   It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
      *                   when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
      *
-     * @note The value of @p axis must be always 1 for NEON
-     *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 1);
@@ -82,12 +88,32 @@ public:
     void run() override;
 
 private:
-    MemoryGroup             _memory_group;
-    NELogits1DMaxKernel     _max_kernel;
-    NELogits1DSoftmaxKernel _softmax_kernel;
-    NEFillBorderKernel      _fill_border_kernel;
-    Tensor                  _max;
-    Tensor                  _tmp;
+    /** Utility method to configure the kernels needed to flatten the input
+     * tensor.
+     *
+     * @note This function changes the internal state of this class. In particular,
+     * it initializes the kernel @p _flat_or_reshape_kernel_ptr and the tensors
+     * @p _input_flattened and @p _output_flattened
+     *
+     * @param[in] input  Original source tensor.
+     * @param[in] output Original destination tensor.
+     * @param[in] axis   Reduction axis. Must be in range [1, input_num_dimensions).
+     *                   It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image,
+     *                   when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+     */
+    void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, size_t axis);
+
+    MemoryGroup                _memory_group;
+    NELogits1DMaxKernel        _max_kernel;
+    NELogits1DSoftmaxKernel    _softmax_kernel;
+    std::unique_ptr<INEKernel> _flat_or_reshape_kernel_ptr;
+    NEFillBorderKernel         _fill_border_kernel;
+    NEReshapeLayerKernel       _reshape_kernel;
+    Tensor                     _max;
+    Tensor                     _tmp;
+    Tensor                     _input_flattened;
+    Tensor                     _output_flattened;
+    bool                       _needs_flattening;
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NESOFTMAXLAYER_H__ */
-- 
cgit v1.2.1