aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2020-08-21 12:28:30 +0100
committerSiCong Li <sicong.li@arm.com>2020-08-25 14:12:07 +0000
commit96209c73b071bb65d4919fb441076f977095a31b (patch)
tree50252f1a33992b3a6171c6b2becf6da1b6f0022d /arm_compute
parent5111264954e2d1a4d3e91d23a0869a0d7105be4c (diff)
downloadComputeLibrary-96209c73b071bb65d4919fb441076f977095a31b.tar.gz
COMPMID-3694 COMPMID-3695 COMPMID-3458: Softmax Axis
* Properly support "axis" in CL and NEON (and GC) SoftmaxLayer and LogSoftmaxLayer in accord with mainstream frameworks. Axis now defines the dimension on which softmax is performed, and supports the range [-rank, rank) * Extend validation tests to include valid and invalid axes * Remove unnecessary LogSoftmaxLayer fixture, as it is only a specialisation of the SoftmaxLayer fixture * Change the validation fill value range from [-1000, 1000] to [-10, 10], as the former often results in sparse outputs with a single one and zeros elsewhere Change-Id: I8a0040453182b04ed88260de3ba434e98258d863 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3830 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/Helpers.h25
-rw-r--r--arm_compute/core/Helpers.inl5
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h57
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h16
-rw-r--r--arm_compute/runtime/NEON/functions/NESoftmaxLayer.h36
5 files changed, 53 insertions, 86 deletions
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 48ac38b170..90dd6082e1 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -801,16 +801,6 @@ inline T wrap_around(T x, T m)
return x >= 0 ? x % m : (x % m + m) % m;
}
-/** Convert a dimension axis to the number of dimensions in the range [0, @p dim_axis]
- * Handle negative axis, negative axis is used to specify axis from the end (e.g. -1 for the last axis).
- *
- * @param[in] dim_axis The last axis (inclusive) in the range [0, @p dim_axis]
- * @param[in] num_dims The total number of dimensions
- *
- * @return The number of dimensions in the range [0, @p dim_axis]
- */
-inline size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims);
-
/** Convert negative coordinates to positive in the range [0, num_dims_input]
*
* @param[out] coords Array of coordinates to be converted.
@@ -852,6 +842,21 @@ inline unsigned int get_next_power_two(unsigned int x)
return x;
}
+
+/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front
+ *
+ * @note This function assumes a tensor rank <= 4
+ *
+ * Axis selects the dimension on which softmax is performed.
+ * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
+ * Internally softmax is always performed on the first dimension (front dimension), therefore permutation is
+ * required to put the dimension specified by @p axis to the first dimension.
+ *
+ * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed)
+ *
+ * @return the permutation vector
+ */
+PermutationVector get_permutation_vector_from_softmax_axis(size_t axis);
} // namespace arm_compute
#include "arm_compute/core/Helpers.inl"
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index df0c929372..5613e8c74e 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -29,11 +29,6 @@
namespace arm_compute
{
-inline size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims)
-{
- return static_cast<size_t>(wrap_around(dim_axis, num_dims)) + 1;
-}
-
inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
{
ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index bb01584ff4..fd71f3ed4d 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -26,8 +26,7 @@
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
-#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -47,7 +46,10 @@ class ICLTensor;
 * @f[ out = (x - max(x)) * beta - log(\sum{e^{(x - max(x)) * beta}}) @f]
*
* This function runs the following kernels:
+ * -# If axis is not 0:
+ * -# @ref CLPermute
* -# @ref CLLogits1DNormKernel
+ * -# @ref CLLogits1DMaxShiftExpSumKernel
*/
template <bool IS_LOG = false>
class CLSoftmaxLayerGeneric : public IFunction
@@ -60,70 +62,47 @@ public:
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*/
- void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0);
+ void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0);
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[in] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = 0);
// Inherited methods overridden:
void run() override;
private:
- /** Utility method to configure the kernels needed to flatten the input
- * tensor.
- *
- * @note This function changes the internal state of this class. In particular,
- * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
- * @p _output_flat
- *
- * @param[in] input Original source tensor.
- * @param[in] output Original destination tensor.
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
- */
- void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis);
- /** Utility method to configure the kernels needed to flatten the input
- * tensor.
- *
- * @note This function changes the internal state of this class. In particular,
- * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
- * @p _output_flat
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Original source tensor.
- * @param[in] output Original destination tensor.
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
- */
- void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis);
-
MemoryGroup _memory_group;
+ CLPermute _permute_input;
+ CLPermute _permute_output;
CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;
CLLogits1DNormKernel _norm_kernel;
- std::unique_ptr<IFunction> _flatten_ptr;
- CLReshapeLayer _reshape;
CLTensor _max;
CLTensor _sum;
CLTensor _tmp;
- CLTensor _input_flattened;
- CLTensor _output_flattened;
- bool _needs_flattening;
+ CLTensor _input_permuted;
+ CLTensor _output_permuted;
+ bool _needs_permute;
};
using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>;
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
index 4ccfe2684e..0279edf63d 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
@@ -50,17 +50,15 @@ public:
GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported
- * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * @param[in] input Source tensor. Data types supported: F16/F32
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*
- * @note The value of @p reduce_end_axis must be always 0 for GLES
+ * @note The value of @p axis must always be 0 for GLES
*/
- void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
+ void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, int32_t axis = 0);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 9fb4d85262..20b20201d5 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -28,8 +28,7 @@
#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
@@ -44,7 +43,9 @@ class ITensor;
* Log Softmax is calculated by :
 * @f[ out = (x - max(x)) * beta - log(\sum{e^{(x - max(x)) * beta}}) @f]
*
- * This function runs the following kernels:
+ * This function runs the following function/kernels:
+ * -# If axis is not 0:
+ * -# @ref NEPermute
* -# @ref NEFillBorderKernel
* -# @ref NELogits1DMaxKernel
* -# @ref NELogits1DSoftmaxKernel
@@ -70,7 +71,8 @@ public:
* last value of each row to the nearest multiple.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] beta (Optional) A scaling factor for the exponent.
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*/
void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer
@@ -78,7 +80,8 @@ public:
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent.
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
+ * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and
+ * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0
*
* @return a status
*/
@@ -88,30 +91,17 @@ public:
void run() override;
private:
- /** Utility method to configure the kernels needed to flatten the input
- * tensor.
- *
- * @note This function changes the internal state of this class. In particular,
- * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
- * @p _output_flat
- *
- * @param[in] input Original source tensor.
- * @param[in] output Original destination tensor.
- * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0.
- */
- void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis);
-
MemoryGroup _memory_group;
+ NEPermute _permute_input;
+ NEPermute _permute_output;
NELogits1DMaxKernel _max_kernel;
NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel;
- std::unique_ptr<IFunction> _flat_or_reshape_ptr;
NEFillBorderKernel _fill_border_kernel;
- NEReshapeLayer _reshape;
Tensor _max;
Tensor _tmp;
- Tensor _input_flattened;
- Tensor _output_flattened;
- bool _needs_flattening;
+ Tensor _input_permuted;
+ Tensor _output_permuted;
+ bool _needs_permute;
};
using NESoftmaxLayer = NESoftmaxLayerGeneric<false>;