diff options
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 57
-rw-r--r-- arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h | 16
-rw-r--r-- arm_compute/runtime/NEON/functions/NESoftmaxLayer.h | 36
3 files changed, 38 insertions, 71 deletions
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index bb01584ff4..fd71f3ed4d 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -26,8 +26,7 @@ #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" -#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -47,7 +46,10 @@ class ICLTensor; * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] * * This function runs the following kernels: + * -# If axis is not 0: + * -# @ref CLPermute * -# @ref CLLogits1DNormKernel + * -# @ref CLLogits1DMaxShiftExpSumKernel */ template <bool IS_LOG = false> class CLSoftmaxLayerGeneric : public IFunction @@ -60,70 +62,47 @@ public: * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ - void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0); + void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. 
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[in] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 0); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = 0); // Inherited methods overridden: void run() override; private: - /** Utility method to configure the kernels needed to flatten the input - * tensor. 
- * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. - */ - void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis); - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. 
- */ - void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis); - MemoryGroup _memory_group; + CLPermute _permute_input; + CLPermute _permute_output; CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel; CLLogits1DNormKernel _norm_kernel; - std::unique_ptr<IFunction> _flatten_ptr; - CLReshapeLayer _reshape; CLTensor _max; CLTensor _sum; CLTensor _tmp; - CLTensor _input_flattened; - CLTensor _output_flattened; - bool _needs_flattening; + CLTensor _input_permuted; + CLTensor _output_permuted; + bool _needs_permute; }; using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h index 4ccfe2684e..0279edf63d 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -50,17 +50,15 @@ public: GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported - * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. - * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, - * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. - * Must be in range [0, input_num_dimensions). + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. 
Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * - * @note The value of @p reduce_end_axis must be always 0 for GLES + * @note The value of @p axis must be always 0 for GLES */ - void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0); + void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, int32_t axis = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 9fb4d85262..20b20201d5 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -28,8 +28,7 @@ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -44,7 +43,9 @@ class ITensor; * Log Softmax is calculated by : * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] * - * This function runs the following kernels: + * This function runs the following function/kernels: + * -# If axis is not 0: + * -# @ref NEPermute * -# @ref NEFillBorderKernel * -# @ref NELogits1DMaxKernel * -# @ref NELogits1DSoftmaxKernel @@ -70,7 +71,8 @@ public: * last value of each row to the nearest multiple. * @param[out] output Destination tensor. Data types supported: same as @p input. 
* @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer @@ -78,7 +80,8 @@ public: * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * * @return a status */ @@ -88,30 +91,17 @@ public: void run() override; private: - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. 
- */ - void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis); - MemoryGroup _memory_group; + NEPermute _permute_input; + NEPermute _permute_output; NELogits1DMaxKernel _max_kernel; NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel; - std::unique_ptr<IFunction> _flat_or_reshape_ptr; NEFillBorderKernel _fill_border_kernel; - NEReshapeLayer _reshape; Tensor _max; Tensor _tmp; - Tensor _input_flattened; - Tensor _output_flattened; - bool _needs_flattening; + Tensor _input_permuted; + Tensor _output_permuted; + bool _needs_permute; }; using NESoftmaxLayer = NESoftmaxLayerGeneric<false>; |