diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/Helpers.h | 25 |
-rw-r--r-- | arm_compute/core/Helpers.inl | 5 |
-rw-r--r-- | arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 57 |
-rw-r--r-- | arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h | 16 |
-rw-r--r-- | arm_compute/runtime/NEON/functions/NESoftmaxLayer.h | 36 |
5 files changed, 53 insertions, 86 deletions
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 48ac38b170..90dd6082e1 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -801,16 +801,6 @@ inline T wrap_around(T x, T m) return x >= 0 ? x % m : (x % m + m) % m; } -/** Convert a dimension axis to the number of dimensions in the range [0, @p dim_axis] - * Handle negative axis, negative axis is used to specify axis from the end (e.g. -1 for the last axis). - * - * @param[in] dim_axis The last axis (inclusive) in the range [0, @p dim_axis] - * @param[in] num_dims The total number of dimensions - * - * @return The number of dimensions in the range [0, @p dim_axis] - */ -inline size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims); - /** Convert negative coordinates to positive in the range [0, num_dims_input] * * @param[out] coords Array of coordinates to be converted. @@ -852,6 +842,21 @@ inline unsigned int get_next_power_two(unsigned int x) return x; } + +/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front + * + * @note This function assumes a tensor rank <= 4 + * + * Axis selects the dimension on which softmax is performed. + * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5. + * Interally softmax kernels is always performed on the first dimension (front dimension), therefore permutation is + * required to put the dimension specified by @p axis to the first dimension. + * + * @param[in] axis Axis on which to perform softmax. 
Supported: 1, 2, 3 (0 implies no permutation needed) + * + * @return the permutation vector + */ +PermutationVector get_permutation_vector_from_softmax_axis(size_t axis); } // namespace arm_compute #include "arm_compute/core/Helpers.inl" diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index df0c929372..5613e8c74e 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -29,11 +29,6 @@ namespace arm_compute { -inline size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims) -{ - return static_cast<size_t>(wrap_around(dim_axis, num_dims)) + 1; -} - inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) { ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index bb01584ff4..fd71f3ed4d 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -26,8 +26,7 @@ #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" -#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -47,7 +46,10 @@ class ICLTensor; * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] * * This function runs the following kernels: + * -# If axis is not 0: + * -# @ref CLPermute * -# @ref CLLogits1DNormKernel + * -# @ref CLLogits1DMaxShiftExpSumKernel */ template <bool IS_LOG = false> class CLSoftmaxLayerGeneric : public IFunction @@ -60,70 +62,47 @@ public: * @param[in] input Source tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ - void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0); + void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 0); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, int32_t axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax * @param[in] output Destination tensor. 
Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 0); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = 0); // Inherited methods overridden: void run() override; private: - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. - */ - void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis); - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. 
- */ - void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis); - MemoryGroup _memory_group; + CLPermute _permute_input; + CLPermute _permute_output; CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel; CLLogits1DNormKernel _norm_kernel; - std::unique_ptr<IFunction> _flatten_ptr; - CLReshapeLayer _reshape; CLTensor _max; CLTensor _sum; CLTensor _tmp; - CLTensor _input_flattened; - CLTensor _output_flattened; - bool _needs_flattening; + CLTensor _input_permuted; + CLTensor _output_permuted; + bool _needs_permute; }; using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h index 4ccfe2684e..0279edf63d 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -50,17 +50,15 @@ public: GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported - * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. - * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, - * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. - * Must be in range [0, input_num_dimensions). + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. 
Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * - * @note The value of @p reduce_end_axis must be always 0 for GLES + * @note The value of @p axis must be always 0 for GLES */ - void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0); + void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, int32_t axis = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 9fb4d85262..20b20201d5 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -28,8 +28,7 @@ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -44,7 +43,9 @@ class ITensor; * Log Softmax is calculated by : * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] * - * This function runs the following kernels: + * This function runs the following function/kernels: + * -# If axis is not 0: + * -# @ref NEPermute * -# @ref NEFillBorderKernel * -# @ref NELogits1DMaxKernel * -# @ref NELogits1DSoftmaxKernel @@ -70,7 +71,8 @@ public: * last value of each row to the nearest multiple. * @param[out] output Destination tensor. Data types supported: same as @p input. 
* @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 */ void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer @@ -78,7 +80,8 @@ public: * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. + * @param[in] axis (Optional) The dimension in which to apply the function. E.g. for input of shape 4x5x6 and + * axis=1, softmax will be applied to 4x6=24 vectors of size 5. Defaults to 0 * * @return a status */ @@ -88,30 +91,17 @@ public: void run() override; private: - /** Utility method to configure the kernels needed to flatten the input - * tensor. - * - * @note This function changes the internal state of this class. In particular, - * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and - * @p _output_flat - * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Only supports axis 0. 
- */ - void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis); - MemoryGroup _memory_group; + NEPermute _permute_input; + NEPermute _permute_output; NELogits1DMaxKernel _max_kernel; NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel; - std::unique_ptr<IFunction> _flat_or_reshape_ptr; NEFillBorderKernel _fill_border_kernel; - NEReshapeLayer _reshape; Tensor _max; Tensor _tmp; - Tensor _input_flattened; - Tensor _output_flattened; - bool _needs_flattening; + Tensor _input_permuted; + Tensor _output_permuted; + bool _needs_permute; }; using NESoftmaxLayer = NESoftmaxLayerGeneric<false>; |