diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/NEKernels.h | 1 | ||||
-rw-r--r-- | src/core/NEON/kernels/NESoftmaxLayerKernel.h | 141 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuSoftmaxKernel.cpp (renamed from src/core/NEON/kernels/NESoftmaxLayerKernel.cpp) | 184 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuSoftmaxKernel.h | 107 | ||||
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/NEON/list.h (renamed from src/core/NEON/kernels/softmax/impl/NEON/list.h) | 4 | ||||
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/SVE/list.h (renamed from src/core/NEON/kernels/softmax/impl/SVE/list.h) | 0 |
6 files changed, 207 insertions, 230 deletions
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index c636e5b3be..66309f9296 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -117,7 +117,6 @@ #include "src/core/NEON/kernels/NESobel3x3Kernel.h" #include "src/core/NEON/kernels/NESobel5x5Kernel.h" #include "src/core/NEON/kernels/NESobel7x7Kernel.h" -#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" #include "src/core/NEON/kernels/NEStackLayerKernel.h" diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.h b/src/core/NEON/kernels/NESoftmaxLayerKernel.h deleted file mode 100644 index 70e2417fc2..0000000000 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H - -#include "src/core/NEON/INEKernel.h" -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the identifying the max value of 1D Logits */ -class NELogits1DMaxKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NELogits1DMaxKernel"; - } - /** Default constructor */ - NELogits1DMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DMaxKernel(const NELogits1DMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DMaxKernel &operator=(const NELogits1DMaxKernel &) = delete; - /** Allow instances of this class to be moved */ - NELogits1DMaxKernel(NELogits1DMaxKernel &&) = default; - /** Allow instances of this class to be moved */ - NELogits1DMaxKernel &operator=(NELogits1DMaxKernel &&) = default; - /** Default destructor */ - ~NELogits1DMaxKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; -}; - -/** Interface for softmax computation for QASYMM8 with pre-computed max. */ -template <bool IS_LOG = false> -class NELogits1DSoftmaxKernel : public INEKernel -{ -public: - const char *name() const override - { - if(IS_LOG) - { - return "NELogits1DSoftmaxKernel"; - } - else - { - return "NELogits1DLogSoftmaxKernel"; - } - } - /** Default constructor */ - NELogits1DSoftmaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; - /** Default destructor */ - ~NELogits1DSoftmaxKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * - * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input. - */ - void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, - const ITensorInfo *output, const float beta, const ITensorInfo *tmp); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_max; - ITensor *_output; - float _beta; - ITensor *_tmp; //Temporary. Used internally -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp index fe09f1ec59..a8542b6be1 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "src/core/cpu/kernels/CpuSoftmaxKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -33,12 +33,16 @@ #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" -#include "src/core/NEON/kernels/softmax/impl/NEON/list.h" -#include "src/core/NEON/kernels/softmax/impl/SVE/list.h" #include "src/core/common/Registrars.h" +#include "src/core/cpu/kernels/softmax/impl/NEON/list.h" +#include "src/core/cpu/kernels/softmax/impl/SVE/list.h" namespace arm_compute { +namespace cpu +{ +namespace kernels +{ namespace { struct SoftmaxSelectorData @@ -208,98 +212,90 @@ Status validate_arguments_logits_1d_max(const ITensorInfo &input, const ITensorI } // namespace -NELogits1DMaxKernel::NELogits1DMaxKernel() - : _border_size() +CpuLogits1DMaxKernel::CpuLogits1DMaxKernel() { } -BorderSize NELogits1DMaxKernel::border_size() const +void CpuLogits1DMaxKernel::configure(const ITensorInfo *src, ITensorInfo *dst) { - return _border_size; -} + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); -void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), output->info()); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(*input->info(), *output->info())); - // Configure kernel window + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(*src, *dst)); // Softmax across the x dimension - const TensorShape output_shape = TensorShape(input->info()->tensor_shape()).set(0, 1); + const TensorShape output_shape = TensorShape(src->tensor_shape()).set(0, 1); // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + auto_init_if_empty(*dst, output_shape, 1, src->data_type(), src->quantization_info()); - Window win = calculate_max_window(*input->info(), Steps()); + Window win = calculate_max_window(*src, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - _input = input; - _output = output; - - const int input_width = input->info()->valid_region().shape.x(); - const int num_elems_processed_per_iteration = 16U / data_size_from_type(input->info()->data_type()); - const int num_elems_read_per_iteration = ceil_to_multiple(input_width, num_elems_processed_per_iteration); + coord.set_num_dimensions(dst->num_dimensions()); + dst->set_valid_region(ValidRegion(coord, dst->tensor_shape())); - _border_size = BorderSize(0, num_elems_read_per_iteration - input_width, 0, 0); - - INEKernel::configure(win); + ICpuKernel::configure(win); } -Status NELogits1DMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +Status CpuLogits1DMaxKernel::validate(const ITensorInfo *src, const ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(*input, *output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(*src, *dst)); return Status{}; } -void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info) +void CpuLogits1DMaxKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ _input->info()->data_type() }); - uk->ukernel(_input, _output, window); + const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type() }); + uk->ukernel(src, dst, window); +} + +const char *CpuLogits1DMaxKernel::name() const +{ + return "CpuLogits1DMaxKernel"; } namespace { -Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensorInfo &max, - const ITensorInfo &output, const float beta, const ITensorInfo &tmp, bool is_log) +Status validate_arguments_logits_softmax(const ITensorInfo &src, const ITensorInfo &max, + const ITensorInfo &dst, const float beta, const ITensorInfo &tmp, bool is_log) { ARM_COMPUTE_UNUSED(beta); // Check input - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input.data_type()); + const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(src.data_type()); // Check max - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &max); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(TensorShape(input.tensor_shape()).set(0, 1), max.tensor_shape()); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&input, &max); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &max); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(TensorShape(src.tensor_shape()).set(0, 1), max.tensor_shape()); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(&src, &max); // Check output if configured - if(output.total_size() != 0) + if(dst.total_size() != 0) { - const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(input.data_type(), is_log) : output.quantization_info(); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input, &output); - ARM_COMPUTE_RETURN_ERROR_ON(output.quantization_info() != output_quantization); + const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(src.data_type(), is_log) : dst.quantization_info(); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &dst); + ARM_COMPUTE_RETURN_ERROR_ON(dst.quantization_info() != output_quantization); } // Check tmp if configured if(tmp.total_size() != 0) { - const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : input.data_type(); + const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : src.data_type(); ARM_COMPUTE_RETURN_ERROR_ON(tmp.data_type() != tmp_data_type); // We could potentially reduce tmp memory if we could predict or make an assumption // on the maximum number of threads that will run in parallel. - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input, &tmp); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &tmp); } return Status{}; @@ -307,74 +303,90 @@ Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensor } // namespace template <bool IS_LOG> -NELogits1DSoftmaxKernel<IS_LOG>::NELogits1DSoftmaxKernel() - : _input(nullptr), _max(nullptr), _output(nullptr), _beta(1.0f), _tmp(nullptr) +CpuLogits1DSoftmaxKernel<IS_LOG>::CpuLogits1DSoftmaxKernel() + : _beta(1.0f) { } template <bool IS_LOG> -void NELogits1DSoftmaxKernel<IS_LOG>::configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp) +void CpuLogits1DSoftmaxKernel<IS_LOG>::configure(const ITensorInfo *src, const ITensorInfo *max, ITensorInfo *dst, const float beta, ITensorInfo *tmp) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, output, tmp); - ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), max->info(), output->info(), tmp->info()); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, max, dst, tmp); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, max, dst, tmp); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_softmax(*input->info(), *max->info(), *output->info(), beta, *tmp->info(), IS_LOG)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_softmax(*src, *max, *dst, beta, *tmp, IS_LOG)); + + _beta = beta; // Configure kernel window - const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->info()->data_type()); + const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type()); // Output auto initialization if not yet initialized - const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(input->info()->data_type(), IS_LOG) : output->info()->quantization_info(); - auto_init_if_empty(*output->info(), TensorInfo(*input->info()).set_quantization_info(output_quantization).reset_padding()); + const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(src->data_type(), IS_LOG) : dst->quantization_info(); + auto_init_if_empty(*dst, TensorInfo(*src).set_quantization_info(output_quantization).reset_padding()); // Tmp auto initialization if not yet initialized - const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : input->info()->data_type(); - auto_init_if_empty(*tmp->info(), TensorInfo(*input->info()).set_data_type(tmp_data_type).reset_padding()); + const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : src->data_type(); + auto_init_if_empty(*tmp, TensorInfo(*src).set_data_type(tmp_data_type).reset_padding()); // Configure kernel window - Window win = calculate_max_window(*max->info(), Steps()); + Window win = calculate_max_window(*max, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - _input = input; - _max = max; - _output = output; - _beta = beta; - _tmp = tmp; + coord.set_num_dimensions(dst->num_dimensions()); + dst->set_valid_region(ValidRegion(coord, dst->tensor_shape())); - INEKernel::configure(win); + ICpuKernel::configure(win); } template <bool IS_LOG> -Status NELogits1DSoftmaxKernel<IS_LOG>::validate(const ITensorInfo *input, const ITensorInfo *max, - const ITensorInfo *output, const float beta, const ITensorInfo *tmp) +Status CpuLogits1DSoftmaxKernel<IS_LOG>::validate(const ITensorInfo *src, const ITensorInfo *max, + const ITensorInfo *dst, const float beta, const ITensorInfo *tmp) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, output, tmp); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_softmax(*input, *max, *output, beta, *tmp, IS_LOG)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, max, dst, tmp); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_softmax(*src, *max, *dst, beta, *tmp, IS_LOG)); return Status{}; } template <bool IS_LOG> -void NELogits1DSoftmaxKernel<IS_LOG>::run(const Window &window, const ThreadInfo &info) +void CpuLogits1DSoftmaxKernel<IS_LOG>::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); - const unsigned int num_elems_processed_per_iteration = _input->info()->valid_region().shape.x(); - const unsigned int tmp_size_for_thread = _tmp->info()->element_size() * num_elems_processed_per_iteration; + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0); + auto max = tensors.get_tensor(TensorType::ACL_SRC_1); + auto dst = tensors.get_tensor(TensorType::ACL_DST_0); + auto tmp = tensors.get_tensor(TensorType::ACL_DST_1); - ARM_COMPUTE_ERROR_ON(_tmp->info()->total_size() < (info.num_threads * tmp_size_for_thread)); + const unsigned int num_elems_processed_per_iteration = src->info()->valid_region().shape.x(); + const unsigned int tmp_size_for_thread = tmp->info()->element_size() * num_elems_processed_per_iteration; - void *tmp_for_thread = _tmp->buffer() + (info.thread_id * tmp_size_for_thread); + ARM_COMPUTE_ERROR_ON(tmp->info()->total_size() < (info.num_threads * tmp_size_for_thread)); + + void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread); + + const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type() }); + uk->ukernel(src, max, tmp_for_thread, dst, _beta, IS_LOG, window); +} - const auto *uk = get_implementation_logits(SoftmaxSelectorData{ _input->info()->data_type() }); - uk->ukernel(_input, _max, tmp_for_thread, _output, _beta, IS_LOG, window); +template <bool IS_LOG> +const char *CpuLogits1DSoftmaxKernel<IS_LOG>::name() const +{ + if(IS_LOG) + { + return "CpuLogits1DSoftmaxKernel"; + } + else + { + return "CpuLogits1DLogSoftmaxKernel"; + } } -template class NELogits1DSoftmaxKernel<true>; -template class NELogits1DSoftmaxKernel<false>; +template class CpuLogits1DSoftmaxKernel<true>; +template class CpuLogits1DSoftmaxKernel<false>; +} // namespace kernels +} // namespace cpu } // namespace arm_compute diff --git a/src/core/cpu/kernels/CpuSoftmaxKernel.h b/src/core/cpu/kernels/CpuSoftmaxKernel.h new file mode 100644 index 0000000000..aa10467965 --- /dev/null +++ b/src/core/cpu/kernels/CpuSoftmaxKernel.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPU_SOFTMAXKERNEL_H +#define ARM_COMPUTE_CPU_SOFTMAXKERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/cpu/ICpuKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +/** Interface for the identifying the max value of 1D Logits */ +class CpuLogits1DMaxKernel : public ICpuKernel +{ +public: + /** Constructor */ + CpuLogits1DMaxKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DMaxKernel); + /** Set the input and output tensors. + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] dst Destination tensor info. Data types supported: same as @p input + */ + void configure(const ITensorInfo *src, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref CpuLogits1DMaxKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] dst Destination tensor info. Data types supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; +}; + +/** Interface for softmax computation for QASYMM8 with pre-computed max. */ +template <bool IS_LOG = false> +class CpuLogits1DSoftmaxKernel : public ICpuKernel +{ +public: + /** Default constructor */ + CpuLogits1DSoftmaxKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DSoftmaxKernel); + + /** Set the input and output tensors. + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[out] dst Destination tensor info. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * + * @param tmp Auxiliary tensor info. Must be type F32 and same shape as the input. + */ + void configure(const ITensorInfo *src, const ITensorInfo *max, ITensorInfo *dst, const float beta, ITensorInfo *tmp); + /** Static function to check if given info will lead to a valid configuration of @ref CpuLogits1DSoftmaxKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. + * Data types supported: same as @p input. + * @param[in] dst Destination tensor info. Data types supported: same as @p input. + * @param[in] beta A scaling factor for the exponent. + * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *max, + const ITensorInfo *dst, const float beta, const ITensorInfo *tmp); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; + +private: + float _beta; +}; +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif /* ARM_COMPUTE_CPU_SOFTMAXKERNEL_H */ diff --git a/src/core/NEON/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/NEON/list.h index a8f781f439..1aa7e8fac7 100644 --- a/src/core/NEON/kernels/softmax/impl/NEON/list.h +++ b/src/core/cpu/kernels/softmax/impl/NEON/list.h @@ -24,10 +24,10 @@ #ifndef SRC_CORE_NEON_KERNELS_SOFTMAX_LIST_H #define SRC_CORE_NEON_KERNELS_SOFTMAX_LIST_H -#include "src/core/NEON/wrapper/wrapper.h" -#include "support/SaturateCast.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" +#include "src/core/NEON/wrapper/wrapper.h" +#include "support/SaturateCast.h" namespace arm_compute { diff --git a/src/core/NEON/kernels/softmax/impl/SVE/list.h b/src/core/cpu/kernels/softmax/impl/SVE/list.h index 0936bd5a56..0936bd5a56 100644 --- a/src/core/NEON/kernels/softmax/impl/SVE/list.h +++ b/src/core/cpu/kernels/softmax/impl/SVE/list.h |