diff options
Diffstat (limited to 'src/cpu/kernels/CpuElementwiseUnaryKernel.cpp')
-rw-r--r-- | src/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 108 |
1 files changed, 103 insertions, 5 deletions
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 335de78aca..0adf28af63 100644 --- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "src/core/CPP/Validate.h" #include "src/core/common/Registrars.h" @@ -42,6 +43,72 @@ namespace kernels { namespace { +#ifdef __aarch64__ + +std::unique_ptr<uint8_t[]> q8_prepare_lut(ElementWiseUnary op, const ITensorInfo *src, const ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON(src->data_type() != dst->data_type()); + ARM_COMPUTE_ERROR_ON(!is_data_type_quantized(src->data_type())); + ARM_COMPUTE_ERROR_ON(src->element_size() != 1); + + auto lut = std::unique_ptr<uint8_t[]>(new uint8_t[256]); + const auto is_signed = src->data_type() == DataType::QASYMM8_SIGNED; + const auto src_qi = src->quantization_info().uniform(); + const auto dst_qi = dst->quantization_info().uniform(); + + const auto dst_min_fp = (((is_signed) ? -128 : 0) - dst_qi.offset) * dst_qi.scale; + const auto dst_max_fp = (((is_signed) ? 127 : 255) - dst_qi.offset) * dst_qi.scale; + + for(int i = 0; i < 256; ++i) + { + const auto in = (is_signed) ? dequantize_qasymm8_signed(static_cast<int8_t>(i), src_qi) : dequantize_qasymm8(i, src_qi); + float result = 0; + + switch(op) + { + case ElementWiseUnary::RSQRT: + result = 1 / sqrt(in); + break; + + case ElementWiseUnary::EXP: + result = std::exp(in); + break; + + case ElementWiseUnary::NEG: + result = -in; + break; + + case ElementWiseUnary::LOG: + result = std::log(in); + break; + + case ElementWiseUnary::ABS: + result = std::abs(in); + break; + + case ElementWiseUnary::ROUND: + result = support::cpp11::nearbyint(in); + break; + + case ElementWiseUnary::SIN: + result = std::sin(in); + break; + + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + result = utility::clamp(result, dst_min_fp, dst_max_fp); + + const auto out = (is_signed) ? static_cast<uint8_t>(quantize_qasymm8_signed(result, dst_qi)) : quantize_qasymm8(result, dst_qi); + lut[i] = out; + } + + return lut; +} + +#endif // __aarch64__ + static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels = { { @@ -50,7 +117,8 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai { return (data.dt == DataType::F32 && data.isa.sve); }, - REGISTER_FP32_SVE(sve_fp32_elementwise_unary) + REGISTER_FP32_SVE(sve_fp32_elementwise_unary), + nullptr, }, { "sve_fp16_elementwise_unary", @@ -59,6 +127,7 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai return (data.dt == DataType::F16 && data.isa.sve && data.isa.fp16); }, REGISTER_FP16_SVE(sve_fp16_elementwise_unary), + nullptr, }, { "sve_s32_elementwise_unary", @@ -67,6 +136,7 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai return (data.dt == DataType::S32 && data.isa.sve); }, REGISTER_INTEGER_SVE(sve_s32_elementwise_unary), + nullptr, }, { "neon_fp32_elementwise_unary", @@ -75,6 +145,7 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai return data.dt == DataType::F32; }, REGISTER_FP32_NEON(neon_fp32_elementwise_unary), + nullptr, }, { "neon_fp16_elementwise_unary", @@ -83,6 +154,7 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai return data.dt == DataType::F16 && data.isa.fp16; }, REGISTER_FP16_NEON(neon_fp16_elementwise_unary), + nullptr, }, { "neon_s32_elementwise_unary", @@ -91,7 +163,28 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai return data.dt == DataType::S32; }, REGISTER_INTEGER_NEON(neon_s32_elementwise_unary), + nullptr, }, +#ifdef __aarch64__ + { + "sve_q8_elementwise_unary", + [](const DataTypeISASelectorData & data) + { + return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve; + }, + REGISTER_QASYMM8_SVE(sve_q8_elementwise_unary), + &q8_prepare_lut, + }, + { + "neon_q8_elementwise_unary", + [](const DataTypeISASelectorData & data) + { + return data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED; + }, + REGISTER_QASYMM8_NEON(neon_q8_elementwise_unary), + &q8_prepare_lut, + }, +#endif // __aarch64__ }; } // namespace @@ -112,6 +205,11 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo return; } + if(uk->prepare_func != nullptr) + { + _lut = uk->prepare_func(op, &src, &dst); + } + auto shape_and_window = compute_output_shape_and_window(src.tensor_shape()); auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type()); ICpuKernel::configure(shape_and_window.second); @@ -132,11 +230,11 @@ Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInf case ElementWiseUnary::LOG: case ElementWiseUnary::ROUND: case ElementWiseUnary::SIN: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED); break; case ElementWiseUnary::NEG: case ElementWiseUnary::ABS: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED); break; default: ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported"); @@ -157,7 +255,7 @@ void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &windo auto src = tensors.get_const_tensor(TensorType::ACL_SRC); auto dst = tensors.get_tensor(TensorType::ACL_DST); - _run_method(src, dst, window, _op); + _run_method(src, dst, window, _op, _lut.get()); } const char *CpuElementwiseUnaryKernel::name() const |