From c9573f35c4267aa55648df4a134ebec82c5af93b Mon Sep 17 00:00:00 2001 From: giuros01 Date: Thu, 20 Jun 2019 10:30:17 +0100 Subject: COMPMID-2407: Add (logistic and tanh) activation support for QSYMM16 for NEON Change-Id: Ib89c9cfe12975e51d1710af736c73ce79e667363 Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/c/1412 Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- arm_compute/core/NEON/NESymm.h | 47 ++++++++ .../core/NEON/kernels/NEActivationLayerKernel.h | 10 +- arm_compute/core/QuantizationInfo.h | 4 +- .../runtime/NEON/functions/NEActivationLayer.h | 6 +- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 127 ++++++++++++++++++++- tests/validation/NEON/ActivationLayer.cpp | 29 ++++- tests/validation/fixtures/ActivationLayerFixture.h | 39 +++++-- tests/validation/reference/ActivationLayer.cpp | 11 ++ 8 files changed, 255 insertions(+), 18 deletions(-) diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h index 0479753426..364a317bc7 100644 --- a/arm_compute/core/NEON/NESymm.h +++ b/arm_compute/core/NEON/NESymm.h @@ -102,5 +102,52 @@ inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoi return out_s16; } + +/** Dequantize a neon vector holding 8 16-bit quantized values. + * + * @param[in] qv Input values to be dequantized. + * @param[in] scale Quantization scale + * + * @return Dequantized values in a neon vector + */ +inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale) +{ + const float32x4_t vscale = vdupq_n_f32(scale); + const float32x4x2_t vdequantized_input = + { + { + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale), + vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale) + } + }; + return vdequantized_input; +} + +/** Quantize a neon vector holding 8 floating point values. + * + * @param[in] qv Input values to be quantized. + * @param[in] scale Quantization scale + * + * @return A neon vector holding the quantized values + */ +inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale) +{ + const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); + + const int32x4x2_t rf = + { + { +#ifdef __aarch64__ + vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), + vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)) +#else //__aarch64__ + vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), + vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)) +#endif //__aarch64__ + } + }; + return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])); +} + } // namespace arm_compute #endif // __ARM_COMPUTE_NESYMM_H__ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 9381beaded..5e87bd76a5 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -58,7 +58,7 @@ public: * @note If the output tensor is a nullptr, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. + * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer information. 
*/ @@ -66,7 +66,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. + * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. * @param[in] output Destination tensor info. Data type supported: same as @p input * @param[in] act_info Activation layer information. * @@ -97,6 +97,12 @@ private: */ template typename std::enable_if::value, void>::type activation(const Window &window); + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template + typename std::enable_if::value, void>::type activation(const Window &window); private: ITensor *_input; diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index dcfdd6ba16..1c49cd29ed 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -34,6 +34,7 @@ namespace arm_compute { using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */ using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */ +using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */ /** Quantization info when assuming per layer quantization */ struct UniformQuantizationInfo @@ -350,6 +351,5 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo) { return dequantize_qsymm16(value, qinfo.uniform()); } - } // namespace arm_compute -#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */ \ No newline at end of file +#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 588de04332..c0b5f7ab37 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,7 @@ public: * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. + * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer parameters. */ @@ -52,7 +52,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. + * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. * @param[in] output Destination tensor info. Data type supported: same as @p input * @param[in] act_info Activation layer information. 
* diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 64342512a0..3953305996 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -29,6 +29,7 @@ #include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" +#include "arm_compute/core/NEON/NESymm.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" @@ -47,9 +48,9 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - static std::set qs8_supported_activations = + static std::set qasymm8_supported_activations = { ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, @@ -57,15 +58,26 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ActivationLayerInfo::ActivationFunction::LOGISTIC, ActivationLayerInfo::ActivationFunction::TANH }; + static std::set qsymm16_supported_activations = + { + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH + }; const DataType data_type = input->data_type(); const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info(); const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qs8_supported_activations.count(f_act) == 0), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0), "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported"); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0), + "For QSYMM16 only tanh and logistic are supported"); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128))); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); + // Checks performed when output is configured if((output != nullptr) && (output->total_size() != 0)) { @@ -163,11 +175,21 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation }, }; + // Activation functions : QSYMM16 + static std::map act_map_qsymm16 = + { + { 
ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation }, + { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, + }; + switch(input->info()->data_type()) { case DataType::QASYMM8: _func = act_map_qasymm8[activation_info.activation()]; break; + case DataType::QSYMM16: + _func = act_map_qsymm16[activation_info.activation()]; + break; case DataType::F32: _func = act_map_f32[activation_info.activation()]; break; @@ -469,6 +491,105 @@ typename std::enable_if::value, void>::type NEActivat input, output); } +template +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +{ + const int window_step_x = 16 / sizeof(T); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + const ActivationFunction act = F; + + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + wrapper::traits::neon_bitvector_t tmp; + ARM_COMPUTE_UNUSED(tmp); + + // Compute S elements per iteration + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(input_ptr + x); + if(act == ActivationFunction::LOGISTIC) + { + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = + { + { + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))), + } + }; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else if(act == ActivationFunction::TANH) + { + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = + { + { + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), + } + }; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + T in = *(reinterpret_cast(input_ptr + x)); + T tmp; + if(act == ActivationFunction::LOGISTIC) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = 1.f / (1.f + std::exp(-tmp_f)); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else if(act == ActivationFunction::TANH) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + 
x) = tmp; + } + }, + input, output); +} + Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_UNUSED(act_info); diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index a5030b94b7..1174a055c7 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -104,6 +104,8 @@ constexpr AbsoluteTolerance tolerance_qasymm8(0); constexpr AbsoluteTolerance tolerance_qasymm8(1); #endif // defined(__aarch64__) +constexpr AbsoluteTolerance tolerance_qsymm16(1); + /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { @@ -233,7 +235,6 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ ActivationLayerInfo::ActivationFunction::LOGISTIC, ActivationLayerInfo::ActivationFunction::TANH }); - const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), QuantizedActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); @@ -256,6 +257,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerQuantizedFixture, fra validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() // QASYMM8 + +/** Input data sets. */ +const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH + }); +const auto Int16QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), Int16QuantizedActivationFunctionsDataset), + framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); + +TEST_SUITE(QSYMM16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), Int16QuantizedActivationDataset), + framework::dataset::make("DataType", + DataType::QSYMM16)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 32768.f, 0.f) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qsymm16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), Int16QuantizedActivationDataset), + framework::dataset::make("DataType", + DataType::QSYMM16)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 32768.f, 0.f) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qsymm16); +} +TEST_SUITE_END() // QSYMM16 TEST_SUITE_END() // Quantized TEST_SUITE_END() // ActivationLayer diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h index 464382a1ec..4aaf8e7ce3 100644 --- a/tests/validation/fixtures/ActivationLayerFixture.h +++ b/tests/validation/fixtures/ActivationLayerFixture.h @@ -52,11 +52,11 @@ public: ActivationLayerInfo info(function, alpha_beta, alpha_beta); _in_place = in_place; - _output_quantization_info = calculate_output_quantization_info(info, quantization_info); - _input_quantization_info = in_place ? _output_quantization_info : quantization_info; _data_type = data_type; - _function = function; + _output_quantization_info = calculate_output_quantization_info(_data_type, info, quantization_info); + _input_quantization_info = in_place ? 
_output_quantization_info : quantization_info; + _function = function; _target = compute_target(shape, info); _reference = compute_reference(shape, info); } @@ -73,7 +73,7 @@ protected: std::uniform_real_distribution<> distribution(min_bound, max_bound); library->fill(tensor, distribution, 0); } - else if(is_data_type_quantized_asymmetric(tensor.data_type())) + else if(is_data_type_quantized_asymmetric(tensor.data_type()) || (is_data_type_quantized_symmetric(tensor.data_type()))) { library->fill_tensor_uniform(tensor, 0); } @@ -141,14 +141,39 @@ protected: } private: - QuantizationInfo calculate_output_quantization_info(const ActivationLayerInfo &act_info, const QuantizationInfo &default_qinfo) + QuantizationInfo calculate_output_quantization_info(DataType dt, const ActivationLayerInfo &act_info, const QuantizationInfo &default_qinfo) { + auto qasymm8_max = float(std::numeric_limits::max()) + 1.f; + auto qsymm16_max = float(std::numeric_limits::max()) + 1.f; + switch(act_info.activation()) { case ActivationLayerInfo::ActivationFunction::TANH: - return QuantizationInfo(1.f / 128.f, 128); + if(dt == DataType::QSYMM16) + { + return QuantizationInfo(1.f / qsymm16_max, 0); + } + else if(dt == DataType::QASYMM8) + { + return QuantizationInfo(1.f / (0.5 * qasymm8_max), int(0.5 * qasymm8_max)); + } + else + { + return default_qinfo; + } case ActivationLayerInfo::ActivationFunction::LOGISTIC: - return QuantizationInfo(1.f / 256.f, 0); + if(dt == DataType::QSYMM16) + { + return QuantizationInfo(1.f / qsymm16_max, 0); + } + else if(dt == DataType::QASYMM8) + { + return QuantizationInfo(1.f / qasymm8_max, 0); + } + else + { + return default_qinfo; + } default: return default_qinfo; } diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp index f5e98aa7e8..f573d12df8 100644 --- a/tests/validation/reference/ActivationLayer.cpp +++ b/tests/validation/reference/ActivationLayer.cpp @@ -65,6 +65,17 @@ SimpleTensor activation_layer(const SimpleTensor &src return dst; } +template <> +SimpleTensor activation_layer(const SimpleTensor &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +{ + const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; + + SimpleTensor src_tmp = convert_from_symmetric(src); + SimpleTensor dst_tmp = activation_layer(src_tmp, info); + SimpleTensor dst = convert_to_symmetric(dst_tmp, dst_qinfo); + return dst; +} + template SimpleTensor activation_layer(const SimpleTensor &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); template SimpleTensor activation_layer(const SimpleTensor &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); } // namespace reference -- cgit v1.2.1
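
Note on the QSYMM16 activation path: both the vector loop and the scalar leftover loop in NEActivationLayerKernel follow the same pattern — dequantize the int16 value with the input scale, evaluate logistic or tanh in float, then requantize with the output scale. validate_arguments() pins the output quantization to scale 1/32768 with zero offset because logistic maps into [0, 1] and tanh (with a = b = 1) into [-1, 1], so a 2^-15 scale covers the full output range of a symmetric int16. The snippet below is a minimal standalone sketch of that scalar round-trip, assuming nothing beyond the standard library; the local quantize/dequantize helpers are re-implementations for illustration only and are not the library's quantize_qsymm16()/dequantize_qsymm16(), which may differ slightly in rounding policy.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Symmetric 16-bit quantization helpers (illustrative re-implementation).
    static float dequantize_q16(int16_t v, float scale)
    {
        return static_cast<float>(v) * scale;
    }

    static int16_t quantize_q16(float v, float scale)
    {
        const int32_t q = static_cast<int32_t>(std::lround(v / scale));
        // Saturate to the int16 range, as vqmovn_s32 does in the vector path.
        return static_cast<int16_t>(std::min<int32_t>(std::max<int32_t>(q, -32768), 32767));
    }

    // Scalar QSYMM16 logistic: dequantize -> float activation -> requantize.
    static int16_t qsymm16_logistic(int16_t in, float in_scale, float out_scale)
    {
        const float x = dequantize_q16(in, in_scale);
        return quantize_q16(1.f / (1.f + std::exp(-x)), out_scale);
    }

    // Scalar QSYMM16 tanh with the activation's alpha/beta parameters.
    static int16_t qsymm16_tanh(int16_t in, float a, float b, float in_scale, float out_scale)
    {
        const float x = dequantize_q16(in, in_scale);
        return quantize_q16(a * std::tanh(b * x), out_scale);
    }

    int main()
    {
        // Input and output both quantized with scale 1/32768, zero offset,
        // matching the constraint enforced for LOGISTIC and TANH.
        const float   scale = 1.f / 32768.f;
        const int16_t in    = 8192; // represents 0.25
        const int16_t lo    = qsymm16_logistic(in, scale, scale);     // ~0.562 requantized
        const int16_t th    = qsymm16_tanh(in, 1.f, 1.f, scale, scale); // ~0.245 requantized
        return (lo > 0 && th > 0) ? 0 : 1;
    }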
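
For the vectorized path, the two helpers added to NESymm.h can be exercised directly. This is a minimal usage sketch, assuming a NEON-enabled build with the Compute Library headers on the include path; the vdequantize_int16/vquantize_int16 names and signatures are the ones introduced by this patch, while the surrounding function is illustrative.

    #include <arm_neon.h>
    #include <cstdint>
    #include "arm_compute/core/NEON/NESymm.h"

    // Round-trip 8 QSYMM16 values through float32 using the new helpers.
    void dequantize_requantize_example(const int16_t *src, int16_t *dst, float scale)
    {
        const int16x8_t     qv  = vld1q_s16(src);
        // Two float32x4 lanes holding the dequantized values.
        const float32x4x2_t deq = arm_compute::vdequantize_int16(qv, scale);
        // ... any float NEON math (e.g. the logistic/tanh lanes above) goes here ...
        const int16x8_t     out = arm_compute::vquantize_int16(deq, scale);
        vst1q_s16(dst, out);
    }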