From 8d4d1b85bc57d5f76f3939bb422e44df68dc2342 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 28 Nov 2019 11:31:23 +0000 Subject: COMPMID-2796: Add support for QASYMM8_SIGNED in NEActivationLayer and NEPReluLayer Change-Id: I089fd19a6beab7779d690bc9ace327f661c2753d Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/2407 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 175 +++++++++++++++++++++- 1 file changed, 172 insertions(+), 3 deletions(-) (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index c338ef09c7..44f76f6e22 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -48,7 +48,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); static std::set qasymm8_supported_activations = { @@ -72,8 +72,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0), "For QSYMM16 only tanh and logistic are supported"); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128))); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) + && (oq_info != QuantizationInfo(1.f / 128.f, 128))); + ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) + && (oq_info != QuantizationInfo(1.f / 256.f, 0))); + + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128))); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); @@ -173,6 +178,17 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat }; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ + // Activation functions : QASYMM8_SIGNED + static std::map act_map_qasymm8_signed = + { + { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation }, + { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, + { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation }, + }; + // Activation functions : QASYMM8 static std::map act_map_qasymm8 = { @@ -193,6 +209,9 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat switch(input->info()->data_type()) { + case DataType::QASYMM8_SIGNED: + _func = act_map_qasymm8_signed[activation_info.activation()]; + break; case DataType::QASYMM8: _func = act_map_qasymm8[activation_info.activation()]; break; @@ -507,6 +526,156 @@ typename std::enable_if::value, void>::type NEActivat input, output); } +template +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +{ + const int window_step_x = 16 / sizeof(T); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + const ActivationFunction act = F; + + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in)); + const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in)); + const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in); + const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in); + const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in); + const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + + // Initialise scale/offset for re-quantization + float s = qi_in.scale / qi_out.scale; + float o = -qi_in.offset * s + qi_out.offset; + float32x4_t vs = vdupq_n_f32(s); + float32x4_t vo = vdupq_n_f32(o); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + wrapper::traits::neon_bitvector_t tmp; + + // Compute S elements per iteration + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(input_ptr + x); + if(act == ActivationFunction::RELU) + { + // Perform activation + tmp = vmaxq_s8(vconst_0, vin); + // Re-quantize to new output space + tmp = vmlaq_qasymm8_signed(tmp, vs, vo); + } + else if(act == ActivationFunction::BOUNDED_RELU) + { + // Perform activation + tmp = vminq_s8(va, vmaxq_s8(vconst_0, vin)); + // Re-quantize to new output space + tmp = vmlaq_qasymm8_signed(tmp, vs, vo); + } + else if(act == ActivationFunction::LU_BOUNDED_RELU) + { + // Perform activation + tmp = vminq_s8(va, vmaxq_s8(vb, vin)); + // Re-quantize to new output space + tmp = vmlaq_qasymm8_signed(tmp, vs, vo); + } + else if(act == ActivationFunction::LOGISTIC) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = + { + { + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))), + } + }; + // Re-quantize to new output space + tmp = vquantize_signed(tmp_dep, qi_out); + } + else if(act == ActivationFunction::TANH) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = + { + { + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))), + } + }; + // Re-quantize to new output space + tmp = vquantize_signed(tmp_dep, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + T in = *(reinterpret_cast(input_ptr + x)); + T tmp; + if(act == ActivationFunction::RELU) + { + tmp = std::max(const_0, in); + tmp = std::max(0, std::min(tmp * s + o, 255)); + } + else if(act == ActivationFunction::BOUNDED_RELU) + { + tmp = std::min(a, std::max(const_0, in)); + tmp = std::max(0, std::min(tmp * s + o, 255)); + } + else if(act == ActivationFunction::LU_BOUNDED_RELU) + { + tmp = std::min(a, std::max(b, in)); + tmp = std::max(0, std::min(tmp * s + o, 255)); + } + else if(act == ActivationFunction::LOGISTIC) + { + float tmp_f = dequantize_qasymm8_signed(in, qi_in); + tmp_f = 1.f / (1.f + std::exp(-tmp_f)); + tmp = quantize_qasymm8_signed(tmp_f, qi_out); + } + else if(act == ActivationFunction::TANH) + { + float tmp_f = dequantize_qasymm8_signed(in, qi_in); + tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); + tmp = quantize_qasymm8_signed(tmp_f, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + x) = tmp; + } + }, + input, output); +} + template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) { -- cgit v1.2.1