diff options
Diffstat (limited to 'src/cpu/kernels/activation/generic/neon/qsymm16.cpp')
-rw-r--r-- | src/cpu/kernels/activation/generic/neon/qsymm16.cpp | 163 |
1 files changed, 79 insertions, 84 deletions
diff --git a/src/cpu/kernels/activation/generic/neon/qsymm16.cpp b/src/cpu/kernels/activation/generic/neon/qsymm16.cpp index 2aea6cba3c..891646ea00 100644 --- a/src/cpu/kernels/activation/generic/neon/qsymm16.cpp +++ b/src/cpu/kernels/activation/generic/neon/qsymm16.cpp @@ -21,11 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/experimental/Types.h" #include "arm_compute/function_info/ActivationLayerInfo.h" + #include "src/core/NEON/NEMath.h" #include "src/core/NEON/NESymm.h" #include "src/core/NEON/wrapper/wrapper.h" @@ -38,7 +39,10 @@ namespace arm_compute { namespace cpu { -void neon_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) +void neon_qsymm16_activation(const ITensor *src, + ITensor *dst, + const ActivationLayerInfo &act_info, + const Window &window) { constexpr int window_step_x = 8; const auto window_start_x = static_cast<int>(window.x().start()); @@ -59,103 +63,94 @@ void neon_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationL const float a_f32 = act_info.a(); const float b_f32 = act_info.b(); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast<const qsymm16_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<qsymm16_t *>(output.ptr()); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast<const qsymm16_t *>(input.ptr()); + const auto output_ptr = reinterpret_cast<qsymm16_t *>(output.ptr()); - wrapper::traits::neon_bitvector_t<qsymm16_t, wrapper::traits::BitWidth::W128> tmp; - ARM_COMPUTE_UNUSED(tmp); + wrapper::traits::neon_bitvector_t<qsymm16_t, wrapper::traits::BitWidth::W128> tmp; + ARM_COMPUTE_UNUSED(tmp); - // Compute S elements per iteration - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(input_ptr + x); - if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC) + // Compute S elements per iteration + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) { - // De-quantize - const auto vin_deq = vdequantize_int16(vin, qi_in.scale); - // Perform activation - const float32x4x2_t tmp_dep = + const auto vin = wrapper::vloadq(input_ptr + x); + if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC) { - { + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = {{ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))), wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))), - } - }; - // Re-quantize to new output space - tmp = vquantize_int16(tmp_dep, qi_out.scale); - } - else if(act == ActivationLayerInfo::ActivationFunction::TANH) - { - // De-quantize - const auto vin_deq = vdequantize_int16(vin, qi_in.scale); - // Perform activation - const float32x4x2_t tmp_dep = + }}; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else if (act == ActivationLayerInfo::ActivationFunction::TANH) { - { + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = {{ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), - } - }; - // Re-quantize to new output space - tmp = vquantize_int16(tmp_dep, qi_out.scale); - } + }}; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } - else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - { - // De-quantize - const auto vin_deq = vdequantize_int16(vin, qi_in.scale); - // Perform activation - const float32x4x2_t tmp_dep = + else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) { - { - wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[0])), - wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[1])) - } - }; - // Re-quantize to new output space - tmp = vquantize_int16(tmp_dep, qi_out.scale); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = {{wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[0])), + wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[1]))}}; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); } - wrapper::vstore(output_ptr + x, tmp); - } - // Compute left-over elements - for(; x < window_end_x; ++x) - { - qsymm16_t in = *(reinterpret_cast<const qsymm16_t *>(input_ptr + x)); - qsymm16_t tmp = 0; - if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - float tmp_f = dequantize_qsymm16(in, qi_in.scale); - tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = quantize_qsymm16(tmp_f, qi_out); - } - else if(act == ActivationLayerInfo::ActivationFunction::TANH) + // Compute left-over elements + for (; x < window_end_x; ++x) { - float tmp_f = dequantize_qsymm16(in, qi_in.scale); - tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); - tmp = quantize_qsymm16(tmp_f, qi_out); - } - else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - { - float tmp_f = dequantize_qsymm16(in, qi_in.scale); - tmp_f = std::min<float>(a_f32, std::max<float>(b_f32, tmp_f)); - tmp = quantize_qsymm16(tmp_f, qi_out); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); + qsymm16_t in = *(reinterpret_cast<const qsymm16_t *>(input_ptr + x)); + qsymm16_t tmp = 0; + if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = 1.f / (1.f + std::exp(-tmp_f)); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else if (act == ActivationLayerInfo::ActivationFunction::TANH) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = std::min<float>(a_f32, std::max<float>(b_f32, tmp_f)); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + x) = tmp; } - *(output_ptr + x) = tmp; - } - }, - input, output); + }, + input, output); } } // namespace cpu } // namespace arm_compute |