Diffstat (limited to 'src/cpu/kernels/activation/generic/neon/qsymm16.cpp')
-rw-r--r--  src/cpu/kernels/activation/generic/neon/qsymm16.cpp  163
1 file changed, 79 insertions(+), 84 deletions(-)
diff --git a/src/cpu/kernels/activation/generic/neon/qsymm16.cpp b/src/cpu/kernels/activation/generic/neon/qsymm16.cpp
index 2aea6cba3c..891646ea00 100644
--- a/src/cpu/kernels/activation/generic/neon/qsymm16.cpp
+++ b/src/cpu/kernels/activation/generic/neon/qsymm16.cpp
@@ -21,11 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/NESymm.h"
#include "src/core/NEON/wrapper/wrapper.h"
@@ -38,7 +39,10 @@ namespace arm_compute
{
namespace cpu
{
-void neon_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void neon_qsymm16_activation(const ITensor *src,
+ ITensor *dst,
+ const ActivationLayerInfo &act_info,
+ const Window &window)
{
constexpr int window_step_x = 8;
const auto window_start_x = static_cast<int>(window.x().start());
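The hunk below reformats the body of the kernel, which follows the standard NEON shape: a main loop consuming window_step_x = 8 int16 lanes per iteration (one 128-bit register), then a scalar tail for the leftover elements. A stripped-down sketch of that loop shape, using a hypothetical pass-through kernel in place of the activation math and a plain function in place of the execute_window_loop machinery:

    #include <arm_neon.h>
    #include <cstdint>

    // Sketch of the vector-plus-tail loop shape only; the activation math
    // would replace the pass-through load/store.
    void copy_row(const int16_t *in, int16_t *out, int start, int end)
    {
        constexpr int step = 8; // 8 x int16 lanes = one 128-bit NEON register
        int x = start;
        for (; x <= end - step; x += step)
        {
            const int16x8_t v = vld1q_s16(in + x); // load 8 lanes
            vst1q_s16(out + x, v);                 // store 8 lanes
        }
        for (; x < end; ++x) // scalar tail for the remaining elements
        {
            out[x] = in[x];
        }
    }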
@@ -59,103 +63,94 @@ void neon_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationL
const float a_f32 = act_info.a();
const float b_f32 = act_info.b();
- execute_window_loop(win_collapsed, [&](const Coordinates &)
- {
- const auto input_ptr = reinterpret_cast<const qsymm16_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<qsymm16_t *>(output.ptr());
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const qsymm16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<qsymm16_t *>(output.ptr());
- wrapper::traits::neon_bitvector_t<qsymm16_t, wrapper::traits::BitWidth::W128> tmp;
- ARM_COMPUTE_UNUSED(tmp);
+ wrapper::traits::neon_bitvector_t<qsymm16_t, wrapper::traits::BitWidth::W128> tmp;
+ ARM_COMPUTE_UNUSED(tmp);
- // Compute S elements per iteration
- int x = window_start_x;
- for(; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto vin = wrapper::vloadq(input_ptr + x);
- if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
{
- // De-quantize
- const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
- // Perform activation
- const float32x4x2_t tmp_dep =
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
{
- {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep = {{
wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
- }
- };
- // Re-quantize to new output space
- tmp = vquantize_int16(tmp_dep, qi_out.scale);
- }
- else if(act == ActivationLayerInfo::ActivationFunction::TANH)
- {
- // De-quantize
- const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
- // Perform activation
- const float32x4x2_t tmp_dep =
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else if (act == ActivationLayerInfo::ActivationFunction::TANH)
{
- {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep = {{
wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
- }
- };
- // Re-quantize to new output space
- tmp = vquantize_int16(tmp_dep, qi_out.scale);
- }
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
- else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
- {
- // De-quantize
- const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
- // Perform activation
- const float32x4x2_t tmp_dep =
+ else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
{
- {
- wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[0])),
- wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[1]))
- }
- };
- // Re-quantize to new output space
- tmp = vquantize_int16(tmp_dep, qi_out.scale);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep = {{wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[0])),
+ wrapper::vmin(va_f32, wrapper::vmax(vb_f32, vin_deq.val[1]))}};
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
}
- wrapper::vstore(output_ptr + x, tmp);
- }
- // Compute left-over elements
- for(; x < window_end_x; ++x)
- {
- qsymm16_t in = *(reinterpret_cast<const qsymm16_t *>(input_ptr + x));
- qsymm16_t tmp = 0;
- if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
- {
- float tmp_f = dequantize_qsymm16(in, qi_in.scale);
- tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- tmp = quantize_qsymm16(tmp_f, qi_out);
- }
- else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
{
- float tmp_f = dequantize_qsymm16(in, qi_in.scale);
- tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
- tmp = quantize_qsymm16(tmp_f, qi_out);
- }
- else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
- {
- float tmp_f = dequantize_qsymm16(in, qi_in.scale);
- tmp_f = std::min<float>(a_f32, std::max<float>(b_f32, tmp_f));
- tmp = quantize_qsymm16(tmp_f, qi_out);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
+ qsymm16_t in = *(reinterpret_cast<const qsymm16_t *>(input_ptr + x));
+ qsymm16_t tmp = 0;
+ if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else if (act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = std::min<float>(a_f32, std::max<float>(b_f32, tmp_f));
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
}
- *(output_ptr + x) = tmp;
- }
- },
- input, output);
+ },
+ input, output);
}
} // namespace cpu
} // namespace arm_compute
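Every branch of the kernel performs the same three steps: de-quantize to float, apply the activation, re-quantize to the output scale. A scalar reference of the per-element math, mirroring the leftover loop above and assuming saturating round-to-nearest re-quantization; activate_qsymm16_ref is an illustrative name, not the library's API:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    enum class Act { Logistic, Tanh, LuBoundedRelu };

    // Scalar reference for one QSYMM16 element: de-quantize (value = scale * q),
    // apply the activation, re-quantize to the output scale with int16 saturation.
    int16_t activate_qsymm16_ref(int16_t in, float in_scale, float out_scale,
                                 Act act, float a, float b)
    {
        const float x = static_cast<float>(in) * in_scale; // de-quantize
        float y = 0.f;
        switch (act)
        {
            case Act::Logistic:      y = 1.f / (1.f + std::exp(-x)); break;
            case Act::Tanh:          y = a * std::tanh(b * x);       break;
            case Act::LuBoundedRelu: y = std::min(a, std::max(b, x)); break;
        }
        const long q = std::lround(y / out_scale); // re-quantize
        return static_cast<int16_t>(std::min<long>(32767, std::max<long>(-32768, q)));
    }

The vectorized path computes the same expressions on the two float32x4_t halves of each de-quantized register (vin_deq.val[0] and vin_deq.val[1]), which is why tmp_dep is a float32x4x2_t.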