diff options
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve2/qsymm16.cpp')
-rw-r--r-- | src/cpu/kernels/activation/generic/sve2/qsymm16.cpp | 121 |
1 files changed, 66 insertions, 55 deletions
diff --git a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp index 5154fac8a7..f955893307 100644 --- a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp +++ b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp @@ -21,24 +21,27 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/experimental/Types.h" #include "arm_compute/function_info/ActivationLayerInfo.h" -#include <cmath> -#include <cstddef> - #include "src/core/NEON/SVEMath.h" #include "src/core/NEON/SVESymm.h" + #include <arm_sve.h> +#include <cmath> +#include <cstddef> namespace arm_compute { namespace cpu { -void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) +void sve2_qsymm16_activation(const ITensor *src, + ITensor *dst, + const ActivationLayerInfo &act_info, + const Window &window) { const auto window_start_x = static_cast<int>(window.x().start()); const auto window_end_x = static_cast<int>(window.x().end()); @@ -56,62 +59,70 @@ void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationL const auto va_f32 = svdup_n_f32(act_info.a()); const auto vb_f32 = svdup_n_f32(act_info.b()); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); + const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); - svint16_t tmp; + svint16_t tmp; - int x = window_start_x; - svbool_t pg = svwhilelt_b16(x, window_end_x); - do - { - const auto vin = svld1_s16(pg, input_ptr + x); - if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))), - svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1)))))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else if(act == ActivationLayerInfo::ActivationFunction::TANH) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))), - svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32)))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))), - svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1)))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else + int x = window_start_x; + svbool_t pg = svwhilelt_b16(x, window_end_x); + do { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } + const auto vin = svld1_s16(pg, input_ptr + x); + if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = svcreate2_f32( + svdiv_f32_z( + pg, vconst_1, + svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))), + svdiv_f32_z( + pg, vconst_1, + svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1)))))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else if (act == ActivationLayerInfo::ActivationFunction::TANH) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = svcreate2_f32( + svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))), + svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32)))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = + svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))), + svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1)))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } - svst1_s16(pg, output_ptr + x, tmp); + svst1_s16(pg, output_ptr + x, tmp); - x += svcnth(); - pg = svwhilelt_b16(x, window_end_x); + x += svcnth(); + pg = svwhilelt_b16(x, window_end_x); - } - while(svptest_any(svptrue_b16(), pg)); - }, - input, output); + } while (svptest_any(svptrue_b16(), pg)); + }, + input, output); } } // namespace cpu } // namespace arm_compute |