From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang-format configuration file (not
part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files under
tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
 .../kernels/gemm_matrix_add/generic/neon/fp16.cpp | 44 +++++++++++-----------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp b/src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp
index 505a37174e..4d7507a5da 100644
--- a/src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp
@@ -48,30 +48,32 @@ void matrix_addition_f16(const ITensor *src, ITensor *dst, const Window &window,
     Iterator in(src, win);
     Iterator out(dst, win);
 
-    execute_window_loop(win, [&](const Coordinates &)
-    {
-        const auto in_ptr  = reinterpret_cast<const float16_t *>(in.ptr());
-        const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());
-
-        int x = window_start_x;
-        for(; x < (window_end_x - window_step_x); x += window_step_x)
+    execute_window_loop(
+        win,
+        [&](const Coordinates &)
         {
-            float16x8x2_t       alpha_ab = vld2q_f16(out_ptr + x);
-            const float16x8x2_t c        = vld2q_f16(in_ptr + x);
-            // Multiply matrix C by its weight and accumulate
-            alpha_ab.val[0] = vaddq_f16(alpha_ab.val[0], vmulq_f16(c.val[0], beta_f16));
-            alpha_ab.val[1] = vaddq_f16(alpha_ab.val[1], vmulq_f16(c.val[1], beta_f16));
+            const auto in_ptr  = reinterpret_cast<const float16_t *>(in.ptr());
+            const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());
 
-            vst2q_f16(out_ptr + x, alpha_ab);
-        }
+            int x = window_start_x;
+            for (; x < (window_end_x - window_step_x); x += window_step_x)
+            {
+                float16x8x2_t       alpha_ab = vld2q_f16(out_ptr + x);
+                const float16x8x2_t c        = vld2q_f16(in_ptr + x);
+                // Multiply matrix C by its weight and accumulate
+                alpha_ab.val[0] = vaddq_f16(alpha_ab.val[0], vmulq_f16(c.val[0], beta_f16));
+                alpha_ab.val[1] = vaddq_f16(alpha_ab.val[1], vmulq_f16(c.val[1], beta_f16));
 
-        // Left-over loop
-        for(; x < window_end_x; ++x)
-        {
-            *(out_ptr + x) += *(in_ptr + x) * static_cast<float16_t>(beta);
-        }
-    },
-    in, out);
+                vst2q_f16(out_ptr + x, alpha_ab);
+            }
+
+            // Left-over loop
+            for (; x < window_end_x; ++x)
+            {
+                *(out_ptr + x) += *(in_ptr + x) * static_cast<float16_t>(beta);
+            }
+        },
+        in, out);
 }
 } // namespace
 void neon_fp16_gemm_matrix_add(const ITensor *src, ITensor *dst, const Window &window, float beta)
-- 
cgit v1.2.1
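
Note for readers viewing this hunk without the surrounding file: the loop being reformatted computes dst[i] += src[i] * beta over fp16 data, 16 elements per vector iteration plus a scalar tail. Below is a minimal standalone sketch of that pattern, assuming an AArch64 toolchain with fp16 vector arithmetic (e.g. -march=armv8.2-a+fp16); the function name and flat-pointer signature are illustrative, not ComputeLibrary API.

#include <arm_neon.h>
#include <cstddef>

// dst[i] += src[i] * beta for len fp16 values, mirroring the kernel's
// structure: paired vld2q_f16 loads cover 16 contiguous halfwords per
// iteration, followed by a scalar left-over loop for the remainder.
void matrix_add_f16_sketch(const float16_t *src, float16_t *dst, size_t len, float beta)
{
    const float16x8_t beta_f16 = vdupq_n_f16(static_cast<float16_t>(beta));
    const size_t      step     = 16; // vld2q_f16 fills two float16x8_t registers

    size_t x = 0;
    for (; x + step <= len; x += step)
    {
        float16x8x2_t       acc = vld2q_f16(dst + x); // current output values
        const float16x8x2_t c   = vld2q_f16(src + x); // matrix C values
        // Multiply matrix C by its weight and accumulate
        acc.val[0] = vaddq_f16(acc.val[0], vmulq_f16(c.val[0], beta_f16));
        acc.val[1] = vaddq_f16(acc.val[1], vmulq_f16(c.val[1], beta_f16));
        vst2q_f16(dst + x, acc);
    }

    // Left-over loop for the trailing < 16 elements
    for (; x < len; ++x)
    {
        dst[x] += src[x] * static_cast<float16_t>(beta);
    }
}

Because the operation is purely elementwise, the de-interleave performed by vld2q_f16 is undone by the matching vst2q_f16; the paired load/store is simply a way to process two vector registers' worth of contiguous data per iteration.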