diff options
author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
---|---|---|
committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) | |
tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /src/cpu/kernels/activation/generic/sve2/qsymm16.cpp | |
parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) | |
download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |
Apply clang-format on repository
Code is formatted as per a revised clang format configuration
file(not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve2/qsymm16.cpp')
-rw-r--r-- | src/cpu/kernels/activation/generic/sve2/qsymm16.cpp | 121 |
1 files changed, 66 insertions, 55 deletions
diff --git a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp index 5154fac8a7..f955893307 100644 --- a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp +++ b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp @@ -21,24 +21,27 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/experimental/Types.h" #include "arm_compute/function_info/ActivationLayerInfo.h" -#include <cmath> -#include <cstddef> - #include "src/core/NEON/SVEMath.h" #include "src/core/NEON/SVESymm.h" + #include <arm_sve.h> +#include <cmath> +#include <cstddef> namespace arm_compute { namespace cpu { -void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) +void sve2_qsymm16_activation(const ITensor *src, + ITensor *dst, + const ActivationLayerInfo &act_info, + const Window &window) { const auto window_start_x = static_cast<int>(window.x().start()); const auto window_end_x = static_cast<int>(window.x().end()); @@ -56,62 +59,70 @@ void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationL const auto va_f32 = svdup_n_f32(act_info.a()); const auto vb_f32 = svdup_n_f32(act_info.b()); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); + const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); - svint16_t tmp; + svint16_t tmp; - int x = window_start_x; - svbool_t pg = svwhilelt_b16(x, window_end_x); - do - { - const auto vin = svld1_s16(pg, input_ptr + x); - if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))), - svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1)))))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else if(act == ActivationLayerInfo::ActivationFunction::TANH) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))), - svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32)))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - { - // De-quantize - auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); - // Perform activation - const svfloat32x2_t tmp_dep = svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))), - svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1)))); - // Re-quantize to new output space - tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); - } - else + int x = window_start_x; + svbool_t pg = svwhilelt_b16(x, window_end_x); + do { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } + const auto vin = svld1_s16(pg, input_ptr + x); + if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = svcreate2_f32( + svdiv_f32_z( + pg, vconst_1, + svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))), + svdiv_f32_z( + pg, vconst_1, + svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1)))))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else if (act == ActivationLayerInfo::ActivationFunction::TANH) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = svcreate2_f32( + svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))), + svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32)))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + { + // De-quantize + auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale); + // Perform activation + const svfloat32x2_t tmp_dep = + svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))), + svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1)))); + // Re-quantize to new output space + tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } - svst1_s16(pg, output_ptr + x, tmp); + svst1_s16(pg, output_ptr + x, tmp); - x += svcnth(); - pg = svwhilelt_b16(x, window_end_x); + x += svcnth(); + pg = svwhilelt_b16(x, window_end_x); - } - while(svptest_any(svptrue_b16(), pg)); - }, - input, output); + } while (svptest_any(svptrue_b16(), pg)); + }, + input, output); } } // namespace cpu } // namespace arm_compute |