diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-09 15:39:05 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-14 17:04:27 +0000 |
commit | 0d05b6690fe69c57f63ca43d59b551f074613062 (patch) | |
tree | 497965914895a34035399a12d9e325518454a31b /src/cpu/kernels/CpuMulKernel.cpp | |
parent | 5687e55250613417c151864cb74229fc91ea6462 (diff) | |
download | ComputeLibrary-0d05b6690fe69c57f63ca43d59b551f074613062.tar.gz |
Interpreting tensor as 1D for CPU multiplication
* Also fix a bug in mul_U8_U8_U8: the vectorized store wrote every result block to output_ptr instead of output_ptr + x.
Resolves: COMPMID-5460
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ie1edafeae7aaad91164caeeb04661a8974a7fc1b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8244
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuMulKernel.cpp')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp
index da7b6d7d66..2f04bf9f26 100644
--- a/src/cpu/kernels/CpuMulKernel.cpp
+++ b/src/cpu/kernels/CpuMulKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -531,11 +531,11 @@ void mul_U8_U8_U8(const ITensor *src1, const ITensor *src2, ITensor *out, const
             }
             if(is_sat)
             {
-                vst1q_u8(output_ptr, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high)));
+                vst1q_u8(output_ptr + x, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high)));
             }
             else
             {
-                vst1q_u8(output_ptr, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high)));
+                vst1q_u8(output_ptr + x, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high)));
             }
         }
 
@@ -1618,7 +1618,8 @@ void CpuMulKernel::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *
     }
 
     // Configure kernel window
-    Window win = calculate_max_window(out_shape);
+    Window win;
+    std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src1, *src2);
     ICpuKernel::configure(win);
 }
 
```