diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-09 15:39:05 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-14 17:04:27 +0000 |
commit | 0d05b6690fe69c57f63ca43d59b551f074613062 (patch) | |
tree | 497965914895a34035399a12d9e325518454a31b /src/cpu | |
parent | 5687e55250613417c151864cb74229fc91ea6462 (diff) | |
download | ComputeLibrary-0d05b6690fe69c57f63ca43d59b551f074613062.tar.gz |
Interpreting tensor as 1D for CPU multiplication
* Also fix a bug in mul_U8_U8_U8.
Resolves: COMPMID-5460
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ie1edafeae7aaad91164caeeb04661a8974a7fc1b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8244
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 9 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 10 | ||||
-rw-r--r-- | src/cpu/operators/CpuMul.cpp | 5 |
3 files changed, 18 insertions, 6 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp index da7b6d7d66..2f04bf9f26 100644 --- a/src/cpu/kernels/CpuMulKernel.cpp +++ b/src/cpu/kernels/CpuMulKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -531,11 +531,11 @@ void mul_U8_U8_U8(const ITensor *src1, const ITensor *src2, ITensor *out, const } if(is_sat) { - vst1q_u8(output_ptr, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); } else { - vst1q_u8(output_ptr, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); } } @@ -1618,7 +1618,8 @@ void CpuMulKernel::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo * } // Configure kernel window - Window win = calculate_max_window(out_shape); + Window win; + std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src1, *src2); ICpuKernel::configure(win); } diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index 85fcf88a96..5727b9d012 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -80,6 +80,15 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. + * + * @return The split dimension hint. + */ + size_t get_split_dimension_hint() const + { + return _split_dimension; + } + private: /** Common signature for all the specialised multiplication functions with integer scaling factor * @@ -115,6 +124,7 @@ private: MulFunctionQuantized *_func_quantized{ nullptr }; float _scale{ 0 }; int _scale_exponent{ 0 }; + size_t _split_dimension{ Window::DimY }; }; /** Interface for the complex pixelwise multiplication kernel. */ diff --git a/src/cpu/operators/CpuMul.cpp b/src/cpu/operators/CpuMul.cpp index 9cb93b7784..4c15015206 100644 --- a/src/cpu/operators/CpuMul.cpp +++ b/src/cpu/operators/CpuMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,8 @@ void CpuMul::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, f void CpuMul::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); + auto split_dimension = static_cast<kernels::CpuMulKernel *>(_kernel.get())->get_split_dimension_hint(); + NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); } Status CpuComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) |