diff options
Diffstat (limited to 'src/cpu/kernels')
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 9 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 10 |
2 files changed, 15 insertions, 4 deletions
diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp index da7b6d7d66..2f04bf9f26 100644 --- a/src/cpu/kernels/CpuMulKernel.cpp +++ b/src/cpu/kernels/CpuMulKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -531,11 +531,11 @@ void mul_U8_U8_U8(const ITensor *src1, const ITensor *src2, ITensor *out, const } if(is_sat) { - vst1q_u8(output_ptr, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); } else { - vst1q_u8(output_ptr, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); } } @@ -1618,7 +1618,8 @@ void CpuMulKernel::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo * } // Configure kernel window - Window win = calculate_max_window(out_shape); + Window win; + std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src1, *src2); ICpuKernel::configure(win); } diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index 85fcf88a96..5727b9d012 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -80,6 +80,15 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. + * + * @return The split dimension hint. + */ + size_t get_split_dimension_hint() const + { + return _split_dimension; + } + private: /** Common signature for all the specialised multiplication functions with integer scaling factor * @@ -115,6 +124,7 @@ private: MulFunctionQuantized *_func_quantized{ nullptr }; float _scale{ 0 }; int _scale_exponent{ 0 }; + size_t _split_dimension{ Window::DimY }; }; /** Interface for the complex pixelwise multiplication kernel. */ |