diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-09 15:39:05 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-14 17:04:27 +0000 |
commit | 0d05b6690fe69c57f63ca43d59b551f074613062 (patch) | |
tree | 497965914895a34035399a12d9e325518454a31b /src | |
parent | 5687e55250613417c151864cb74229fc91ea6462 (diff) | |
download | ComputeLibrary-0d05b6690fe69c57f63ca43d59b551f074613062.tar.gz |
Interpreting tensor as 1D for CPU multiplication
* Also fix a bug in mul_U8_U8_U8: the vectorized store wrote to output_ptr instead of output_ptr + x, ignoring the x offset.
Resolves: COMPMID-5460
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ie1edafeae7aaad91164caeeb04661a8974a7fc1b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8244
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/helpers/WindowHelpers.cpp | 53 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.h | 13 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.cpp | 9 | ||||
-rw-r--r-- | src/cpu/kernels/CpuMulKernel.h | 10 | ||||
-rw-r--r-- | src/cpu/operators/CpuMul.cpp | 5 |
5 files changed, 83 insertions, 7 deletions
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp index 75ffb71b4b..fa152c9f58 100644 --- a/src/core/helpers/WindowHelpers.cpp +++ b/src/core/helpers/WindowHelpers.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020-2021 Arm Limited. +* Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -231,4 +231,55 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St return window; } + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) +{ + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions()); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + + size_t squashed_bytes = src0.element_size(); + + // Try to squash the low dimensions together. + for(; dim < num_dimensions; ++dim) + { + if(shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes) + { + break; + } + + squashed_bytes *= shape0[dim]; + } + + if(dim == num_dimensions) + { + auto squashed_elements = squashed_bytes / src0.element_size(); + + split_dimension = Window::DimX; + + // The input tensors can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + + for(dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generates the max window. 
+ for(dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1)); + } + } + + return std::make_pair(win, split_dimension); +} } // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h index 28c39ccd82..c9e5a135c0 100644 --- a/src/core/helpers/WindowHelpers.h +++ b/src/core/helpers/WindowHelpers.h @@ -176,6 +176,19 @@ inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps return calculate_max_enlarged_window(info.valid_region(), steps, border_size); } +/** Calculate the squashed or maximum window for the given tensor shapes. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shapes. + * + * @param[in] src0 Tensor info object defining the shape of the first input tensor. + * @param[in] src1 Tensor info object defining the shape of the second input tensor. + * + * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1); + /** Function to compute the shape of output and window for the given inputs * * @param[in] infos Input tensor informations diff --git a/src/cpu/kernels/CpuMulKernel.cpp b/src/cpu/kernels/CpuMulKernel.cpp index da7b6d7d66..2f04bf9f26 100644 --- a/src/cpu/kernels/CpuMulKernel.cpp +++ b/src/cpu/kernels/CpuMulKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -531,11 +531,11 @@ void mul_U8_U8_U8(const ITensor *src1, const ITensor *src2, ITensor *out, const } if(is_sat) { - vst1q_u8(output_ptr, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vqmovn_u16(tmp1_low), vqmovn_u16(tmp1_high))); } else { - vst1q_u8(output_ptr, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); + vst1q_u8(output_ptr + x, vcombine_u8(vmovn_u16(tmp1_low), vmovn_u16(tmp1_high))); } } @@ -1618,7 +1618,8 @@ void CpuMulKernel::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo * } // Configure kernel window - Window win = calculate_max_window(out_shape); + Window win; + std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src1, *src2); ICpuKernel::configure(win); } diff --git a/src/cpu/kernels/CpuMulKernel.h b/src/cpu/kernels/CpuMulKernel.h index 85fcf88a96..5727b9d012 100644 --- a/src/cpu/kernels/CpuMulKernel.h +++ b/src/cpu/kernels/CpuMulKernel.h @@ -80,6 +80,15 @@ public: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; const char *name() const override; + /** Get the preferred dimension in which the scheduler splits the work into multiple jobs. + * + * @return The split dimension hint. + */ + size_t get_split_dimension_hint() const + { + return _split_dimension; + } + private: /** Common signature for all the specialised multiplication functions with integer scaling factor * @@ -115,6 +124,7 @@ private: MulFunctionQuantized *_func_quantized{ nullptr }; float _scale{ 0 }; int _scale_exponent{ 0 }; + size_t _split_dimension{ Window::DimY }; }; /** Interface for the complex pixelwise multiplication kernel. */ diff --git a/src/cpu/operators/CpuMul.cpp b/src/cpu/operators/CpuMul.cpp index 9cb93b7784..4c15015206 100644 --- a/src/cpu/operators/CpuMul.cpp +++ b/src/cpu/operators/CpuMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,7 +54,8 @@ void CpuMul::configure(ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, f void CpuMul::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); + auto split_dimension = static_cast<kernels::CpuMulKernel *>(_kernel.get())->get_split_dimension_hint(); + NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); } Status CpuComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) |