diff options
author | Jakub Sujak <jakub.sujak@arm.com> | 2022-09-17 13:08:56 +0100 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-10-07 10:07:42 +0000 |
commit | 842ad211c11417ba456a2dca7e89988db98eb256 (patch) | |
tree | de816f1d53973961b229b09a9767c23c2628dad4 /src/cpu/kernels | |
parent | ad9a7ed2f9969381af0b9c97438a3402e16d9483 (diff) | |
download | ComputeLibrary-842ad211c11417ba456a2dca7e89988db98eb256.tar.gz |
Optimize Neon™ SUB operator by squashing execution window
Resolves: COMPMID-5462
Change-Id: I2c7151c8faf4016cc33592fff04d492d7cbc8fd6
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8366
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels')
-rw-r--r-- | src/cpu/kernels/CpuSubKernel.cpp | 3 | ||||
-rw-r--r-- | src/cpu/kernels/CpuSubKernel.h | 6 |
2 files changed, 8 insertions, 1 deletions
```diff
diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp
index c55d11e899..d908e4ed28 100644
--- a/src/cpu/kernels/CpuSubKernel.cpp
+++ b/src/cpu/kernels/CpuSubKernel.cpp
@@ -131,7 +131,8 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
     _name = std::string("CpuSubKernel").append("/").append(uk->name);
 
     // CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped
-    Window win = calculate_max_window(out_shape, Steps());
+    Window win;
+    std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src0, *src1);
 
     ICpuKernel::configure(win);
 }
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 323a3f1316..e835bac3d5 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -82,10 +82,16 @@ public:
 
     static const std::vector<SubKernel> &get_available_kernels();
 
+    size_t get_split_dimension() const
+    {
+        return _split_dimension;
+    }
+
 private:
     ConvertPolicy _policy{};
     SubKernelPtr _run_method{ nullptr };
     std::string _name{};
+    size_t _split_dimension{ Window::DimY };
 };
 } // namespace kernels
 } // namespace cpu
```