aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Sujak <jakub.sujak@arm.com> 2022-09-17 13:08:56 +0100
committer Pablo Marquez Tello <pablo.tello@arm.com> 2022-10-07 10:07:42 +0000
commit 842ad211c11417ba456a2dca7e89988db98eb256 (patch)
tree de816f1d53973961b229b09a9767c23c2628dad4
parent ad9a7ed2f9969381af0b9c97438a3402e16d9483 (diff)
download ComputeLibrary-842ad211c11417ba456a2dca7e89988db98eb256.tar.gz
Optimize Neon™ SUB operator by squashing execution window
Resolves: COMPMID-5462
Change-Id: I2c7151c8faf4016cc33592fff04d492d7cbc8fd6
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8366
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/cpu/kernels/CpuSubKernel.cpp 3
-rw-r--r-- src/cpu/kernels/CpuSubKernel.h 6
-rw-r--r-- src/cpu/operators/CpuSub.cpp 11
-rw-r--r-- src/cpu/operators/CpuSub.h 5
4 files changed, 22 insertions, 3 deletions
diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp
index c55d11e899..d908e4ed28 100644
--- a/src/cpu/kernels/CpuSubKernel.cpp
+++ b/src/cpu/kernels/CpuSubKernel.cpp
@@ -131,7 +131,8 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
_name = std::string("CpuSubKernel").append("/").append(uk->name);
// CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped
- Window win = calculate_max_window(out_shape, Steps());
+ Window win;
+ std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src0, *src1);
ICpuKernel::configure(win);
}
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 323a3f1316..e835bac3d5 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -82,10 +82,16 @@ public:
static const std::vector<SubKernel> &get_available_kernels();
+ size_t get_split_dimension() const // Accessor for the window dimension stored in _split_dimension; CpuSub::run() forwards it to the scheduler
+ {
+ return _split_dimension; // Assigned in configure() together with the execution window; member default is Window::DimY
+ }
+
private:
ConvertPolicy _policy{};
SubKernelPtr _run_method{ nullptr };
std::string _name{};
+ size_t _split_dimension{ Window::DimY };
};
} // namespace kernels
} // namespace cpu
diff --git a/src/cpu/operators/CpuSub.cpp b/src/cpu/operators/CpuSub.cpp
index f0a7770cea..91a5b6e63c 100644
--- a/src/cpu/operators/CpuSub.cpp
+++ b/src/cpu/operators/CpuSub.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "src/common/utils/Log.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
namespace arm_compute
{
namespace cpu
@@ -45,5 +47,12 @@ Status CpuSub::validate(const ITensorInfo *src0, const ITensorInfo *src1, const
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
return kernels::CpuSubKernel::validate(src0, src1, dst, policy);
}
+
+void CpuSub::run(ITensorPack &tensors) // Schedules the configured kernel over its window using the tensors in the given pack
+{
+ const auto split_dimension = static_cast<kernels::CpuSubKernel *>(_kernel.get())->get_split_dimension(); // NOTE(review): unchecked downcast; assumes _kernel always holds a CpuSubKernel — confirm against configure()
+
+ NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); // Schedule with the split dimension reported by the kernel rather than a hard-coded one
+}
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/operators/CpuSub.h b/src/cpu/operators/CpuSub.h
index 025c928d8f..d463d1e063 100644
--- a/src/cpu/operators/CpuSub.h
+++ b/src/cpu/operators/CpuSub.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,9 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
};
} // namespace cpu
} // namespace arm_compute