aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuSubKernel.h
diff options
context:
space:
mode:
author Fadi Arafeh <fadi.arafeh@arm.com> 2022-10-06 16:20:14 +0000
committer fadi.arafeh <fadi.arafeh@arm.com> 2022-11-22 14:04:45 +0000
commit 73bb6b7ad80801e56633ad4ea12b0404b586a979 (patch)
tree 9f35a75499df4e1cc49cc6f3336c805384a53c13 /src/cpu/kernels/CpuSubKernel.h
parent ca1a52d14551147456a9a1ea2e24f5c141a6d80e (diff)
download ComputeLibrary-73bb6b7ad80801e56633ad4ea12b0404b586a979.tar.gz
ONCPUML-1072: Tuned MWS values (for N1, V1) for binary operators used by oneDNN
Added approximate values for MWS for the following binary operators: Add, Sub, Mul, Min, Max, Div.

Change-Id: I5c4c75511129982a3f44c038ee272f09598469de
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/459609
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Signed-off-by: fadara01 <fadi.arafeh@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8392
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuSubKernel.h')
-rw-r--r-- src/cpu/kernels/CpuSubKernel.h 9
1 files changed, 9 insertions, 0 deletions
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index e835bac3d5..3d80b34279 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -73,6 +73,15 @@ public:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size for requested configuration.
+ */
+ size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
+
struct SubKernel
{
const char *name;