aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/DefaultLWSHeuristics.cpp
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-09-16 14:14:21 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-10-06 15:34:16 +0000
commit: ad9a7ed2f9969381af0b9c97438a3402e16d9483 (patch)
tree: 440ef7484418b49778e897bf00fb6396c24d0986 /src/core/CL/DefaultLWSHeuristics.cpp
parent: 3bedd2f031680f53e2982638adfe99a29dca8d06 (diff)
download: ComputeLibrary-ad9a7ed2f9969381af0b9c97438a3402e16d9483.tar.gz
Rework DepthwiseConvolution heuristic on OpenCL
Resolves COMPMID-5632

Change-Id: I2bdbe69a610ca2510fbd74d5d412842679299762
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8365
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/DefaultLWSHeuristics.cpp')
-rw-r--r-- src/core/CL/DefaultLWSHeuristics.cpp 21
1 files changed, 20 insertions, 1 deletions
diff --git a/src/core/CL/DefaultLWSHeuristics.cpp b/src/core/CL/DefaultLWSHeuristics.cpp
index c082d7fbf9..c739b9dc03 100644
--- a/src/core/CL/DefaultLWSHeuristics.cpp
+++ b/src/core/CL/DefaultLWSHeuristics.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,6 +68,21 @@ cl::NDRange get_direct_lws(size_t gws_x, size_t gws_y, size_t gws_z)
return cl::NDRange(8, 4, 1);
}
}
+
+cl::NDRange get_dwc_lws(size_t gws_x, size_t gws_y, size_t gws_z) // Local work-size heuristic selected for CLKernelType::DEPTHWISE; only gws_x affects the result.
+{
+ ARM_COMPUTE_UNUSED(gws_y); // Taken for signature parity with the sibling get_*_lws helpers, but not consulted here.
+ ARM_COMPUTE_UNUSED(gws_z); // Likewise unused by this heuristic.
+
+ if(gws_x < 32) // Small X extent: the local X size is set equal to the global X size.
+ {
+ return cl::NDRange(gws_x, 4, 4); // NOTE(review): the 4x4 Y/Z sizes look empirically tuned - rationale not visible here, confirm against benchmarks.
+ }
+ else // gws_x >= 32: fall back to a fixed shape that does not depend on the global sizes.
+ {
+ return cl::NDRange(8, 4, 2); // NOTE(review): fixed 8x4x2 shape presumably tuned as well - confirm.
+ }
+}
} // namespace
namespace arm_compute
@@ -92,6 +107,10 @@ cl::NDRange get_default_lws_for_type(CLKernelType kernel_type, cl::NDRange gws)
{
return get_winograd_lws(gws_x, gws_y, gws_z);
}
+ case CLKernelType::DEPTHWISE:
+ {
+ return get_dwc_lws(gws_x, gws_y, gws_z);
+ }
default:
{
return CLKernelLibrary::get().default_ndrange();