aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2022-09-16 14:14:21 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2022-11-01 09:24:45 +0000
commit3394f3e3df7fd2d924c41822a8564493fc06473a (patch)
tree8859ab95e39a237b204031a2aa68cde752003dde /src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
parent910e3f9b686d16657e37d4c18f234b566c8deec2 (diff)
downloadComputeLibrary-3394f3e3df7fd2d924c41822a8564493fc06473a.tar.gz
Rework direct convolution heuristic on OpenCL
Resolves COMPMID-5634 Change-Id: I075de70d509d0c4430b4bcf3f218384e237a3a56 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/453708 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8473 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp')
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp64
1 files changed, 53 insertions, 11 deletions
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
index ad94678335..b693568c67 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
@@ -144,32 +144,45 @@ DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(
const int32_t ofm = dst_shape[0];
const int32_t m = dst_shape[1] * dst_shape[2];
+ const int32_t k = wei_shape[0];
const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
desc.export_weights_to_cl_image = export_weights_to_cl_image;
if(dst_shape[0] <= 4)
{
+ // k0 should be as large as possible. However, we should avoid
+ // having left-over for loops that make the implementation slower.
+ if((k % 16) == 0)
+ {
+ desc.k0 = 16;
+ }
+ else if((k % 8) == 0)
+ {
+ desc.k0 = 8;
+ }
+ else
+ {
+ desc.k0 = 4;
+ }
+
if(is_pointwise)
{
if(ofm == 4)
{
desc.m0 = 1;
desc.n0 = 4;
- desc.k0 = 16;
}
else
{
desc.m0 = 1;
desc.n0 = 1;
- desc.k0 = 16;
}
}
else
{
desc.m0 = 1;
desc.n0 = dst_shape[0];
- desc.k0 = 16;
}
}
else
@@ -178,21 +191,50 @@ DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(
{
desc.m0 = 1;
desc.n0 = 1;
- desc.k0 = 16;
+ if((k % 16) == 0)
+ {
+ desc.k0 = 16;
+ }
+ else if((k % 8) == 0)
+ {
+ desc.k0 = 8;
+ }
+ else
+ {
+ desc.k0 = 4;
+ }
}
else
{
- if(ofm > 16)
+ if(ofm >= 16)
{
- desc.m0 = 4;
- desc.n0 = 4;
- desc.k0 = 8;
+ if(m / 6 > 24000)
+ {
+ desc.m0 = 6;
+ }
+ else
+ {
+ desc.m0 = 5;
+ }
+ desc.n0 = 8;
+ desc.k0 = 4;
}
else
{
- desc.m0 = 4;
- desc.n0 = 4;
- desc.k0 = 16;
+ desc.m0 = 2;
+ desc.n0 = 8;
+ if((k % 16) == 0)
+ {
+ desc.k0 = 16;
+ }
+ else if((k % 8) == 0)
+ {
+ desc.k0 = 8;
+ }
+ else
+ {
+ desc.k0 = 4;
+ }
}
}
}