aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators/ClConv2d.cpp
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2022-09-06 15:06:40 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2022-09-09 09:51:10 +0000
commit4478e1cb2d7be9190147be597c3cfbf4c6f99f09 (patch)
tree3a60b5d73205327a178c1204b828e165fea49c5f /src/gpu/cl/operators/ClConv2d.cpp
parent0eed305680ade0c48d07f592c4c4a8aaaad077b7 (diff)
downloadComputeLibrary-4478e1cb2d7be9190147be597c3cfbf4c6f99f09.tar.gz
Rework heuristic in ClConv2d
The heuristic has been tweaked to call direct convolution when we think it can be faster than gemm-based convolution. The main change affects the selection of the convolution method on the first layer. In general, the question we should ask for the first convolution layer of a model is: when is the execution time of im2col + gemm lower than that of direct convolution? Since im2col does not depend on the OFM, when the OFM is big enough the contribution of im2col is small and the GEMM approach is preferable. From internal experiments, the OFM threshold is 64. Resolves COMPMID-5504, COMPMID-5504, COMPMID-5477 Change-Id: If1bd1fa93c185ffa874388e29866244e62ca3494 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8231 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClConv2d.cpp')
-rw-r--r--src/gpu/cl/operators/ClConv2d.cpp20
1 files changed, 19 insertions, 1 deletions
diff --git a/src/gpu/cl/operators/ClConv2d.cpp b/src/gpu/cl/operators/ClConv2d.cpp
index 16fc0e90d3..54e5d002da 100644
--- a/src/gpu/cl/operators/ClConv2d.cpp
+++ b/src/gpu/cl/operators/ClConv2d.cpp
@@ -261,9 +261,11 @@ ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const
const bool is_ifm_ge_8 = src->dimension(idx_c) >= 8;
const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16;
const bool is_ofm_lte_8 = weights->dimension(3U) <= 8;
+ const bool is_ofm_lt_64 = weights->dimension(3U) < 64;
const bool workload_gte_8192 = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U);
const bool is_m_one = output_shape[1] * output_shape[2] == 1;
+ const bool is_unit_stride = (conv2d_info.conv_info.stride().first == 1) && (conv2d_info.conv_info.stride().second == 1);
// Run Winograd if valid and IFM >= 8
if(is_wino_valid && is_ifm_ge_8)
@@ -300,7 +302,23 @@ ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const
}
else
{
- if( ((is_large_kernel_sz || is_m_one) && workload_gte_8192) || is_ofm_lte_8 )
+ // Direct convolution used for the first layer of the network
+ if(workload_gte_8192 && !is_ifm_ge_16 && !is_unit_stride && is_ofm_lt_64)
+ {
+ // In general, the question we should ask for the first convolution layer of a model is:
+ // when the execution time of im2col + gemm < direct?. Since im2col does not depend on the OFM, it means that
+ // when OFM is big enough, the contribution of im2col is small and the GEMM approach is preferable.
+ // From internal experiments, the OFM threshold is 64 (is_ofm_lt_64)
+ return ConvolutionMethod::DIRECT;
+ }
+
+ if((is_large_kernel_sz || is_m_one) && workload_gte_8192 && is_ifm_ge_16)
+ {
+ return ConvolutionMethod::DIRECT;
+ }
+
+ // Direct convolution used for the last layer of the network
+ if(is_ofm_lte_8)
{
return ConvolutionMethod::DIRECT;
}