about | summary | refs | log | tree | commit | diff
path: root/src/runtime/CL
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-09-16 14:14:21 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-10-06 15:34:16 +0000
commit: ad9a7ed2f9969381af0b9c97438a3402e16d9483 (patch)
tree: 440ef7484418b49778e897bf00fb6396c24d0986 /src/runtime/CL
parent: 3bedd2f031680f53e2982638adfe99a29dca8d06 (diff)
download: ComputeLibrary-ad9a7ed2f9969381af0b9c97438a3402e16d9483.tar.gz
Rework DepthwiseConvolution heuristic on OpenCL
Resolves COMPMID-5632

Change-Id: I2bdbe69a610ca2510fbd74d5d412842679299762
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8365
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL')
-rw-r--r-- src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp 37
1 file changed, 31 insertions, 6 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 8546471fdd..3eadaee0de 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -44,7 +44,7 @@ namespace
{
bool export_weights_to_cl_image_heuristic(const ITensorInfo *weights, unsigned int depth_multiplier, GPUTarget gpu_target)
{
- if(!export_weights_to_cl_image(weights))
+ if(!export_to_cl_image(weights))
{
return false;
}
@@ -75,9 +75,12 @@ bool export_weights_to_cl_image_heuristic(const ITensorInfo *weights, unsigned i
return true;
}
-void initialize_dwc_native_compute_info(DWCComputeKernelInfo &dwc_compute_info, const ITensorInfo *weights, const PadStrideInfo &conv_info, const Size2D &dilation, unsigned int depth_multiplier,
+void initialize_dwc_native_compute_info(DWCComputeKernelInfo &dwc_compute_info, const ITensorInfo *input, const ITensorInfo *weights, const PadStrideInfo &conv_info, const Size2D &dilation,
+ unsigned int depth_multiplier,
GPUTarget gpu_target)
{
+ ARM_COMPUTE_UNUSED(input);
+
if(!is_data_type_float(weights->data_type()))
{
dwc_compute_info.export_weights_to_cl_image = false;
@@ -97,6 +100,7 @@ void initialize_dwc_native_compute_info(DWCComputeKernelInfo &dwc_compute_info,
// Floating point path
// First check if we can export to cl_image.
+ dwc_compute_info.export_input_to_cl_image = false;
dwc_compute_info.export_weights_to_cl_image = export_weights_to_cl_image_heuristic(weights, depth_multiplier, gpu_target);
// Set n0
@@ -135,7 +139,28 @@ void initialize_dwc_native_compute_info(DWCComputeKernelInfo &dwc_compute_info,
const size_t idx_w = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
const size_t kernel_w = weights->tensor_shape()[idx_w];
- dwc_compute_info.m0 = (kernel_w >= 9) || (kernel_w == 1) ? 1 : 2;
+ if((kernel_w >= 9) || (kernel_w == 1))
+ {
+ dwc_compute_info.m0 = 1;
+ }
+ else
+ {
+ if(weights->data_type() == DataType::F16)
+ {
+ if((input->dimension(1) % 5) == 0)
+ {
+ dwc_compute_info.m0 = 5;
+ }
+ else
+ {
+ dwc_compute_info.m0 = 4;
+ }
+ }
+ else
+ {
+ dwc_compute_info.m0 = 2;
+ }
+ }
}
else
{
@@ -237,7 +262,7 @@ void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_cont
}
DWCComputeKernelInfo dwc_native_compute_info;
- initialize_dwc_native_compute_info(dwc_native_compute_info, weights_to_use->info(), conv_info, dilation, depth_multiplier, gpu_target);
+ initialize_dwc_native_compute_info(dwc_native_compute_info, input->info(), weights_to_use->info(), conv_info, dilation, depth_multiplier, gpu_target);
const ConvolutionInfo conv_kernel_info{ conv_info, depth_multiplier, act_info, dilation };
@@ -322,7 +347,7 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
DWCComputeKernelInfo dwc_native_compute_info;
- initialize_dwc_native_compute_info(dwc_native_compute_info, &permuted_weights, conv_info, dilation, depth_multiplier, gpu_target);
+ initialize_dwc_native_compute_info(dwc_native_compute_info, input, &permuted_weights, conv_info, dilation, depth_multiplier, gpu_target);
ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output,
dwc_native_compute_info, conv_kernel_info, &output_multipliers_shifts_info, &output_multipliers_shifts_info));
@@ -331,7 +356,7 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
else
{
DWCComputeKernelInfo dwc_native_compute_info;
- initialize_dwc_native_compute_info(dwc_native_compute_info, weights, conv_info, dilation, depth_multiplier, gpu_target);
+ initialize_dwc_native_compute_info(dwc_native_compute_info, input, weights, conv_info, dilation, depth_multiplier, gpu_target);
ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, dwc_native_compute_info, conv_kernel_info, &output_multipliers_shifts_info,
&output_multipliers_shifts_info));
}