From 3f8cc5893ca114bf36b7ef65f80a16668fb2e121 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Wed, 16 Oct 2019 14:32:55 +0100
Subject: COMPMID-2813: Update the heuristic in CLDepthWiseConvolutionLayer

Change-Id: I62f3158a6d32e9a96274021e4b6fe8beefa16f71
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/2105
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
---
 src/graph/backends/CL/CLFunctionsFactory.cpp | 2 +-
 src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 82b6dd6a54..6d231f2ef3 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -60,7 +60,7 @@ struct CLConvolutionLayerFunctions
 struct CLDepthwiseConvolutionLayerFunctions
 {
     using GenericDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
-    using OptimizedDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer3x3;
+    using OptimizedDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
 };
 
 /** Collection of CL element-wise functions */
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index d9c21150df..5ac7a7a7c6 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -278,7 +278,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
     const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
     const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
 
-    const bool can_run_optimised_3x3_kernel = (weights->info()->dimension(idx_w) == 3) && (weights->info()->dimension(idx_h) == 3);
+    const GPUTarget gpu_target = CLScheduler::get().target();
+    const bool can_run_optimised_3x3_kernel = (weights->info()->dimension(idx_w) == 3) && (weights->info()->dimension(idx_h) == 3) && (is_data_type_float(input->info()->data_type())
+            || (get_arch_from_target(gpu_target) == GPUTarget::MIDGARD));
 
     _needs_permute = false;
     _is_prepared = false;
@@ -347,7 +349,9 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
 
-    const bool can_run_optimised_3x3_kernel = (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3);
+    const GPUTarget gpu_target = CLScheduler::get().target();
+    const bool can_run_optimised_3x3_kernel = (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3) && (is_data_type_float(input->data_type())
+            || (get_arch_from_target(gpu_target) == GPUTarget::MIDGARD));
 
     if(!can_run_optimised_3x3_kernel)
     {
-- 
cgit v1.2.1