about summary refs log tree commit diff
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-10-16 14:32:55 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-10-17 09:40:07 +0000
commit 3f8cc5893ca114bf36b7ef65f80a16668fb2e121 (patch)
tree b4bad8b316189c81317542c419d5ac12bda209f0
parent 1af089c784ce2c9091fae3c15a4b11379921b690 (diff)
download ComputeLibrary-3f8cc5893ca114bf36b7ef65f80a16668fb2e121.tar.gz
COMPMID-2813: Update the heuristic in CLDepthWiseConvolutionLayer
Change-Id: I62f3158a6d32e9a96274021e4b6fe8beefa16f71
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2105
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/graph/backends/CL/CLFunctionsFactory.cpp 2
-rw-r--r-- src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp 8
2 files changed, 7 insertions, 3 deletions
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 82b6dd6a54..6d231f2ef3 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -60,7 +60,7 @@ struct CLConvolutionLayerFunctions
struct CLDepthwiseConvolutionLayerFunctions
{
using GenericDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
- using OptimizedDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer3x3;
+ using OptimizedDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
};
/** Collection of CL element-wise functions */
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index d9c21150df..5ac7a7a7c6 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -278,7 +278,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
- const bool can_run_optimised_3x3_kernel = (weights->info()->dimension(idx_w) == 3) && (weights->info()->dimension(idx_h) == 3);
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ const bool can_run_optimised_3x3_kernel = (weights->info()->dimension(idx_w) == 3) && (weights->info()->dimension(idx_h) == 3) && (is_data_type_float(input->info()->data_type())
+ || (get_arch_from_target(gpu_target) == GPUTarget::MIDGARD));
_needs_permute = false;
_is_prepared = false;
@@ -347,7 +349,9 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
- const bool can_run_optimised_3x3_kernel = (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3);
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ const bool can_run_optimised_3x3_kernel = (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3) && (is_data_type_float(input->data_type())
+ || (get_arch_from_target(gpu_target) == GPUTarget::MIDGARD));
if(!can_run_optimised_3x3_kernel)
{