aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-04-30 13:28:23 +0100
committerManuel Bottini <manuel.bottini@arm.com>2020-05-12 09:34:17 +0000
commit6e10aa395e81b83edb3437191acd7abe1639c7dc (patch)
treefce78e75102402df6dbbd37e715f9ef855846008 /src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
parent0e240151637641e9e0c425d52dd75b7bd11d1159 (diff)
downloadComputeLibrary-6e10aa395e81b83edb3437191acd7abe1639c7dc.tar.gz
COMPMID-3316: NEDeconvolutionLayer failing for a big input
- Using NEDirectConvolution for big shapes since the memory required explodes for 9x9 kernel - Adding test cases - Fix enables only the NEON Deconvolution for NHWC Change-Id: I8a541346428e5686818f8ecb7f69e2a9106cbceb Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3135 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp')
-rw-r--r--src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp5
1 files changed, 3 insertions, 2 deletions
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 65538848df..751a3fa1fb 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -76,7 +76,8 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- DataType data_type = output->data_type();
+ // output might not be initialized since it can be an intermediate tensor of another layer
+ DataType data_type = input->data_type();
TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type));
// Validate Convolution kernel