about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h 2
-rw-r--r-- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h 4
-rw-r--r-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp 36
3 files changed, 33 insertions, 9 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index aadc429864..65c2ef7e0b 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -275,10 +275,12 @@ private:
NEReshapeLayer _reshape_layer;
const ITensor *_original_weights;
+ const ITensor *_original_output;
Tensor _im2col_output;
Tensor _weights_reshaped;
Tensor _gemm_output;
+ Tensor _gemm_output_3d;
Tensor _tmp_output;
DataLayout _data_layout;
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index 466e60183a..381fa4de31 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -117,7 +117,7 @@ public:
void run() override;
private:
- std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */
+ std::unique_ptr<IFallback> _arm_gemm; /**< Interface for the arm_gemm fallback */
MemoryGroup _memory_group; /**< Function memory group */
IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
};
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 8fc788c402..74ef3eef56 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -109,8 +109,8 @@ NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default;
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
- _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false),
- _skip_col2im(false), _is_quantized(false), _is_prepared(false)
+ _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(),
+ _data_layout(DataLayout::NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false)
{
}
@@ -281,6 +281,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_is_prepared = weights_info.retain_internal_weights();
_original_weights = weights;
+ _original_output = output;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
_data_layout = data_layout;
_skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
@@ -368,6 +369,15 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
// Update GEMM output
gemm_output_to_use = &_gemm_output;
}
+ else
+ {
+ _gemm_output.allocator()->init(*output->info());
+ _memory_group.manage(&_gemm_output);
+ _gemm_output_3d.allocator()->init(*output->info());
+
+ // Update GEMM output
+ gemm_output_to_use = &_gemm_output_3d;
+ }
// Configure GEMM
// In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix
@@ -393,16 +403,18 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_reshape_layer.configure(gemm_output_to_use, output);
}
}
+ else
+ {
+ // Configure reshape layer
+ _reshape_layer.configure(gemm_output_to_use, output);
+ }
if(_is_quantized && !_skip_col2im)
{
_tmp_output.allocator()->allocate();
}
- if(!_skip_col2im || _is_quantized)
- {
- _gemm_output.allocator()->allocate();
- }
+ _gemm_output.allocator()->allocate();
ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
"Output shape does not match the expected one");
@@ -554,6 +566,8 @@ void NEGEMMConvolutionLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
+ bool out_has_padding = _skip_col2im && (_original_output->info()->padding().bottom != 0 || _original_output->info()->padding().top != 0);
+
if(!_skip_im2col)
{
// Run input reshaping
@@ -561,6 +575,10 @@ void NEGEMMConvolutionLayer::run()
NEScheduler::get().schedule(_im2col_kernel.get(), y_dim);
}
+ // Handle the case where output has top/bottom padding
+ const ITensor *out_to_use = out_has_padding ? &_gemm_output : _original_output;
+ _gemm_output_3d.allocator()->import_memory(out_to_use->buffer());
+
// Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions
if(_is_quantized)
{
@@ -585,6 +603,10 @@ void NEGEMMConvolutionLayer::run()
_reshape_layer.run();
}
}
+ else if(out_has_padding)
+ {
+ _reshape_layer.run();
+ }
}
void NEGEMMConvolutionLayer::prepare()