about | summary | refs | log | tree | commit | diff
path: root/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2018-03-26 16:20:05 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:37 +0000
commit: 1ed1fc6d3b7d8494ce3bbc5f8b46bfde6fc586f9 (patch)
tree: dc299cf46073d2bdd5a3a0252935ede216cf332e /src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
parent: 9373c8b2650f34b2804d3685588bad8e408ebe63 (diff)
download: ComputeLibrary-1ed1fc6d3b7d8494ce3bbc5f8b46bfde6fc586f9.tar.gz
COMPMID-812 Add NHWC data format support for NEON depthwise convolution (optimized case).
Change-Id: Icdfd6c02ed526daf4f59a4b76c7bbc1bc48fde74
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125938
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp 60
1 files changed, 39 insertions, 21 deletions
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index f28ed715f6..8691fb9f76 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -37,7 +37,7 @@ using namespace arm_compute::misc::shape_calculator;
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
: _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _accumulator(), _input_nhwc(), _weights_hwio(), _output_nhwc(), _has_bias(false),
- _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false)
+ _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true)
{
}
@@ -52,30 +52,38 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
_has_bias = biases != nullptr;
_is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(),
conv_info,
- input->info()->data_type());
+ input->info()->data_type(),
+ input->info()->data_layout());
_are_weights_reshaped = false;
+ _is_nchw = input->info()->data_layout() == DataLayout::NCHW;
+
+ ARM_COMPUTE_ERROR_ON(!_is_optimized && !_is_nchw);
if(_is_optimized)
{
- // Configure the function to transform the input tensor from NCHW -> NHWC
- _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
-
- // Configure the function to transform the weights tensor from IHW -> HWI
- _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U));
+ if(_is_nchw)
+ {
+ // Configure the function to transform the input tensor from NCHW -> NHWC
+ _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
- // Configure optimized depthwise
- _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC);
+ // Configure the function to transform the weights tensor from IHW -> HWI
+ _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U));
- // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
- _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U));
+ // Configure optimized depthwise
+ _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC);
- // Allocate tensors
- _input_nhwc.allocator()->allocate();
- _weights_hwio.allocator()->allocate();
- _output_nhwc.allocator()->allocate();
+ // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
+ _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U));
- // Create convolver (deferred)
- _dwc_kernel.generate_convolver();
+ // Allocate tensors
+ _input_nhwc.allocator()->allocate();
+ _weights_hwio.allocator()->allocate();
+ _output_nhwc.allocator()->allocate();
+ }
+ else
+ {
+ _dwc_kernel.configure(input, weights, output, conv_info, DataLayout::NHWC);
+ }
}
else
{
@@ -116,8 +124,15 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
void NEDepthwiseConvolutionLayer3x3::run()
{
+ if(_is_first_run && _is_optimized)
+ {
+ _is_first_run = false;
+ // Create convolver (deferred)
+ _dwc_kernel.generate_convolver();
+ }
+
// Permute weights in HWIO format if the optimized kernel will be executedd
- if(!_are_weights_reshaped && _is_optimized)
+ if(!_are_weights_reshaped && _is_optimized && _is_nchw)
{
_are_weights_reshaped = true;
_permute_weights.run();
@@ -126,8 +141,11 @@ void NEDepthwiseConvolutionLayer3x3::run()
// Handle input
if(_is_optimized)
{
- // Permute input to NHWC format execution
- _permute_input.run();
+ if(_is_nchw)
+ {
+ // Permute input to NHWC format execution
+ _permute_input.run();
+ }
}
else
{
@@ -139,7 +157,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);
// Permute output to ACL's native NCHW format in case of NHWC execution
- if(_is_optimized)
+ if(_is_optimized && _is_nchw)
{
_permute_output.run();
}