From e250389ed6d78153a55382fa5b3519c151bfd79f Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 23 Apr 2018 15:17:31 +0100 Subject: COMPMID-810 Add NHWC data format support for NEON convolution Change-Id: I2a7b49a12da7f3bc3f04749243b1dc111160de6e Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129348 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/runtime/NEON/AssemblyHelper.h | 9 +++++++-- .../runtime/NEON/functions/NEGEMMConvolutionLayer.h | 16 ++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'arm_compute/runtime/NEON') diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h index 3db419e148..ecaf35ac3e 100644 --- a/arm_compute/runtime/NEON/AssemblyHelper.h +++ b/arm_compute/runtime/NEON/AssemblyHelper.h @@ -84,7 +84,12 @@ public: const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput); const int ldd = _d->info()->strides_in_bytes().y() / sizeof(TypeOutput); - const int batch_stride_a = _a->info()->strides_in_bytes().z() / sizeof(TypeInput); + // In the case of NHWC we want to interpret the output shape as 3D. Thus, the batch stride for A is + // the relevant multiple of the row stride. + const bool is_nhwc = _a->info()->data_layout() == DataLayout::NHWC; + const int stride_in_bytes_a = is_nhwc ? _a->info()->strides_in_bytes().y() * _d->info()->dimension(1) : _a->info()->strides_in_bytes().z(); + + const int batch_stride_a = stride_in_bytes_a / sizeof(TypeInput); const int batch_stride_d = _d->info()->strides_in_bytes().z() / sizeof(TypeOutput); const int multi_stride_a = _a->info()->strides_in_bytes()[3] / sizeof(TypeInput); @@ -158,7 +163,7 @@ inline bool setup_assembly_kernel(const ITensor *a, const ITensor *b, ITensor *d const int M = d->info()->tensor_shape().y(); const int N = d->info()->tensor_shape().x(); const int K = a->info()->tensor_shape().x(); - const int batches = a->info()->tensor_shape().total_size_upper(2); + const int batches = d->info()->tensor_shape().total_size_upper(2); const int multis = b->info()->tensor_shape().z(); unsigned int num_threads = NEScheduler::get().num_threads(); diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 752693188c..d64fd9e771 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" @@ -176,6 +177,7 @@ private: NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; NECol2ImKernel _output_col2im_kernel; NEActivationLayer _activationlayer_function; + NEArithmeticAdditionKernel _add_bias_kernel; const ITensor *_original_weights; @@ -187,12 +189,14 @@ private: Tensor _workspace; Tensor _B_pretransposed; - bool _append_bias; - bool _is_fully_connected_convolution; - bool _are_weights_reshaped; - bool _is_quantized; - bool _is_interleaved; - bool _is_activationlayer_enabled; + DataLayout _data_layout; + bool _append_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; + bool _is_quantized; + bool _is_interleaved; + bool _is_activationlayer_enabled; + bool _skip_im2col; }; } #endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */ -- cgit v1.2.1