path: root/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
diff options
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-08-08 12:29:38 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: db9d46da3a8645d0c2cc71d035448999a36770ec (patch)
tree: 79c61d6ad845e60d86fa3059056b8ac3fffc236b /src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
parent: 980002bd5848f065b02a31bb105e47a5deb7bc98 (diff)
COMPMID-1485 - Add support for NHWC when running NEGEMMConvolutionLayer with FP16/QASYMM8
When the GEMM3D check fails, we now fall back to the classic implementation with im2col and col2im. In this manner, the function can work with QASYMM8 and FP16. Change-Id: I359e9da3a63956f33b5acbc9bca4383b14af10e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143372 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp')
1 files changed, 105 insertions, 55 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 33284470f4..52b461e255 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -90,8 +90,8 @@ void NEConvolutionLayerReshapeWeights::run()
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
: _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
- _add_bias_kernel(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _append_bias(false), _skip_im2col(false),
- _skip_col2im(false), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
+ _add_bias_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _append_bias(false),
+ _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
@@ -128,7 +128,7 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
- const GEMMInfo &gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */, gemm_3d_depth, skip_im2col);
+ const GEMMInfo gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */, gemm_3d_depth, skip_im2col);
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
@@ -142,14 +142,28 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
// Perform validation step on GEMMLowp
- NEGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), output, gemm_info);
+ return NEGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), output, gemm_info);
// Perform validation step on Matrix multiply function
- NEGEMM::validate(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
+ return NEGEMM::validate(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
- return Status{};
+Status NEGEMMConvolutionLayer::validate_gemm3d(DataType data_type, int gemm_3d_depth, bool skip_im2col)
+ const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+ const DataType output_gemm_data_type = is_quantized ? DataType::S32 : data_type;
+ const unsigned int mult_y = skip_im2col ? 1U : gemm_3d_depth;
+ const unsigned int mult_z = skip_im2col ? gemm_3d_depth : 1U;
+ // Set dummy tensor shapes for the validation
+ const TensorInfo dummy_input_info(TensorShape(4U, 4U * mult_y, 1U * mult_z), 1, data_type);
+ const TensorInfo dummy_weights_info(TensorShape(4U, 4U), 1, data_type);
+ const TensorInfo dummy_output_info(TensorShape(4U, 4U, gemm_3d_depth), 1, output_gemm_data_type);
+ return validate_mm(&dummy_input_info, &dummy_weights_info, &dummy_output_info, gemm_3d_depth, skip_im2col);
void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
@@ -180,25 +194,14 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_original_weights = weights;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
_data_layout = data_layout;
- _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1) && !_is_quantized;
- _skip_col2im = (data_layout == DataLayout::NHWC) && !_is_quantized;
+ _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
+ _skip_col2im = data_layout == DataLayout::NHWC;
_append_bias = (biases != nullptr) && (!_is_quantized);
- // TODO (giaiod01): Validate GEMM3D
- const bool is_nhwc = _data_layout == DataLayout::NHWC;
const ITensor *gemm_input_to_use = input;
ITensor *gemm_output_to_use = output;
ITensor *gemm_output_staged_to_use = output;
- const unsigned bias_element = (_append_bias && !_skip_im2col) ? 1 : 0;
- const ITensor *biases_to_use = (_append_bias && !_skip_im2col) ? biases : nullptr;
- // Get parameters from conv_info
- unsigned int stride_x = 0;
- unsigned int stride_y = 0;
- std::tie(stride_x, stride_y) = conv_info.stride();
// Get convolved dimensions
unsigned int conv_w = 0;
unsigned int conv_h = 0;
@@ -209,6 +212,25 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
+ // Check if GEMM3D is supported
+ if(_skip_col2im)
+ {
+ // If not supported, we need to perform im2col and col2im (or reshape layer)
+ if(!bool(validate_gemm3d(input->info()->data_type(), conv_h, _skip_im2col)))
+ {
+ _skip_im2col = false;
+ _skip_col2im = false;
+ }
+ }
+ const unsigned bias_element = (_append_bias && !_skip_im2col) ? 1 : 0;
+ const ITensor *biases_to_use = (_append_bias && !_skip_im2col) ? biases : nullptr;
+ // Get parameters from conv_info
+ unsigned int stride_x = 0;
+ unsigned int stride_y = 0;
+ std::tie(stride_x, stride_y) = conv_info.stride();
unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels);
unsigned int mat_weights_rows = weights->info()->dimension(idx_width) * weights->info()->dimension(idx_height) * weights->info()->dimension(idx_channel) + bias_element;
@@ -216,8 +238,6 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
// Just append biases and do not transpose 1xW as it will be reshaped in NEGEMM
_reshape_weights.configure(weights, biases_to_use, &_weights_reshaped);
- weights = &_weights_reshaped;
// Create tensor to store im2col reshaped inputs
@@ -244,8 +264,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_add_bias_kernel.configure(output, biases, output, ConvertPolicy::SATURATE);
- // Create GEMM output tensor
- if(!is_nhwc || _is_quantized)
+ // Create temporary GEMM output tensor in case we cannot skip col2im
+ if(!_skip_col2im)
// Calculate GEMM output shape
TensorShape shape_gemm = _im2col_output.info()->tensor_shape();
@@ -264,8 +284,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
gemm_output_to_use = &_gemm_output;
- // Configure and tune GEMM
- configure_mm(gemm_input_to_use, weights, gemm_output_to_use, (data_layout == DataLayout::NHWC) ? conv_h : 1);
+ // Configure GEMM
+ configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, _skip_col2im ? conv_h : 1);
@@ -289,13 +309,25 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
- // Configure and tune Col2Im
- _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, Size2D(conv_w, conv_h));
+ if(_data_layout == DataLayout::NCHW)
+ {
+ // Configure col2im
+ _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, Size2D(conv_w, conv_h));
+ }
+ else
+ {
+ // Configure reshape layer
+ _reshape_layer.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output);
+ }
- if(!is_nhwc || _is_quantized)
+ if(_is_quantized)
+ }
+ if(!_skip_col2im)
+ {
@@ -338,11 +370,35 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
const ITensorInfo *gemm_output_staged_to_use = output;
const ITensorInfo *weights_to_use = weights;
- const bool is_nhwc = data_layout == DataLayout::NHWC;
- const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
- bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1) && !is_quantized;
- const bool append_bias = (biases != nullptr) && (!is_quantized);
- const unsigned bias_element = (append_bias && !skip_im2col) ? 1 : 0;
+ const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+ const bool append_bias = (biases != nullptr) && (!is_quantized);
+ bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
+ bool skip_col2im = data_layout == DataLayout::NHWC;
+ // Get convolved dimensions
+ unsigned int conv_w = 0;
+ unsigned int conv_h = 0;
+ std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
+ input->dimension(idx_height),
+ kernel_width,
+ kernel_height,
+ conv_info,
+ dilation);
+ // Check if GEMM3D is supported
+ if(skip_col2im)
+ {
+ // If not supported, we need to perform im2col and col2im (or reshape layer)
+ if(!bool(validate_gemm3d(input->data_type(), conv_h, skip_im2col)))
+ {
+ skip_im2col = false;
+ skip_col2im = false;
+ }
+ }
+ const unsigned bias_element = (append_bias && !skip_im2col) ? 1 : 0;
+ const ITensorInfo *biases_to_use = (append_bias && !skip_im2col) ? biases : nullptr;
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_channel) != input->dimension(idx_channel));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
@@ -367,32 +423,19 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
- // Get convolved dimensions
- unsigned int conv_w = 0;
- unsigned int conv_h = 0;
- std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(idx_width),
- input->dimension(idx_height),
- kernel_width,
- kernel_height,
- conv_info,
- dilation);
unsigned int mat_weights_cols = weights->dimension(idx_kernels);
unsigned int mat_weights_rows = weights->dimension(idx_width) * weights->dimension(idx_height) * weights->dimension(idx_channel) + bias_element;
// Output tensor auto inizialization if not yet initialized
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr));
- weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type);
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, biases_to_use, nullptr));
+ weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col)), 1, data_type);
weights_to_use = &weights_reshaped_info;
- // TODO (giaiod01): Validate GEMM3D
// Create tensor info for im2col reshaped inputs
// For NEON the batch size is on the fourth dimension
- // TODO (giaiod01): Use auto-init COMPMID-1277
+ // TODO (giaiod01): Auto-initialize the output shape of im2col COMPMID-1482
TensorShape shape_im2col = input->tensor_shape();
shape_im2col.set(0, mat_weights_rows);
shape_im2col.set(1, conv_w * conv_h);
@@ -410,8 +453,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(output, biases, output, ConvertPolicy::SATURATE));
- // Create GEMM output tensor
- if(!is_nhwc || is_quantized)
+ // Create temporary GEMM output tensor in case we cannot skip col2im
+ if(!skip_col2im)
TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
shape_gemm.set(0, mat_weights_cols);
@@ -424,7 +467,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
gemm_output_to_use = &info_gemm;
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, (data_layout == DataLayout::NHWC) ? conv_h : 1, skip_im2col));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 1, skip_im2col));
@@ -440,8 +483,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, output->quantization_info().offset);
- // Validate Col2Im
- if(!is_nhwc || is_quantized)
+ // Validate Col2Im/ReshapeLayer
+ if(!skip_col2im && (data_layout == DataLayout::NCHW))
ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use,
@@ -493,7 +536,14 @@ void NEGEMMConvolutionLayer::run()
// Reshape output matrix
- NEScheduler::get().schedule(&_col2im_kernel, Window::DimY);
+ if(_data_layout == DataLayout::NCHW)
+ {
+ NEScheduler::get().schedule(&_col2im_kernel, Window::DimY);
+ }
+ else
+ {
+ _reshape_layer.run();
+ }