aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-10-14 19:03:09 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-10-23 12:08:12 +0000
commit48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
treef857d733ccf446c704823dc7ac796a96eb55095e /src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
parent1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
downloadComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/2141 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp')
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp118
1 files changed, 37 insertions, 81 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 0034dd2545..f4377cdaf2 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -29,9 +29,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "support/ToolchainSupport.h"
-#include <cmath>
#include <set>
#include <tuple>
@@ -90,19 +88,27 @@ void NEConvolutionLayerReshapeWeights::run()
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
- _col2im_kernel(), _activationlayer_function(), _add_bias_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(),
- _data_layout(DataLayout::NCHW), _append_bias(false), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
+ _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false),
+ _skip_col2im(false), _is_quantized(false), _is_prepared(false)
{
}
void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info, int gemm_3d_depth)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
- ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output == nullptr ? nullptr : output->info(), act_info, gemm_3d_depth,
- _skip_im2col));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_mm(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output == nullptr ? nullptr : output->info(),
+ act_info, gemm_3d_depth, _skip_im2col));
+ // Create GEMMInfo structure
const GEMMInfo &gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
- gemm_3d_depth, _skip_im2col /* Reinterpret the input as 3D if im2col is skipped */);
+ gemm_3d_depth, _skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
+ false, GEMMLowpOutputStageInfo(), false, false, act_info);
+
+ // Supported activations in GEMM
+ const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
+ };
if(_is_quantized)
{
@@ -125,19 +131,13 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
int min_activation = 0;
int max_activation = 255;
- const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
- ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
- };
- if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
+ if(supported_acts.count(act_info.activation()) != 0)
{
const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo);
const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int;
max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
-
- _is_activationlayer_enabled = false;
}
GEMMLowpOutputStageInfo output_info;
@@ -157,18 +157,21 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
else
{
// Configure matrix multiply function
- _mm_gemm.configure(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
+ _mm_gemm.configure(input, weights, biases, output, 1.0f, 0.0f, gemm_info);
}
}
-Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info,
- int gemm_3d_depth, bool skip_im2col)
+Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col)
{
const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const bool is_activation_enabled = act_info.enabled();
- const GEMMInfo &gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
- gemm_3d_depth, skip_im2col /* Reinterpret the input as 3D if im2col is skipped */);
+ // Create GEMMInfo structure
+ const GEMMInfo gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
+ gemm_3d_depth, skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
+ false, GEMMLowpOutputStageInfo(), false, false, act_info);
+
if(is_quantized)
{
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
@@ -241,7 +244,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_UNUSED(num_groups);
+ ARM_COMPUTE_UNUSED(num_groups, weights_info);
ARM_COMPUTE_ERROR_THROW_ON(NEGEMMConvolutionLayer::validate(input->info(),
weights->info(),
biases != nullptr ? biases->info() : nullptr,
@@ -261,13 +264,11 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
const unsigned int kernel_width = weights->info()->dimension(idx_width);
const unsigned int kernel_height = weights->info()->dimension(idx_height);
- _is_prepared = weights_info.retain_internal_weights();
- _original_weights = weights;
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _data_layout = data_layout;
- _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
- _append_bias = (biases != nullptr) && (!_is_quantized);
- _is_activationlayer_enabled = act_info.enabled();
+ _is_prepared = weights_info.retain_internal_weights();
+ _original_weights = weights;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _data_layout = data_layout;
+ _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
const ITensor *gemm_input_to_use = input;
ITensor *gemm_output_to_use = output;
@@ -297,8 +298,6 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_skip_col2im = false;
}
- const ITensor *biases_to_use = (_append_bias && !_skip_im2col) ? biases : nullptr;
-
// Get parameters from conv_info
unsigned int stride_x = 0;
unsigned int stride_y = 0;
@@ -312,12 +311,12 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
if(_weights_manager && _weights_manager->are_weights_managed(weights))
{
- _reshape_weights_managed.configure(weights, biases_to_use);
+ _reshape_weights_managed.configure(weights, nullptr);
weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed);
}
else
{
- _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped);
+ _reshape_weights.configure(weights, nullptr, &_weights_reshaped);
weights_to_use = &_weights_reshaped;
}
@@ -327,16 +326,11 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
_memory_group.manage(&_im2col_output);
// Configure
- _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation);
+ _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
// Update GEMM input
gemm_input_to_use = &_im2col_output;
}
- else if(_append_bias)
- {
- // Configure add bias kernel
- _add_bias_kernel.configure(output, biases, output, ConvertPolicy::SATURATE);
- }
// Create temporary GEMM output tensor in case we cannot skip col2im
if(!_skip_col2im)
@@ -394,14 +388,6 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
"Output shape does not match the expected one");
-
- // Configure Activation Layer
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.configure(output, nullptr, act_info);
- }
-
- ARM_COMPUTE_UNUSED(weights_info);
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -432,10 +418,9 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
const ITensorInfo *gemm_output_to_use = output;
const ITensorInfo *weights_to_use = weights;
- const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
- const bool append_bias = (biases != nullptr) && (!is_quantized);
- bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
- bool is_activation_enabled = act_info.enabled();
+ const bool append_bias = false;
+ const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+ bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
// Get convolved dimensions
unsigned int conv_w = 0;
@@ -470,9 +455,6 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
}
}
- const unsigned bias_element = (append_bias && !skip_im2col) ? 1 : 0;
- const ITensorInfo *biases_to_use = (append_bias && !skip_im2col) ? biases : nullptr;
-
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_channel) != input->dimension(idx_channel));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
@@ -491,17 +473,12 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
- if(act_info.enabled())
- {
- ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
- }
-
unsigned int mat_weights_cols = weights->dimension(idx_kernels);
- unsigned int mat_weights_rows = weights->dimension(idx_width) * weights->dimension(idx_height) * weights->dimension(idx_channel) + bias_element;
+ unsigned int mat_weights_rows = weights->dimension(idx_width) * weights->dimension(idx_height) * weights->dimension(idx_channel);
// Output tensor auto inizialization if not yet initialized
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, biases_to_use, nullptr));
- weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col)), 1, data_type);
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, nullptr, nullptr));
+ weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type);
weights_reshaped_info.set_quantization_info(weights->quantization_info());
weights_to_use = &weights_reshaped_info;
@@ -521,11 +498,6 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation));
gemm_input_to_use = &im2col_reshaped_info;
}
- else if(append_bias)
- {
- // Validate add bias kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(output, biases, output, ConvertPolicy::SATURATE));
- }
// Create temporary GEMM output tensor in case we cannot skip col2im
if(!skip_col2im)
@@ -549,12 +521,6 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h)));
}
- //Validate Activation Layer
- if(is_activation_enabled)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
- }
-
return Status{};
}
@@ -583,11 +549,6 @@ void NEGEMMConvolutionLayer::run()
_mm_gemm.run();
}
- if(_skip_im2col && _append_bias)
- {
- NEScheduler::get().schedule(&_add_bias_kernel, Window::DimY);
- }
-
// Reshape output matrix
if(!_skip_col2im)
{
@@ -600,11 +561,6 @@ void NEGEMMConvolutionLayer::run()
_reshape_layer.run();
}
}
-
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.run();
- }
}
void NEGEMMConvolutionLayer::prepare()