aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp2
-rw-r--r--src/core/utils/quantization/AsymmHelpers.cpp21
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp2
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp38
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp6
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp8
-rw-r--r--src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp2
9 files changed, 54 insertions, 33 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index 73b1d41eb1..b2fb3e0278 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -112,7 +112,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
- bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+ bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
Window win{};
Window win_out{};
@@ -227,7 +227,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
_input1 = input1;
_output = output;
_reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
- _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+ _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
// In case both input and output have to be reinterpreted as 3D tensors,
// force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 715edae606..5e02dda9e3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -110,7 +110,7 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
- bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+ bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
// In case both input and output have to be reinterpreted as 3D tensors,
// force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
@@ -227,7 +227,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
_input1 = input1;
_output = output;
_reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
- _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+ _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0);
// In case both input and output have to be reinterpreted as 3D tensors,
// force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 0ba0d0e2f9..54ef23f2a2 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -109,7 +109,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const int yin_end = input->dimension(1);
const int xout_start = 0;
- const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration) : output->dimension(0);
+ const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0);
const int yout_start = 0;
const int yout_end = output->dimension(1);
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index 8bb6d8e173..ea9ba776a9 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,21 +30,30 @@
using namespace arm_compute::quantization;
constexpr int64_t fixed_point_one_Q0 = (1ll << 31);
+constexpr float epsilon = 0.00001f;
-arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier,
+arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(float multiplier,
int *quant_multiplier,
int *right_shift)
{
ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 0);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier >= 1);
- if(multiplier == 0)
+ ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon);
+ ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon);
+ if(std::fabs(1.0f - multiplier) < epsilon)
+ {
+ *quant_multiplier = 1;
+ *right_shift = 0;
+ return arm_compute::Status{};
+ }
+
+ if(std::fabs(0.0f - multiplier) < epsilon)
{
*quant_multiplier = 0;
*right_shift = 0;
return arm_compute::Status{};
}
+
const double q = std::frexp(multiplier, right_shift);
*right_shift *= -1;
auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
@@ -61,7 +70,7 @@ arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_le
return arm_compute::Status{};
}
-arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier,
+arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(float multiplier,
int *quantized_multiplier,
int *left_shift)
{
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index c5637dba26..6a2aac6457 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -101,7 +101,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
else
{
// Configure matrix multiply kernel
- _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 1, false, retain_internal_weights));
+ _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 0, false, retain_internal_weights));
}
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 67f55d56e2..4825d878f8 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -276,14 +276,16 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
{
const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
- int output_multiplier, output_shift;
+ const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
int min_activation = 0;
int max_activation = 0;
const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
};
@@ -308,7 +310,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
}
// Configure and tune GEMM
- configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, (data_layout == DataLayout::NHWC) ? conv_h : 1);
+ // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
+ const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
+
+ configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth);
if(!_skip_im2col)
{
@@ -454,9 +459,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
{
const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info();
- float multiplier = input->quantization_info().scale * weights->quantization_info().scale / output_quant_info.scale;
- int output_multiplier, output_shift;
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
int min_activation = 0;
int max_activation = 0;
@@ -485,17 +492,20 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
// If the activation layer is RELU, BOUNDED_RELU or LU_BOUNDED_RELU, we can use the GEMMLowp output stage to perform this operation
is_activationlayer_enabled = false;
-
- // Set the GEMMLowp output stage info
- gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
- gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
- gemmlowp_output_stage.gemmlowp_shift = output_shift;
- gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
- gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
}
+
+ // Set the GEMMLowp output stage info
+ gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
+ gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
+ gemmlowp_output_stage.gemmlowp_shift = output_shift;
+ gemmlowp_output_stage.gemmlowp_min_bound = min_activation;
+ gemmlowp_output_stage.gemmlowp_max_bound = max_activation;
}
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, skip_col2im ? conv_h : 1, skip_im2col));
+ // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
+ const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
+
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col));
// Validate Col2Im
if(!skip_col2im)
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 321ecf85d8..e8bf6732b2 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -140,7 +140,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
if(c != nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 0);
ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.reinterpret_input_as_3d());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, c);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->dimension(1), "The C matrix must have the same number of rows as the matrix A");
@@ -150,7 +150,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0));
- if(gemm_info.depth_output_gemm3d() != 1)
+ if(gemm_info.depth_output_gemm3d() != 0)
{
if(gemm_info.reinterpret_input_as_3d())
{
@@ -174,7 +174,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
if(!run_optimised)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMM cannot reinterpret the input tensor as 3D");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMM cannot reinterpret the output tensor as 3D");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMM cannot reinterpret the output tensor as 3D");
// Check if the first input tensor is a vector.
const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index fc65469488..02ae1715da 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -276,7 +276,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
}
// Configure GEMM
- configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, _skip_col2im ? conv_h : 1);
+ // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix
+ const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0;
+ configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, gemm_3d_depth);
if(!_skip_im2col)
{
@@ -477,7 +479,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
gemm_output_to_use = &info_gemm;
}
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 1, skip_im2col));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 0, skip_im2col));
if(is_quantized)
{
@@ -518,7 +520,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
}
// Validate output stage for quantized case
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 1);
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 0);
}
// Validate Col2Im/ReshapeLayer
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 80f5ab0c93..16ee3d07fd 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -198,7 +198,7 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the input tensor as 3D");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D");
int32_t a_offset = a->quantization_info().offset;
int32_t b_offset = b->quantization_info().offset;