diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/CL/kernels/CLIm2ColKernel.cpp | 2 | ||||
-rw-r--r-- | src/core/utils/quantization/AsymmHelpers.cpp | 21 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 38 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 8 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 2 |
9 files changed, 54 insertions, 33 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp index 73b1d41eb1..b2fb3e0278 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp @@ -112,7 +112,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); Window win{}; Window win_out{}; @@ -227,7 +227,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC _input1 = input1; _output = output; _reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 715edae606..5e02dda9e3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -110,7 +110,7 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. @@ -227,7 +227,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen _input1 = input1; _output = output; _reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 0ba0d0e2f9..54ef23f2a2 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -109,7 +109,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen const int yin_end = input->dimension(1); const int xout_start = 0; - const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration) : output->dimension(0); + const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0); const int yout_start = 0; const int yout_end = output->dimension(1); diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index 8bb6d8e173..ea9ba776a9 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,21 +30,30 @@ using namespace arm_compute::quantization; constexpr int64_t fixed_point_one_Q0 = (1ll << 31); +constexpr float epsilon = 0.00001f; -arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier, +arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(float multiplier, int *quant_multiplier, int *right_shift) { ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 0); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier >= 1); - if(multiplier == 0) + ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon); + ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon); + if(std::fabs(1.0f - multiplier) < epsilon) + { + *quant_multiplier = 1; + *right_shift = 0; + return arm_compute::Status{}; + } + + if(std::fabs(0.0f - multiplier) < epsilon) { *quant_multiplier = 0; *right_shift = 0; return arm_compute::Status{}; } + const double q = std::frexp(multiplier, right_shift); *right_shift *= -1; auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0)); @@ -61,7 +70,7 @@ arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_le return arm_compute::Status{}; } -arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier, +arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(float multiplier, int *quantized_multiplier, int *left_shift) { diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index c5637dba26..6a2aac6457 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -101,7 +101,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor else { // Configure matrix multiply kernel - _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 1, false, retain_internal_weights)); + _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 0, false, retain_internal_weights)); } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 67f55d56e2..4825d878f8 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -276,14 +276,16 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * { const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; - int output_multiplier, output_shift; + const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale; + int output_multiplier = 0; + int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); int min_activation = 0; int max_activation = 0; const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, + ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU }; @@ -308,7 +310,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } // Configure and tune GEMM - configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, (data_layout == DataLayout::NHWC) ? conv_h : 1); + // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; + + configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth); if(!_skip_im2col) { @@ -454,9 +459,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI { const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info(); - float multiplier = input->quantization_info().scale * weights->quantization_info().scale / output_quant_info.scale; - int output_multiplier, output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale; + int output_multiplier = 0; + int output_shift = 0; + + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); int min_activation = 0; int max_activation = 0; @@ -485,17 +492,20 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI // If the activation layer is RELU, BOUNDED_RELU or LU_BOUNDED_RELU, we can use the GEMMLowp output stage to perform this operation is_activationlayer_enabled = false; - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; - gemmlowp_output_stage.gemmlowp_shift = output_shift; - gemmlowp_output_stage.gemmlowp_min_bound = min_activation; - gemmlowp_output_stage.gemmlowp_max_bound = max_activation; } + + // Set the GEMMLowp output stage info + gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; + gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; + gemmlowp_output_stage.gemmlowp_shift = output_shift; + gemmlowp_output_stage.gemmlowp_min_bound = min_activation; + gemmlowp_output_stage.gemmlowp_max_bound = max_activation; } - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, skip_col2im ? conv_h : 1, skip_im2col)); + // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; + + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col)); // Validate Col2Im if(!skip_col2im) diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 321ecf85d8..e8bf6732b2 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -140,7 +140,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(c != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 1); + ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 0); ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.reinterpret_input_as_3d()); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->dimension(1), "The C matrix must have the same number of rows as the matrix A"); @@ -150,7 +150,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0)); - if(gemm_info.depth_output_gemm3d() != 1) + if(gemm_info.depth_output_gemm3d() != 0) { if(gemm_info.reinterpret_input_as_3d()) { @@ -174,7 +174,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(!run_optimised) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMM cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMM cannot reinterpret the output tensor as 3D"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMM cannot reinterpret the output tensor as 3D"); // Check if the first input tensor is a vector. const bool run_vector_matrix_multiplication = a->dimension(1) < 2; diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index fc65469488..02ae1715da 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -276,7 +276,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } // Configure GEMM - configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, _skip_col2im ? conv_h : 1); + // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0; + configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, gemm_3d_depth); if(!_skip_im2col) { @@ -477,7 +479,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI gemm_output_to_use = &info_gemm; } - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 1, skip_im2col)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 0, skip_im2col)); if(is_quantized) { @@ -518,7 +520,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI } // Validate output stage for quantized case - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 1); + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 0); } // Validate Col2Im/ReshapeLayer diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 80f5ab0c93..16ee3d07fd 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -198,7 +198,7 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D"); int32_t a_offset = a->quantization_info().offset; int32_t b_offset = b->quantization_info().offset; |