diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-11-05 14:26:32 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-11-08 13:31:33 +0000 |
commit | 3139f03a74ede3b3bd7cfc6ff219e6c9bc556632 (patch) | |
tree | cd67b8c9ec1688fcf739c576553b10e435e6938e /src | |
parent | 421405b6a21b124288a750e2da26dc01eb7391cb (diff) | |
download | ComputeLibrary-3139f03a74ede3b3bd7cfc6ff219e6c9bc556632.tar.gz |
COMPMID-1736: Fixed out-of-bound write in CLIm2Col
The issue was related to CLIm2Col when the number of input channels was less than
the number of elements processed by each thread.
The bug has been fixed in the validate_and_configure_window() function setting the correct number of elements accessed
in the output tensor.
Also fixed an issue GEMM3D when we have a single output channel
Change-Id: I094292d0c7662599c4a4c3916ec5f5821df5faef
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/CL/kernels/CLIm2ColKernel.cpp | 2 | ||||
-rw-r--r-- | src/core/utils/quantization/AsymmHelpers.cpp | 21 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 38 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 8 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 2 |
9 files changed, 54 insertions, 33 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp index 73b1d41eb1..b2fb3e0278 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp @@ -112,7 +112,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); Window win{}; Window win_out{}; @@ -227,7 +227,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC _input1 = input1; _output = output; _reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 715edae606..5e02dda9e3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -110,7 +110,7 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. @@ -227,7 +227,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen _input1 = input1; _output = output; _reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1); + _reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 0); // In case both input and output have to be reinterpreted as 3D tensors, // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 0ba0d0e2f9..54ef23f2a2 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -109,7 +109,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen const int yin_end = input->dimension(1); const int xout_start = 0; - const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration) : output->dimension(0); + const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0); const int yout_start = 0; const int yout_end = output->dimension(1); diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index 8bb6d8e173..ea9ba776a9 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,21 +30,30 @@ using namespace arm_compute::quantization; constexpr int64_t fixed_point_one_Q0 = (1ll << 31); +constexpr float epsilon = 0.00001f; -arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier, +arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(float multiplier, int *quant_multiplier, int *right_shift) { ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 0); - ARM_COMPUTE_RETURN_ERROR_ON(multiplier >= 1); - if(multiplier == 0) + ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon); + ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon); + if(std::fabs(1.0f - multiplier) < epsilon) + { + *quant_multiplier = 1; + *right_shift = 0; + return arm_compute::Status{}; + } + + if(std::fabs(0.0f - multiplier) < epsilon) { *quant_multiplier = 0; *right_shift = 0; return arm_compute::Status{}; } + const double q = std::frexp(multiplier, right_shift); *right_shift *= -1; auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0)); @@ -61,7 +70,7 @@ arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_le return arm_compute::Status{}; } -arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier, +arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(float multiplier, int *quantized_multiplier, int *left_shift) { diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index c5637dba26..6a2aac6457 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -101,7 +101,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor else { // Configure matrix multiply kernel - _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 1, false, retain_internal_weights)); + _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 0, false, retain_internal_weights)); } } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 67f55d56e2..4825d878f8 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -276,14 +276,16 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * { const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); - float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale; - int output_multiplier, output_shift; + const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale; + int output_multiplier = 0; + int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); int min_activation = 0; int max_activation = 0; const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, + ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU }; @@ -308,7 +310,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } // Configure and tune GEMM - configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, (data_layout == DataLayout::NHWC) ? conv_h : 1); + // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; + + configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth); if(!_skip_im2col) { @@ -454,9 +459,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI { const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info(); - float multiplier = input->quantization_info().scale * weights->quantization_info().scale / output_quant_info.scale; - int output_multiplier, output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale; + int output_multiplier = 0; + int output_shift = 0; + + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); int min_activation = 0; int max_activation = 0; @@ -485,17 +492,20 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI // If the activation layer is RELU, BOUNDED_RELU or LU_BOUNDED_RELU, we can use the GEMMLowp output stage to perform this operation is_activationlayer_enabled = false; - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; - gemmlowp_output_stage.gemmlowp_shift = output_shift; - gemmlowp_output_stage.gemmlowp_min_bound = min_activation; - gemmlowp_output_stage.gemmlowp_max_bound = max_activation; } + + // Set the GEMMLowp output stage info + gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; + gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; + gemmlowp_output_stage.gemmlowp_shift = output_shift; + gemmlowp_output_stage.gemmlowp_min_bound = min_activation; + gemmlowp_output_stage.gemmlowp_max_bound = max_activation; } - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, skip_col2im ? conv_h : 1, skip_im2col)); + // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; + + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col)); // Validate Col2Im if(!skip_col2im) diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 321ecf85d8..e8bf6732b2 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -140,7 +140,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(c != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 1); + ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 0); ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.reinterpret_input_as_3d()); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->dimension(1), "The C matrix must have the same number of rows as the matrix A"); @@ -150,7 +150,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0)); - if(gemm_info.depth_output_gemm3d() != 1) + if(gemm_info.depth_output_gemm3d() != 0) { if(gemm_info.reinterpret_input_as_3d()) { @@ -174,7 +174,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso if(!run_optimised) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMM cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMM cannot reinterpret the output tensor as 3D"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMM cannot reinterpret the output tensor as 3D"); // Check if the first input tensor is a vector. const bool run_vector_matrix_multiplication = a->dimension(1) < 2; diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index fc65469488..02ae1715da 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -276,7 +276,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } // Configure GEMM - configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, _skip_col2im ? conv_h : 1); + // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix + const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0; + configure_mm(gemm_input_to_use, &_weights_reshaped, gemm_output_to_use, gemm_3d_depth); if(!_skip_im2col) { @@ -477,7 +479,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI gemm_output_to_use = &info_gemm; } - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 1, skip_im2col)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, gemm_output_to_use, skip_col2im ? conv_h : 0, skip_im2col)); if(is_quantized) { @@ -518,7 +520,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI } // Validate output stage for quantized case - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 1); + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 0); } // Validate Col2Im/ReshapeLayer diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 80f5ab0c93..16ee3d07fd 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -198,7 +198,7 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 1, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMMLowpMatrixMultiplyCore cannot reinterpret the output tensor as 3D"); int32_t a_offset = a->quantization_info().offset; int32_t b_offset = b->quantization_info().offset; |