aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-02-19 12:46:29 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:47:18 +0000
commit9be0c5a71cd7c1f08b65c48e53d083c59dd093a3 (patch)
treef9029878deaa3714dbded630bf3c48eb9af06623
parent287b570b86ba40a801136aded140b83435ca9314 (diff)
downloadComputeLibrary-9be0c5a71cd7c1f08b65c48e53d083c59dd093a3.tar.gz
COMPMID-936: Convolution failure in NEON Convolution Layer.
Change-Id: I68a98eff57c8db719a501b68541666e8bc5f2081 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121180 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h1
-rw-r--r--src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp2
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp18
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp6
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp6
-rw-r--r--src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp12
10 files changed, 37 insertions, 18 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 26384651f1..e51c6bbe98 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__
#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Utils.h"
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index 8467b39910..9851475928 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -69,7 +69,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height));
build_opts.add_option_if(has_bias, "-DHAS_BIAS");
- build_opts.add_option_if_else(is_data_type_quantized(input->info()->data_type()),
+ build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()),
"-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset),
"-DPAD_VALUE=0");
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index ae35bf64aa..83fc168f45 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -50,7 +50,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso
output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
// Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index cbc281b6ac..f23ecf3ad0 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -144,7 +144,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- // Auto-initialize output output if required
+ // Auto-initialize output if required
if(output != nullptr)
{
// Work out expected output data type
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 92383d9f15..dad4fee837 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -272,8 +272,8 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
-conv_pad_top,
(num_x_steps - 1) * input_num_elems_processed + num_elems_read_per_iteration,
_input->info()->tensor_shape().y() + conv_pad_bottom);
- AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1));
- AccessWindowStatic output_access(_output->info(), 0, 0, num_x_steps * _num_elems_written_per_iteration, output_shape.y());
+ AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1));
+ AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration);
update_window_and_padding(win, input_access, weights_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), _output->info()->tensor_shape()));
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 3f33c43b59..08d8f8ce56 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -451,8 +451,10 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const
break;
}
case DataType::S32:
+ {
_func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>;
break;
+ }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index fcf3969515..84bfb943fc 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -25,12 +25,14 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
using namespace arm_compute::misc;
+using namespace arm_compute::misc::shape_calculator;
CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3()
: _kernel(), _border_handler()
@@ -81,9 +83,12 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
bool append_bias = (biases != nullptr) && !_is_quantized;
const GPUTarget gpu_target = CLScheduler::get().target();
- unsigned int conv_w = 0;
- unsigned int conv_h = 0;
- std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+ // Calculate output shape
+ TensorShape dwc_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info);
+
+ // Output width and height
+ const unsigned int conv_w = dwc_output_shape.x();
+ const unsigned int conv_h = dwc_output_shape.y();
// Set up intermediate tensors
const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0);
@@ -112,15 +117,18 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
_v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out));
_v2mm_kernel.set_target(gpu_target);
_v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
+ _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dwc_output_shape));
_vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output, conv_w, conv_h);
// Output staged configuration
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _output_stage_kernel.configure(&_output_reshaped, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _output_stage_kernel.configure(&_output_reshaped, biases, output, output_multiplier, output_shift, output_quant_info.offset);
_output_reshaped.allocator()->allocate();
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index c4cfe1e24c..60e1bde4e2 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -279,11 +279,13 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// Configure output stage for quantized case
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
_memory_group.manage(&_tmp_output);
- _gemmlowp_output_stage.configure(&_gemm_output, biases, &_tmp_output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemm_output, biases, &_tmp_output, output_multiplier, output_shift, output_quant_info.offset);
}
// Configure Col2Im
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index f790f6a95f..335267522b 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -450,11 +450,13 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
// Configure output stage for quantized case
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
_memory_group.manage(&_tmp_output);
- _gemmlowp_output_stage.configure(&_gemm_output, biases, &_tmp_output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _gemmlowp_output_stage.configure(&_gemm_output, biases, &_tmp_output, output_multiplier, output_shift, output_quant_info.offset);
}
// Configure Col2Im
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index d35e3e6026..95fcf8805e 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -99,10 +99,12 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
{
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _output_stage_kernel.configure(&_accumulator, biases, output, output_multiplier, output_shift, output_quant_info.offset);
_accumulator.allocator()->allocate();
}
else
@@ -208,10 +210,12 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
// Output staged configuration
if(_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+
+ float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
- _output_stage_kernel.configure(&_output_reshaped, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+ _output_stage_kernel.configure(&_output_reshaped, biases, output, output_multiplier, output_shift, output_quant_info.offset);
_output_reshaped.allocator()->allocate();
}