diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-02-19 12:46:29 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:18 +0000 |
commit | 9be0c5a71cd7c1f08b65c48e53d083c59dd093a3 (patch) | |
tree | f9029878deaa3714dbded630bf3c48eb9af06623 /src/core | |
parent | 287b570b86ba40a801136aded140b83435ca9314 (diff) | |
download | ComputeLibrary-9be0c5a71cd7c1f08b65c48e53d083c59dd093a3.tar.gz |
COMPMID-936: Convolution failure in NEON Convolution Layer.
Change-Id: I68a98eff57c8db719a501b68541666e8bc5f2081
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121180
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core')
5 files changed, 7 insertions, 5 deletions
```diff
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index 8467b39910..9851475928 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -69,7 +69,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
  build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width));
  build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height));
  build_opts.add_option_if(has_bias, "-DHAS_BIAS");
- build_opts.add_option_if_else(is_data_type_quantized(input->info()->data_type()),
+ build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()),
  "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0");
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index ae35bf64aa..83fc168f45 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -50,7 +50,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso
  output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
  // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
  ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
  ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index cbc281b6ac..f23ecf3ad0 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -144,7 +144,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons
  {
  ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- // Auto-initialize output output if required
+ // Auto-initialize output if required
  if(output != nullptr)
  {
  // Work out expected output data type
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 92383d9f15..dad4fee837 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -272,8 +272,8 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
  -conv_pad_top, (num_x_steps - 1) * input_num_elems_processed + num_elems_read_per_iteration, _input->info()->tensor_shape().y() + conv_pad_bottom);
- AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1));
- AccessWindowStatic output_access(_output->info(), 0, 0, num_x_steps * _num_elems_written_per_iteration, output_shape.y());
+ AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1));
+ AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration);
  update_window_and_padding(win, input_access, weights_access, output_access);
  output_access.set_valid_region(win, ValidRegion(Coordinates(), _output->info()->tensor_shape()));
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 3f33c43b59..08d8f8ce56 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -451,8 +451,10 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const
  break;
  }
  case DataType::S32:
+ {
  _func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>;
  break;
+ }
  #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  case DataType::F16:
  {
```