author      Gian Marco <gianmarco.iodice@arm.com>        2017-12-12 10:08:38 +0000
committer   Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:42:33 +0000
commit      bfa3b52de2cfbd330efc19e2096134a20c645406 (patch)
tree        30812054cbeaa87a268bb21174402d3b2ec199d4 /src
parent      397252889a2d7e7d9d241ee9dcecff3edf2bcff7 (diff)
download    ComputeLibrary-bfa3b52de2cfbd330efc19e2096134a20c645406.tar.gz
COMPMID-556 - Fix examples
- Fixed a data type issue in cl_sgemm
- Added support for both NEON and OpenCL targets in the graph examples; previously only the OpenCL target could be run
- Added auto_init() in NEDepthwiseVectorToTensorKernel
Change-Id: I4410ce6f4992b2375b980634fe55f1083cf3c471
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112850
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--   src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp | 11
-rw-r--r--   src/runtime/CL/functions/CLConvolutionLayer.cpp           | 15
-rw-r--r--   src/runtime/NEON/functions/NEConvolutionLayer.cpp         |  2

3 files changed, 23 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
index 6deda506ab..9b36df3c39 100644
--- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
@@ -45,6 +45,17 @@ NEDepthwiseVectorToTensorKernel::NEDepthwiseVectorToTensorKernel()
 void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+
+    TensorShape output_shape = input->info()->tensor_shape();
+    output_shape.set(0, conv_w);
+    output_shape.set(1, conv_h);
+    output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 66548d19b2..d628bf93ce 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -222,7 +222,10 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     shape_im2col.set(0, mat_input_cols);
     shape_im2col.set(1, mat_input_rows);
     shape_im2col.set(2, 1);
-    _input_im2col_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));
+    // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+    TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position());
+    im2col_reshaped_info.set_quantization_info(input->info()->quantization_info());
+    _input_im2col_reshaped.allocator()->init(im2col_reshaped_info);
     _memory_group.manage(&_input_im2col_reshaped);

     // Create tensor (interleave) to prepare input tensor for GEMM
@@ -231,7 +234,10 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     TensorShape shape_interleaved = shape_im2col;
     shape_interleaved.set(0, shape_interleaved.x() * 4);
     shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f));
-    _input_interleaved_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_interleaved));
+    // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+    TensorInfo interleaved_info(shape_interleaved, 1, dt, input->info()->fixed_point_position());
+    interleaved_info.set_quantization_info(input->info()->quantization_info());
+    _input_interleaved_reshaped.allocator()->init(interleaved_info);
     _memory_group.manage(&_input_interleaved_reshaped);
 }
@@ -241,8 +247,9 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     shape_gemm.set(1, mat_input_rows);
     const DataType gemm_data_type = _is_quantized ? DataType::S32 : dt;
     // GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
-    TensorInfo info_gemm(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_gemm).set_data_type(gemm_data_type).set_quantization_info(
-                             output->info()->quantization_info()));
+    // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+    TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
+    info_gemm.set_quantization_info(output->info()->quantization_info());
     _gemm_output.allocator()->init(info_gemm);
     _memory_group.manage(&_gemm_output);
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 865672e525..2717bbfabc 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -267,7 +267,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
     // Configure matrix multiplication kernel
     if(_is_fully_connected_convolution)
     {
-        _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace, 1.f, 0.f, false, false);
+        _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace, 1.f, 0.f, true, false);
     }
     else
     {
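The NEDepthwiseVectorToTensorKernel hunk above derives the output shape from the convolution output size and the flat input length before calling auto_init_if_empty(), so the output's metadata is only filled in when the caller left it uninitialized; otherwise the existing ARM_COMPUTE_ERROR_ON_MISMATCHING_* checks validate the caller's metadata against the computed shape. Below is a minimal sketch of that shape derivation in plain C++ (the function name and the std::array return type are illustrative, not library code):

```cpp
#include <array>
#include <cassert>
#include <cstddef>

// Derive the vector-to-tensor output shape the same way the new configure()
// code does: width and height come from the convolution output size, and the
// channel count is whatever remains of the flat input length.
std::array<std::size_t, 3> vector_to_tensor_shape(std::size_t flat_length,
                                                  std::size_t conv_w,
                                                  std::size_t conv_h)
{
    assert(conv_w > 0 && conv_h > 0);
    assert(flat_length % (conv_w * conv_h) == 0);
    return { conv_w, conv_h, flat_length / (conv_w * conv_h) };
}
```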
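All three CLConvolutionLayer hunks apply the same workaround flagged by the FIXME comments: cloning the input's ITensorInfo does not behave correctly when the input is a sub-tensor (as used for grouped convolutions), so the metadata for each intermediate tensor is built by hand and the quantization info is propagated explicitly. The sketch below restates that pattern using only the calls visible in the diff (the shape/channels/data-type/fixed-point TensorInfo constructor, fixed_point_position(), quantization_info() and set_quantization_info()); the helper function itself is hypothetical and assumes the ACL headers of that era:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

// Build the metadata for an intermediate (im2col / interleave / GEMM output)
// tensor without cloning the source tensor's info. The quantization info is
// not part of this TensorInfo constructor, so it is propagated separately.
TensorInfo make_intermediate_info(const ITensorInfo &src,
                                  const TensorShape &shape,
                                  DataType           data_type)
{
    TensorInfo info(shape, 1 /* num_channels */, data_type, src.fixed_point_position());
    info.set_quantization_info(src.quantization_info());
    return info;
}
```

Note that for the GEMM output the diff takes the quantization info from the layer's output tensor rather than the input, consistent with the in-diff comment that the S32 GEMM result holds the raw accumulator prior to quantized post-processing.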
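For reference, the unchanged context lines in the second CLConvolutionLayer hunk also show how the interleaved tensor fed to GEMM is sized: the im2col width is multiplied by 4 and the row count divided by 4 (rounded up), matching an interleave step that packs four rows of the left-hand matrix side by side. A standalone restatement of that arithmetic (the function name is illustrative):

```cpp
#include <cmath>
#include <cstddef>
#include <utility>

// Shape of the interleaved LHS matrix for GEMM: four rows are packed
// together, so the row length quadruples and the row count shrinks by 4x,
// rounded up when the height is not a multiple of 4.
std::pair<std::size_t, std::size_t> interleaved4x4_shape(std::size_t im2col_width,
                                                         std::size_t im2col_height)
{
    const std::size_t width  = im2col_width * 4;
    const std::size_t height = static_cast<std::size_t>(std::ceil(im2col_height / 4.0));
    return { width, height };
}
```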