From 2bbd96457e3740fd9df5556607514b5e80a25720 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 4 Jul 2017 16:46:32 +0100 Subject: COMPMID-436, COMPMID-437 - Port NEConvolutionLayer & NEFullyConnectedLayer to support 16 bit fixed point Change-Id: I69edf2dac242f941bac95c8479d921e7be6abca7 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79725 Tested-by: Kaizen Reviewed-by: Pablo Tello --- src/core/NEON/kernels/NECol2ImKernel.cpp | 3 ++- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 19 +++++++++++++++++-- src/core/NEON/kernels/NEIm2ColKernel.cpp | 21 ++++++++++++++++++--- src/core/NEON/kernels/NETransposeKernel.cpp | 3 ++- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 12 ++++++------ src/runtime/NEON/functions/NEConvolutionLayer.cpp | 5 ++--- .../NEON/functions/NEFullyConnectedLayer.cpp | 7 +++---- 7 files changed, 50 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index e9a73607e6..95a9364082 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -69,7 +69,8 @@ NECol2ImKernel::NECol2ImKernel() void NECol2ImKernel::configure(const ITensor *input, ITensor *output, std::pair convolved_dims) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); TensorShape output_shape = input->info()->tensor_shape(); diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index 7a3bae50c0..826a386557 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -45,9 +45,9 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel() void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum); ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1); _biases = biases; @@ -121,6 +121,21 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window) in0_out, in1); break; } + case DataType::QS16: + { + execute_window_loop(window, [&](const Coordinates & id) + { + qint16x8x2_t accum = vld2q_s16(reinterpret_cast(in0_out.ptr())); + const qint16x8x2_t biases = vld2q_s16(reinterpret_cast(in1.ptr())); + + accum.val[0] = vqaddq_qs16(accum.val[0], biases.val[0]); + accum.val[1] = vqaddq_qs16(accum.val[1], biases.val[1]); + + vst2q_s16(reinterpret_cast(in0_out.ptr()), accum); + }, + in0_out, in1); + break; + } default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 8c9d12c57c..5bb8b1c22a 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -134,10 +134,14 @@ inline void linearize_volume(const uint8_t *const in_ptr, // Append 1 if the convolution layer has biases if(has_bias) { - if(std::is_same::value) + if(std::is_same::value) { *out_ptr = scvt_qs8_f32(1.0f, fixed_point_position); } + else if(std::is_same::value) + { + *out_ptr = scvt_qs16_f32(1.0f, fixed_point_position); + } else { *out_ptr = static_cast(1); @@ -249,10 +253,14 @@ void NEIm2ColKernel::run_reduced(const Window &window) // Add bias if(_has_bias) { - if(std::is_same::value) + if(std::is_same::value) { *(reinterpret_cast(out_ptr) + out_width - 1) = scvt_qs8_f32(1.0f, _input->info()->fixed_point_position()); } + else if(std::is_same::value) + { + *(reinterpret_cast(out_ptr) + out_width - 1) = scvt_qs16_f32(1.0f, _input->info()->fixed_point_position()); + } else { *(reinterpret_cast(out_ptr) + out_width - 1) = static_cast(1); @@ -269,8 +277,9 @@ NEIm2ColKernel::NEIm2ColKernel() void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8, DataType::QS16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); _input = input; _output = output; @@ -309,6 +318,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::QS8: _func = &NEIm2ColKernel::run_reduced; break; + case DataType::QS16: + _func = &NEIm2ColKernel::run_reduced; + break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; @@ -329,6 +341,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::QS8: _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; + case DataType::QS16: + _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index a990e9068e..732a0ef4f6 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -179,7 +179,8 @@ NETransposeKernel::NETransposeKernel() void NETransposeKernel::configure(const ITensor *input, ITensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QS8, DataType::U16, DataType::S16, DataType::QS16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); TensorShape output_shape{ input->info()->tensor_shape() }; diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index ac688e1381..d685ec7962 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -95,7 +95,7 @@ NEWeightsReshapeKernel::NEWeightsReshapeKernel() void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias, ITensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); const int fixed_point_position = input->info()->fixed_point_position(); @@ -129,26 +129,26 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias _bias = bias; _output = output; - switch(_input->info()->data_type()) + switch(_input->info()->element_size()) { - case DataType::F32: + case 4: { _func = &weights_reshape; break; } - case DataType::F16: + case 2: { _func = &weights_reshape; break; } - case DataType::QS8: + case 1: { _func = &weights_reshape; break; } default: { - ARM_COMPUTE_ERROR_ON("Data type not supported"); + ARM_COMPUTE_ERROR_ON("Element size not supported"); break; } } diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index dc8652747f..f6481f1918 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -41,14 +41,13 @@ NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); if(biases != nullptr) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases); ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); @@ -96,7 +95,7 @@ NEConvolutionLayer::NEConvolutionLayer() void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 6e27ed344a..eb84ccaddc 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -39,7 +39,7 @@ NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights() void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); ARM_COMPUTE_ERROR_ON(output == nullptr); ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2); ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false)); @@ -196,10 +196,9 @@ void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITens void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2); const DataType dt = input->info()->data_type(); -- cgit v1.2.1