diff options
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 9 | ||||
-rw-r--r-- | src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp | 95 |
2 files changed, 96 insertions, 8 deletions
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 6cf7b97e66..5e47dd56ae 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -40,7 +40,10 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + if(!is_data_type_quantized_per_channel(weights->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + } ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1); const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); @@ -55,7 +58,7 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx)); } - const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); + const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type()); if(is_quantized) { @@ -67,7 +70,6 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo ARM_COMPUTE_UNUSED(multiplier); ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f); } - if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation)) { TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); @@ -88,7 +90,6 @@ Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo { ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info)); } - return Status{}; } } // namespace diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 92ad93e4a7..c564e22d46 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -84,6 +84,48 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> get_qasymm8_convolver(int kern } } +std::unique_ptr<depthwise::IDepthwiseConvolution> get_qsymm8_perchannel_convolver(int kernel_size, int stride_x, + int n_batches, int in_rows, int in_cols, int n_channels, + neon_convolution_kernels::ActivationFunction activation, + const qsymm8::QSymm8PerChannelParams &wqinfo, const qasymm8::QAsymm8Params &iqinfo, const qasymm8::QAsymm8Params &oqinfo, + const qsymm8::QSymm8PerChannelRescaleParams &rescale_params, + int padding_top, int padding_left, int padding_bottom, int padding_right) +{ + switch(kernel_size) + { + case 3: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 3, 3, 1, 1>>( + n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 3, 3, 2, 2>>( + n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + case 5: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 5, 5, 1, 1>>( + n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 5, 5, 2, 2>>( + n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + default: + return nullptr; + } +} + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp16_convolver(int kernel_size, int stride_x, int n_batches, int in_rows, int in_cols, int n_channels, @@ -187,6 +229,9 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor const int padding_bottom = conv_info.pad_bottom(); const int padding_right = conv_info.pad_right(); + const bool is_uniform_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QASYMM8); + const bool is_perchannel_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL); + const unsigned int stride_x = conv_info.stride().first; const unsigned int kernel_size = weights->info()->tensor_shape().y(); @@ -202,7 +247,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor } // Create quantized convolver - if(data_type == DataType::QASYMM8) + if(is_uniform_quantized) { const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform(); const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform(); @@ -226,6 +271,40 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor return get_qasymm8_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); } + else if(is_perchannel_quantized) + { + const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform(); + const QuantizationInfo weights_qinfo = weights->info()->quantization_info(); + const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform(); + + // Check that quantization info are in the range [0, 255] + ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255); + ARM_COMPUTE_ERROR_ON(output_qinfo.offset < 0 || output_qinfo.offset > 255); + const qasymm8::QAsymm8Params iqinfo{ static_cast<uint8_t>(input_qinfo.offset), input_qinfo.scale }; + const qsymm8::QSymm8PerChannelParams wqinfo{ weights_qinfo.scale() }; + const qasymm8::QAsymm8Params oqinfo{ static_cast<uint8_t>(output_qinfo.offset), output_qinfo.scale }; + + // Calculate rescale parameters + std::vector<float> fmultipliers; + std::vector<int> qmultipliers; + std::vector<int> qshifts; + + for(auto const s : wqinfo.scales) + { + const float fmultipler = iqinfo.scale * s / oqinfo.scale; + int qmultiplier = 0; + int qshift = 0; + quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift); + fmultipliers.push_back(fmultipler); + qmultipliers.push_back(qmultiplier); + qshifts.push_back(qshift); + } + + qsymm8::QSymm8PerChannelRescaleParams rescale_params(qshifts, qmultipliers, fmultipliers); + + return get_qsymm8_perchannel_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, activation, + wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + } else { // Create float convolver @@ -328,7 +407,10 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + if(weights->data_type() != DataType::QSYMM8_PER_CHANNEL) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + } ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); // Validate convolver @@ -378,7 +460,7 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso // Check data type const DataType data_type = weights->data_type(); - bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type); + bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type) || data_type == DataType::QSYMM8_PER_CHANNEL; // Check weighs size std::set<unsigned int> supported_kernel_sizes = { 3, 5 }; @@ -402,7 +484,12 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0); bool supported_padding = is_same_padding || is_valid_padding; // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - bool is_dilation_supported = (dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1); + bool is_dilation_supported = ((dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1)); + + if(data_type == DataType::QSYMM8_PER_CHANNEL) + { + is_dilation_supported = is_dilation_supported && (dilation == Size2D(1U, 1U)); + } return is_data_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_supported; } |