aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp')
-rw-r--r--src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp95
1 files changed, 91 insertions, 4 deletions
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
index 92ad93e4a7..c564e22d46 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
@@ -84,6 +84,48 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> get_qasymm8_convolver(int kern
}
}
+std::unique_ptr<depthwise::IDepthwiseConvolution> get_qsymm8_perchannel_convolver(int kernel_size, int stride_x, // Factory: selects a QSymm8HybridPerChannelDepthwiseConvolution instantiation from kernel_size and stride_x; returns nullptr when no case matches.
+ int n_batches, int in_rows, int in_cols, int n_channels, // Passed through unchanged to the selected convolver's constructor.
+ neon_convolution_kernels::ActivationFunction activation, // Passed through unchanged to the selected convolver's constructor.
+ const qsymm8::QSymm8PerChannelParams &wqinfo, const qasymm8::QAsymm8Params &iqinfo, const qasymm8::QAsymm8Params &oqinfo, // Quantization parameters, passed through in this order (w/i/o presumably weights/input/output - confirm with caller).
+ const qsymm8::QSymm8PerChannelRescaleParams &rescale_params, // Passed through unchanged to the selected convolver's constructor.
+ int padding_top, int padding_left, int padding_bottom, int padding_right) // Passed through unchanged, in this order.
+{
+ switch(kernel_size) // First dispatch level: only kernel sizes 3 and 5 have a case.
+ {
+ case 3:
+ {
+ switch(stride_x) // Second dispatch level: only strides 1 and 2 have a case. There is no separate y-stride parameter.
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 3, 3, 1, 1>>( // Template args 3-4 mirror kernel_size, 5-6 mirror stride_x. NOTE(review): leading 2, 2 presumably the output tile size - confirm against the template declaration.
+ n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 3, 3, 2, 2>>( // Same as above with both stride template arguments set to 2.
+ n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr; // No instantiation for this stride with kernel_size 3; caller receives a null pointer.
+ }
+ }
+ case 5:
+ {
+ switch(stride_x) // Same stride dispatch as the kernel_size 3 branch.
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 5, 5, 1, 1>>( // Template args 3-4 are 5, 5 (kernel_size); 5-6 are 1, 1 (stride_x).
+ n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::QSymm8HybridPerChannelDepthwiseConvolution<2, 2, 5, 5, 2, 2>>( // Template args 3-4 are 5, 5 (kernel_size); 5-6 are 2, 2 (stride_x).
+ n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr; // No instantiation for this stride with kernel_size 5; caller receives a null pointer.
+ }
+ }
+ default:
+ return nullptr; // Kernel sizes other than 3 and 5 have no case here.
+ }
+}
+
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp16_convolver(int kernel_size, int stride_x,
int n_batches, int in_rows, int in_cols, int n_channels,
@@ -187,6 +229,9 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
const int padding_bottom = conv_info.pad_bottom();
const int padding_right = conv_info.pad_right();
+ const bool is_uniform_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QASYMM8);
+ const bool is_perchannel_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL);
+
const unsigned int stride_x = conv_info.stride().first;
const unsigned int kernel_size = weights->info()->tensor_shape().y();
@@ -202,7 +247,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
}
// Create quantized convolver
- if(data_type == DataType::QASYMM8)
+ if(is_uniform_quantized)
{
const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform();
@@ -226,6 +271,40 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
return get_qasymm8_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation,
wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
}
+ else if(is_perchannel_quantized)
+ {
+ const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
+ const QuantizationInfo weights_qinfo = weights->info()->quantization_info();
+ const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform();
+
+ // Check that the input and output quantization offsets are in the range [0, 255]
+ ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255);
+ ARM_COMPUTE_ERROR_ON(output_qinfo.offset < 0 || output_qinfo.offset > 255);
+ const qasymm8::QAsymm8Params iqinfo{ static_cast<uint8_t>(input_qinfo.offset), input_qinfo.scale };
+ const qsymm8::QSymm8PerChannelParams wqinfo{ weights_qinfo.scale() };
+ const qasymm8::QAsymm8Params oqinfo{ static_cast<uint8_t>(output_qinfo.offset), output_qinfo.scale };
+
+ // Calculate rescale parameters
+ std::vector<float> fmultipliers;
+ std::vector<int> qmultipliers;
+ std::vector<int> qshifts;
+
+ for(auto const s : wqinfo.scales)
+ {
+ const float fmultipler = iqinfo.scale * s / oqinfo.scale;
+ int qmultiplier = 0;
+ int qshift = 0;
+ quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift);
+ fmultipliers.push_back(fmultipler);
+ qmultipliers.push_back(qmultiplier);
+ qshifts.push_back(qshift);
+ }
+
+ qsymm8::QSymm8PerChannelRescaleParams rescale_params(qshifts, qmultipliers, fmultipliers);
+
+ return get_qsymm8_perchannel_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, activation,
+ wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ }
else
{
// Create float convolver
@@ -328,7 +407,10 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ if(weights->data_type() != DataType::QSYMM8_PER_CHANNEL)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ }
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
// Validate convolver
@@ -378,7 +460,7 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso
// Check data type
const DataType data_type = weights->data_type();
- bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type);
+ bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type) || data_type == DataType::QSYMM8_PER_CHANNEL;
// Check weighs size
std::set<unsigned int> supported_kernel_sizes = { 3, 5 };
@@ -402,7 +484,12 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso
bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0);
bool supported_padding = is_same_padding || is_valid_padding;
// TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
- bool is_dilation_supported = (dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1);
+ bool is_dilation_supported = ((dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1));
+
+ if(data_type == DataType::QSYMM8_PER_CHANNEL)
+ {
+ is_dilation_supported = is_dilation_supported && (dilation == Size2D(1U, 1U));
+ }
return is_data_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_supported;
}