aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp')
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp45
1 files changed, 35 insertions, 10 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
index 5f42450b9f..208d06d7cd 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
@@ -33,6 +33,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
using namespace arm_compute;
@@ -48,14 +49,22 @@ BorderSize CLDepthwiseConvolution3x3Kernel::border_size() const
void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
if(biases != nullptr)
{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
+ if(is_data_type_quantized_asymmetric(weights->info()->data_type()))
+ {
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
+ }
ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2));
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
}
@@ -80,13 +89,12 @@ void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, const IC
// Set build options
ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3);
- std::set<std::string> options{ "-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x) };
- if(_biases != nullptr)
- {
- options.emplace("-DHAS_BIAS");
- }
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x));
+ build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_convolution_3x3", options));
+ std::string kernel_name = is_data_type_quantized_asymmetric(_input->info()->data_type()) ? "depthwise_convolution_3x3_quantized" : "depthwise_convolution_3x3";
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel window
const unsigned int num_elems_processed_per_iteration = 2;
@@ -105,6 +113,23 @@ void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, const IC
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
ICLKernel::configure(win);
+
+ // Set static arguments
+ if(is_data_type_quantized_asymmetric(_input->info()->data_type()))
+ {
+ float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
+ unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0);
+
+ _kernel.setArg(idx++, -_input->info()->quantization_info().offset);
+ _kernel.setArg(idx++, -_weights->info()->quantization_info().offset);
+ _kernel.setArg(idx++, _output->info()->quantization_info().offset);
+ _kernel.setArg(idx++, output_multiplier);
+ _kernel.setArg(idx++, output_shift);
+ }
}
void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue &queue)