aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index d5f37f32ce..8db85edc62 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -260,6 +260,14 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
build_opts.add_option_if(is_quantized_per_channel, "-DPER_CHANNEL_QUANTIZATION");
build_opts.add_option_if(is_dot8_supported, "-DIS_DOT8");
+ // Compute non-per-channel multiplier and shift anyway to make OpenCL kernel simpler
+ float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
+ build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
+ build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
+
if(act_info.enabled())
{
const int a_val = quantize_qasymm8(act_info.a(), oq_info);