aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2019-10-23 10:53:10 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2019-11-28 10:02:15 +0000
commit14cbfb2921990d8bf125231e350e2ac8dcd95a8b (patch)
tree9bec073d72c44c480c8807601889481d9b89ee7e /src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
parented7b27dd7cbdae57b880029840ad0235523848e0 (diff)
downloadComputeLibrary-14cbfb2921990d8bf125231e350e2ac8dcd95a8b.tar.gz
COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL
Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/2158 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Manuel Bottini <manuel.bottini@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/depthwise_convolution_quantized.cl')
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution_quantized.cl146
1 files changed, 106 insertions, 40 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index 08358755b1..ac1406b6d8 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -47,7 +47,7 @@
#define VEC_TYPE(size) VEC_DATA_TYPE(DATA_TYPE, size)
-#if defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && (defined(OUTPUT_OFFSET) || defined(REAL_MULTIPLIER))
+#if defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && ((defined(OUTPUT_OFFSET) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)) || defined(REAL_MULTIPLIER))
#if defined(WEIGHTS_PROMOTED_TYPE)
#define VEC_WEIGHTS_PROMOTED_TYPE(size) VEC_DATA_TYPE(WEIGHTS_PROMOTED_TYPE, size)
@@ -224,9 +224,6 @@ __kernel void dwc_3x3_native_quantized8_nchw(
#if defined(PER_CHANNEL_QUANTIZATION)
const int output_multiplier = *((__global int *)vector_offset(&output_multipliers, channel));
const int output_shift = *((__global int *)vector_offset(&output_shifts, channel));
-#else // defined(PER_CHANNEL_QUANTIZATION)
- const int output_multiplier = *((__global int *)vector_offset(&output_multipliers, 0));
- const int output_shift = *((__global int *)vector_offset(&output_shifts, 0));
#endif // defined(PER_CHANNEL_QUANTIZATION)
int8 values0 = 0;
@@ -335,7 +332,17 @@ __kernel void dwc_3x3_native_quantized8_nchw(
#else // defined(REAL_MULTIPLIER)
- values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, output_multiplier, output_shift, 8);
+#if defined(PER_CHANNEL_QUANTIZATION)
+ int8 res0_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values0, output_multiplier, output_shift, 8);
+ int8 res0_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, output_multiplier, output_shift, 8);
+ values0 = select(res0_shift_lt0, res0_shift_gt0, (int8)(output_shift) >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#else // OUTPUT_SHIFT < 0
+ values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#endif // OUTPUT_OFFSET < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -351,7 +358,17 @@ __kernel void dwc_3x3_native_quantized8_nchw(
#else // defined(REAL_MULTIPLIER)
- values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, output_multiplier, output_shift, 8);
+#if defined(PER_CHANNEL_QUANTIZATION)
+ int8 res1_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values1, output_multiplier, output_shift, 8);
+ int8 res1_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, output_multiplier, output_shift, 8);
+ values1 = select(res1_shift_lt0, res1_shift_gt0, (int8)(output_shift) >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#else // OUTPUT_SHIFT < 0
+ values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#endif // OUTPUT_OFFSET < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -667,7 +684,17 @@ __kernel void dwc_3x3_native_quantized8_dot8_nchw(
#else // defined(REAL_MULTIPLIER)
- values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, output_multiplier, output_shift, 8);
+#if defined(PER_CHANNEL_QUANTIZATION)
+ int8 res0_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values0, output_multiplier, output_shift, 8);
+ int8 res0_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, output_multiplier, output_shift, 8);
+ values0 = select(res0_shift_lt0, res0_shift_gt0, (int8)(output_shift) >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#else // OUTPUT_SHIFT < 0
+ values0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#endif // OUTPUT_OFFSET < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -684,7 +711,17 @@ __kernel void dwc_3x3_native_quantized8_dot8_nchw(
#else // defined(REAL_MULTIPLIER)
- values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, output_multiplier, output_shift, 8);
+#if defined(PER_CHANNEL_QUANTIZATION)
+ int8 res1_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values1, output_multiplier, output_shift, 8);
+ int8 res1_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, output_multiplier, output_shift, 8);
+ values1 = select(res1_shift_lt0, res1_shift_gt0, (int8)(output_shift) >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(values1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#else // OUTPUT_SHIFT < 0
+ values1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(values1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#endif // OUTPUT_OFFSET < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -943,17 +980,23 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
acc = CONVERT(round(CONVERT(acc, VEC_FLOAT) * (VEC_FLOAT)REAL_MULTIPLIER), VEC_INT);
#else // defined(REAL_MULTIPLIER)
+
#if defined(PER_CHANNEL_QUANTIZATION)
Vector output_multipliers = CONVERT_TO_VECTOR_STRUCT(output_multipliers);
Vector output_shifts = CONVERT_TO_VECTOR_STRUCT(output_shifts);
VEC_INT output_multiplier = VLOAD(VEC_SIZE)(0, (__global int *)output_multipliers.ptr);
VEC_INT output_shift = VLOAD(VEC_SIZE)(0, (__global int *)output_shifts.ptr);
-#else // defined(PER_CHANNEL_QUANTIZATION)
- const int output_multiplier = *((__global int *)(output_multipliers_ptr + output_multipliers_offset_first_element_in_bytes));
- const int output_shift = *((__global int *)(output_shifts_ptr + output_shifts_offset_first_element_in_bytes));
-#endif // defined(PER_CHANNEL_QUANTIZATION)
- acc = asymm_mult_by_quant_multiplier_less_than_one(acc, output_multiplier, output_shift);
+ VEC_INT res_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc, output_multiplier, output_shift, VEC_SIZE);
+ VEC_INT res_shift_gt0 = asymm_mult_by_quant_multiplier_less_than_one(acc, output_multiplier, output_shift);
+ acc = select(res_shift_lt0, res_shift_gt0, output_shift >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ acc = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, VEC_SIZE);
+#else // OUTPUT_SHIFT < 0
+ acc = asymm_mult_by_quant_multiplier_less_than_one(acc, OUTPUT_MULTIPLIER, OUTPUT_SHIFT);
+#endif // OUTPUT_SHIFT < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -1255,15 +1298,32 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
Vector output_shifts = CONVERT_TO_VECTOR_STRUCT(output_shifts);
VEC_INT output_multiplier = VLOAD(VEC_SIZE)(0, (__global int *)output_multipliers.ptr);
VEC_INT output_shift = VLOAD(VEC_SIZE)(0, (__global int *)output_shifts.ptr);
-#else // defined(PER_CHANNEL_QUANTIZATION)
- const int output_multiplier = *((__global int *)(output_multipliers_ptr + output_multipliers_offset_first_element_in_bytes));
- const int output_shift = *((__global int *)(output_shifts_ptr + output_shifts_offset_first_element_in_bytes));
-#endif // defined(PER_CHANNEL_QUANTIZATION)
- acc0 = asymm_mult_by_quant_multiplier_less_than_one(acc0, output_multiplier, output_shift);
- acc1 = asymm_mult_by_quant_multiplier_less_than_one(acc1, output_multiplier, output_shift);
- acc2 = asymm_mult_by_quant_multiplier_less_than_one(acc2, output_multiplier, output_shift);
- acc3 = asymm_mult_by_quant_multiplier_less_than_one(acc3, output_multiplier, output_shift);
+ res0_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc0, output_multiplier, output_shift, VEC_SIZE);
+ res1_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc1, output_multiplier, output_shift, VEC_SIZE);
+ res2_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc2, output_multiplier, output_shift, VEC_SIZE);
+ res3_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc3, output_multiplier, output_shift, VEC_SIZE);
+ res0_shift_gt0 = asymm_mult_by_quant_multiplier_less_than_one(acc0, output_multiplier, output_shift);
+ res1_shift_gt0 = asymm_mult_by_quant_multiplier_less_than_one(acc1, output_multiplier, output_shift);
+ res2_shift_gt0 = asymm_mult_by_quant_multiplier_less_than_one(acc2, output_multiplier, output_shift);
+ res3_shift_gt0 = asymm_mult_by_quant_multiplier_less_than_one(acc3, output_multiplier, output_shift);
+ acc0 = select(res0_shift_lt0, res0_shift_gt0, output_shift >= 0);
+ acc1 = select(res1_shift_lt0, res1_shift_gt0, output_shift >= 0);
+ acc2 = select(res2_shift_lt0, res2_shift_gt0, output_shift >= 0);
+ acc3 = select(res3_shift_lt0, res3_shift_gt0, output_shift >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ acc0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc0, output_multiplier, output_shift, VEC_SIZE);
+ acc1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc1, output_multiplier, output_shift, VEC_SIZE);
+ acc2 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc2, output_multiplier, output_shift, VEC_SIZE);
+ acc3 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc3, output_multiplier, output_shift, VEC_SIZE);
+#else // OUTPUT_SHIFT < 0
+ acc0 = asymm_mult_by_quant_multiplier_less_than_one(acc0, output_multiplier, output_shift);
+ acc1 = asymm_mult_by_quant_multiplier_less_than_one(acc1, output_multiplier, output_shift);
+ acc2 = asymm_mult_by_quant_multiplier_less_than_one(acc2, output_multiplier, output_shift);
+ acc3 = asymm_mult_by_quant_multiplier_less_than_one(acc3, output_multiplier, output_shift);
+#endif // OUTPUT_SHIFT < 0
+#endif // defined(PER_CHANNEL_QUANTIZATION)
#endif // defined(REAL_MULTIPLIER)
@@ -1375,7 +1435,7 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
int z = get_global_id(2) % (int)DST_DEPTH; // spatial coordinate y
int b = get_global_id(2) / (int)DST_DEPTH; // batch
#else // defined(DST_DEPTH)
- int z = get_global_id(2); // spatial coordinate y
+ int z = get_global_id(2); // spatial coordinate y
#endif // defined(DST_DEPTH)
__global uchar *weights_addr = weights_ptr + weights_offset_first_element_in_bytes + x * weights_stride_y;
@@ -1383,7 +1443,7 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
#if defined(DST_DEPTH)
__global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * VEC_SIZE + b * src_stride_w;
#else /* defined(DST_DEPTH) */
- __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * VEC_SIZE;
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * VEC_SIZE;
#endif /* defined(DST_DEPTH) */
int z_coord = 0;
@@ -1519,11 +1579,14 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
acc1 = CONVERT(round(CONVERT(acc1, VEC_FLOAT) * (VEC_FLOAT)REAL_MULTIPLIER), VEC_INT);
#else // defined(REAL_MULTIPLIER)
- const int output_multiplier = *((__global int *)(output_multipliers_ptr + output_multipliers_offset_first_element_in_bytes));
- const int output_shift = *((__global int *)(output_shifts_ptr + output_shifts_offset_first_element_in_bytes));
- acc0 = asymm_mult_by_quant_multiplier_less_than_one(acc0, output_multiplier, output_shift);
- acc1 = asymm_mult_by_quant_multiplier_less_than_one(acc1, output_multiplier, output_shift);
+#if OUTPUT_SHIFT < 0
+ acc0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, VEC_SIZE);
+ acc1 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(acc1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, VEC_SIZE);
+#else // OUTPUT_SHIFT < 0
+ acc0 = asymm_mult_by_quant_multiplier_less_than_one(acc0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT);
+ acc1 = asymm_mult_by_quant_multiplier_less_than_one(acc1, OUTPUT_MULTIPLIER, OUTPUT_SHIFT);
+#endif // OUTPUT_SHIFT < 0
#endif // defined(REAL_MULTIPLIER)
acc0 += (VEC_INT)OUTPUT_OFFSET;
@@ -1553,9 +1616,9 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
#endif // defined(WEIGHTS_PROMOTED_TYPE)
-#endif // defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && (defined(OUTPUT_OFFSET) || defined(REAL_MULTIPLIER))
+#endif // defined(WEIGHTS_OFFSET) && defined(INPUT_OFFSET) && defined(K_OFFSET) && ((defined(OUTPUT_OFFSET) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)) || defined(REAL_MULTIPLIER))
-#if defined(SRC_DIM1) && defined(SRC_DIM2) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(N0) && defined(DILATION_X) && defined(DILATION_Y) && defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) && defined(CONV_PAD_LEFT) && defined(CONV_PAD_TOP) && defined(INPUT_OFFSET) && defined(WEIGHTS_OFFSET) && defined(OUTPUT_OFFSET)
+#if defined(SRC_DIM1) && defined(SRC_DIM2) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(N0) && defined(DILATION_X) && defined(DILATION_Y) && defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) && defined(CONV_PAD_LEFT) && defined(CONV_PAD_TOP) && defined(INPUT_OFFSET) && defined(WEIGHTS_OFFSET) && defined(OUTPUT_OFFSET) && defined(OUTPUT_SHIFT) && defined(OUTPUT_MULTIPLIER)
/** This function computes the depthwise convolution for NHWC data layout. This kernel assumes that the weights tensor is NOT reshaped
*
* @note The number of elements processed must be passed at compile time using -DN0 (e.g. -DN0=2)
@@ -1630,7 +1693,7 @@ __kernel void dwc_MxN_native_quantized8_nhwc(
int z = get_global_id(2) % (int)DST_DEPTH; // spatial coordinate y
int b = get_global_id(2) / (int)DST_DEPTH; // batch
#else // defined(DST_DEPTH)
- int z = get_global_id(2); // spatial coordinate y
+ int z = get_global_id(2); // spatial coordinate y
#endif // defined(DST_DEPTH)
__global uchar *s_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) * (int)N0;
@@ -1646,12 +1709,6 @@ __kernel void dwc_MxN_native_quantized8_nhwc(
#if defined(PER_CHANNEL_QUANTIZATION)
__global uchar *out_mul_addr = output_multipliers_ptr + output_multipliers_offset_first_element_in_bytes + x * sizeof(int) * (int)DEPTH_MULTIPLIER * (int)N0;
__global uchar *out_shift_addr = output_shifts_ptr + output_shifts_offset_first_element_in_bytes + x * sizeof(int) * (int)DEPTH_MULTIPLIER * (int)N0;
-
- VEC_INT output_multiplier = (VEC_INT)0;
- VEC_INT output_shift = (VEC_INT)0;
-#else // defined(PER_CHANNEL_QUANTIZATION)
- const int output_multiplier = *((__global int *)(output_multipliers_ptr + output_multipliers_offset_first_element_in_bytes));
- const int output_shift = *((__global int *)(output_shifts_ptr + output_shifts_offset_first_element_in_bytes));
#endif // defined(PER_CHANNEL_QUANTIZATION)
#if defined(DST_DEPTH)
@@ -1700,11 +1757,20 @@ __kernel void dwc_MxN_native_quantized8_nhwc(
#endif // defined(HAS_BIAS)
#if defined(PER_CHANNEL_QUANTIZATION)
- output_multiplier = VLOAD(N0)(0, (__global int *)(out_mul_addr));
- output_shift = VLOAD(N0)(0, (__global int *)(out_shift_addr));
+ VEC_INT output_multiplier = VLOAD(N0)(0, (__global int *)(out_mul_addr));
+ VEC_INT output_shift = VLOAD(N0)(0, (__global int *)(out_shift_addr));
+
+ VEC_INT res_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(res, output_multiplier, output_shift, N0);
+ VEC_INT res_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(res, output_multiplier, output_shift, N0);
+ res = select(res_shift_lt0, res_shift_gt0, (VEC_INT)(output_shift) >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+#if OUTPUT_SHIFT < 0
+ res = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(res, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, N0);
+#else // OUTPUT_SHIFT < 0
+ res = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(res, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, N0);
+#endif // OUTPUT_OFFSET < 0
#endif // defined(PER_CHANNEL_QUANTIZATION)
- res = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(res, output_multiplier, output_shift, N0);
res += (VEC_INT)OUTPUT_OFFSET;
VEC_TYPE(VEC_SIZE)
@@ -1726,5 +1792,5 @@ __kernel void dwc_MxN_native_quantized8_nhwc(
}
#endif // DEPTH_MULTIPLIER > 1
}
-#endif // defined(SRC_DIM1) && defined(SRC_DIM2) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defiend(N0) && defined(DILATION_X) && defined(DILATION_Y) && defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) && defined(CONV_PAD_LEFT) && defined(CONV_PAD_TOP) && defined(INPUT_OFFSET) && defined(WEIGHTS_OFFSET) && defined(OUTPUT_OFFSET)
+#endif // defined(SRC_DIM1) && defined(SRC_DIM2) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defiend(N0) && defined(DILATION_X) && defined(DILATION_Y) && defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) && defined(CONV_PAD_LEFT) && defined(CONV_PAD_TOP) && defined(INPUT_OFFSET) && defined(WEIGHTS_OFFSET) && defined(OUTPUT_OFFSET) && defined(OUTPUT_SHIFT) && defined(OUTPUT_MULTIPLIER)
#endif // defined(DATA_TYPE) && defined(WEIGHTS_TYPE)