From 8c837ca85c06c53ccca20937be9dfd74d00d597a Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 7 Jan 2020 15:06:41 +0000
Subject: COMPMID-2766: Add support for QASYMM8_SIGNED in NEDepthwiseConvolutionLayer

This patch also adds support for QASYMM8_SIGNED in the generic functions
that use NEDepthwiseConvolutionLayerNativeKernel.

Change-Id: I74a99e1476cb1ebd2078e076ab2bea703949527b
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/2552
Reviewed-by: Giorgio Arena
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 .../NEDepthwiseConvolutionLayerNativeKernel.cpp    | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
index aee13ee578..7626fda886 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -299,7 +299,7 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w
                 {
                     acc.at(i) = rounding_divide_by_exp2(saturating_doubling_high_mul(acc.at(i), out_mul), out_shift) + output_qoffset;
                 }
-                out_vals[i] = static_cast<T>(utility::clamp<int32_t, uint8_t>(acc.at(i)));
+                out_vals[i] = static_cast<T>(utility::clamp<int32_t, T>(acc.at(i)));
             }
 
             wrapper::vstore(reinterpret_cast<T *>(output_it.ptr()), out_vals);
@@ -403,7 +403,7 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh
                 {
                     acc.at(m) = rounding_divide_by_exp2(saturating_doubling_high_mul(acc.at(m), out_mul), out_shift) + output_qoffset;
                 }
-                *(reinterpret_cast<T *>(output_it.ptr() + m * sizeof(T))) = static_cast<T>(utility::clamp<int32_t, uint8_t>(acc.at(m)));
+                *(reinterpret_cast<T *>(output_it.ptr() + m * sizeof(T))) = static_cast<T>(utility::clamp<int32_t, T>(acc.at(m)));
             }
         },
         input_it, weights_it, biases_it, output_it);
@@ -415,7 +415,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (dilation.x() - 1) > input->dimension(1) + conv_info.pad_left() + conv_info.pad_right());
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (dilation.y() - 1) > input->dimension(2) + conv_info.pad_top() + conv_info.pad_bottom());
@@ -553,9 +553,22 @@ void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, co
                     &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, uint8_t, false>;
             pad_vectors(_output_multiplier, _output_shift, 8);
             break;
+        case DataType::QASYMM8_SIGNED:
+            _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, true> :
+                    &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, false>;
+            pad_vectors(_output_multiplier, _output_shift, 8);
+            break;
         case DataType::QSYMM8_PER_CHANNEL:
-            _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, true> :
-                    &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, false>;
+            if(_input->info()->data_type() == DataType::QASYMM8)
+            {
+                _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, true> :
+                        &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, false>;
+            }
+            else
+            {
+                _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, true> :
+                        &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, false>;
+            }
             pad_vectors(_output_multiplier, _output_shift, 8);
             break;
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -621,7 +634,7 @@ void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window
     }
 }
 
-template <typename T, typename TW, bool has_biases, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type>
+template <typename T, typename TW, bool has_biases>
 void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
--
cgit v1.2.1
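
For reference, below is a minimal sketch of how the new QASYMM8_SIGNED path could be exercised through the public runtime function NEDepthwiseConvolutionLayer, which dispatches to this kernel. It is not part of the patch: the tensor shapes, quantization parameters, padding and the NHWC layout are illustrative assumptions chosen for the example.

// Illustrative sketch only: runs a depthwise convolution on QASYMM8_SIGNED tensors.
// Shapes, scales, offsets and padding are assumptions, not values from the patch.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input, weights, biases, output;

    // NHWC tensors: TensorShape is ordered (C, W, H, N) for this layout.
    input.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    input.info()->set_data_layout(DataLayout::NHWC);
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0)));
    weights.info()->set_data_layout(DataLayout::NHWC);
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32)); // quantized biases are S32
    output.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    output.info()->set_data_layout(DataLayout::NHWC);

    // 3x3 kernel, stride 1, padding 1, depth_multiplier = 1.
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1), 1);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/biases with quantized data here ...
    dwc.run();
    return 0;
}

With QSYMM8_PER_CHANNEL weights the same entry point applies; as in the configure() hunk above, the kernel then picks the unsigned or signed input specialization based on the input tensor's data type.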