author    Georgios Pinitas <georgios.pinitas@arm.com>    2019-12-02 19:01:25 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com>    2019-12-04 12:44:28 +0000
commit    6e1791b1bfabc81f08d3117939f6eb5264ed4edf (patch)
tree      b984d58856ef9baa168bcf878659caddf599f623
parent    5cb49dcf7ad74cc6e7e91790b7132ae4dd845515 (diff)
download  ComputeLibrary-6e1791b1bfabc81f08d3117939f6eb5264ed4edf.tar.gz
COMPMID-2764: Add support for QASYMM8_SIGNED in NEConvolutionLayer.
Change-Id: I8fbbd2e399f48968337a60147098d04f27c2d1c0
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2402
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/NEON/kernels/NECol2ImKernel.h                                 |   6
-rw-r--r--  arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h |   7
-rw-r--r--  arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h                         |   6
-rw-r--r--  arm_compute/core/Utils.h                                                       |  83
-rw-r--r--  arm_compute/runtime/NEON/functions/NECol2Im.h                                  |   6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h                        |  12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h                    |  54
-rw-r--r--  src/core/NEON/kernels/NECol2ImKernel.cpp                                       |   4
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp        | 182
-rw-r--r--  src/core/NEON/kernels/NEIm2ColKernel.cpp                                       |  10
-rw-r--r--  src/core/NEON/kernels/NEWeightsReshapeKernel.cpp                               |   4
-rw-r--r--  src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp                          |  70
-rw-r--r--  src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp                    |   6
-rw-r--r--  tests/validation/Helpers.cpp                                                   |  35
-rw-r--r--  tests/validation/Helpers.h                                                     |   8
-rw-r--r--  tests/validation/NEON/ConvolutionLayer.cpp                                     |  13
-rw-r--r--  tests/validation/fixtures/ConvolutionLayerFixture.h                            |  11
-rw-r--r--  tests/validation/reference/ActivationLayer.cpp                                 |  11
-rw-r--r--  tests/validation/reference/Convolution3d.h                                     |  10
-rw-r--r--  tests/validation/reference/ConvolutionLayer.cpp                                |   6
20 files changed, 372 insertions, 172 deletions
diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
index f02858e7d9..9858d4fd56 100644
--- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h
+++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,7 +72,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: Any
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
@@ -80,7 +80,7 @@ public:
void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
/** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: Any
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
index dadc5c221b..ac17b2efa5 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
@@ -37,13 +37,14 @@ class ITensor;
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
* and adds to it the offset contribution of matrix A and matrix B in-place.
*
- * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint.
+ * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8.
+ * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8.
*
- * For QuantizeDownInt32ToUint8Scale the final result is:
+ * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is:
*
* ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift
*
- * For QuantizeDownInt32ToUint8ScaleByFixedPoint the final result is:
+ * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is:
*
* (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
*
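
For reference, a scalar sketch of the two output stages named above; the helper names and the rounding/clamping details are illustrative simplifications, not the kernel's actual code. T stands for uint8_t in the Uint8 variants and int8_t in the Int8 variants.

    #include <algorithm>
    #include <cstdint>
    #include <limits>

    // QuantizeDownInt32ToUint8Scale / QuantizeDownInt32ToInt8Scale (scalar sketch)
    template <typename T>
    T quantize_down_scale(int32_t mm_result, int32_t result_offset, int32_t result_mult_int, int32_t result_shift)
    {
        int32_t v = ((mm_result + result_offset) * result_mult_int) >> result_shift;
        v         = std::max<int32_t>(std::numeric_limits<T>::lowest(), std::min<int32_t>(std::numeric_limits<T>::max(), v));
        return static_cast<T>(v);
    }

    // QuantizeDownInt32ToUint8ScaleByFixedPoint / QuantizeDownInt32ToInt8ScaleByFixedPoint (scalar sketch)
    template <typename T>
    T quantize_down_fixedpoint(int32_t mm_result, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift)
    {
        const int64_t prod    = static_cast<int64_t>(mm_result) * result_fixedpoint_multiplier;
        const int32_t rounded = static_cast<int32_t>((prod + (1ll << 30)) >> 31); // FixedPointMul: rounding doubling high multiply
        int32_t       v       = (rounded >> result_shift) + result_offset_after_shift;
        v                     = std::max<int32_t>(std::numeric_limits<T>::lowest(), std::min<int32_t>(std::numeric_limits<T>::max(), v));
        return static_cast<T>(v);
    }
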
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
index 585c707bb6..d432b731c2 100644
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -75,7 +75,8 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/FP16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/FP16/F32
* @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
@@ -85,7 +86,8 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 366d5dcc68..590e281bb0 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -549,6 +549,72 @@ inline DataType get_promoted_data_type(DataType dt)
return DataType::UNKNOWN;
}
+/** Compute the mininum and maximum values a data type can take
+ *
+ * @param[in] dt Data type to get the min/max bounds of
+ *
+ * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
+ */
+inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
+{
+ PixelValue min(0);
+ PixelValue max(0);
+ switch(dt)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ {
+ min = PixelValue(std::numeric_limits<uint8_t>::lowest());
+ max = PixelValue(std::numeric_limits<uint8_t>::max());
+ break;
+ }
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ {
+ min = PixelValue(std::numeric_limits<int8_t>::lowest());
+ max = PixelValue(std::numeric_limits<int8_t>::max());
+ break;
+ }
+ case DataType::U16:
+ case DataType::QASYMM16:
+ {
+ min = PixelValue(std::numeric_limits<uint16_t>::lowest());
+ max = PixelValue(std::numeric_limits<uint16_t>::max());
+ break;
+ }
+ case DataType::S16:
+ case DataType::QSYMM16:
+ {
+ min = PixelValue(std::numeric_limits<int16_t>::lowest());
+ max = PixelValue(std::numeric_limits<int16_t>::max());
+ break;
+ }
+ case DataType::U32:
+ {
+ min = PixelValue(std::numeric_limits<uint32_t>::lowest());
+ max = PixelValue(std::numeric_limits<uint32_t>::max());
+ break;
+ }
+ case DataType::S32:
+ {
+ min = PixelValue(std::numeric_limits<int32_t>::lowest());
+ max = PixelValue(std::numeric_limits<int32_t>::max());
+ break;
+ }
+ case DataType::F32:
+ {
+ min = PixelValue(std::numeric_limits<float>::lowest());
+ max = PixelValue(std::numeric_limits<float>::max());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return std::make_tuple(min, max);
+}
+
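
A short usage sketch of the new helper, assuming the relevant arm_compute headers are included; it mirrors how NEGEMMConvolutionLayer seeds its activation bounds further down in this patch.

    PixelValue type_min(0);
    PixelValue type_max(0);
    std::tie(type_min, type_max) = get_min_max(DataType::QASYMM8_SIGNED);
    const int min_activation = type_min.get<int>(); // -128 for QASYMM8_SIGNED
    const int max_activation = type_max.get<int>(); //  127 for QASYMM8_SIGNED
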
/** Return true if the given format has horizontal subsampling.
*
* @param[in] format Format to determine subsampling.
@@ -1054,6 +1120,23 @@ inline bool is_data_type_quantized_asymmetric(DataType dt)
}
}
+/** Check if a given data type is of asymmetric quantized signed type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized signed type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ return true;
+ default:
+ return false;
+ }
+}
+
/** Check if a given data type is of symmetric quantized type
*
* @param[in] dt Input data type.
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
index 64ce9944e2..613507cf6a 100644
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ b/arm_compute/runtime/NEON/functions/NECol2Im.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ class NECol2Im : public INESimpleFunctionNoBorder
public:
/** Configure the col2im NEON kernel
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: Any
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
@@ -47,7 +47,7 @@ public:
void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
/** Static function to check if given info will lead to a valid configuration of @ref NECol2Im
*
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input The input tensor to convert. Data types supported: Any
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
* while the rest represent batch of outputs. Data types supported: Same as @p input
* @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 4310ab4b41..91fcef5971 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -80,10 +80,10 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -101,10 +101,10 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -125,7 +125,7 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -149,5 +149,5 @@ private:
std::shared_ptr<IMemoryManager> _memory_manager;
std::unique_ptr<IFunction> _function; /**< Function to run */
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */
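
A hedged usage sketch of the extended data-type support; the shapes, quantization parameters and wrapper function are illustrative and not taken from the library's tests.

    #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Illustrative only: a 3x3 convolution over a QASYMM8_SIGNED input
    void run_qasymm8_signed_conv()
    {
        Tensor input, weights, biases, output;
        input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0)));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32)); // biases are S32 for quantized inputs
        output.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.0f, 0)));

        NEConvolutionLayer conv;
        conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1));

        input.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        output.allocator()->allocate();

        conv.run();
    }
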
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 6452fc9249..c513afa790 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -63,16 +63,16 @@ public:
NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
/** Set the input and output tensors.
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. Data types supported: Same as @p weights.
*/
void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p weights.
*
* @return an error status
*/
@@ -135,8 +135,8 @@ private:
*
* -# @ref NEIm2ColKernel
* -# @ref NEGEMM (if the data type is FP32 or FP16)
- * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8)
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8)
+ * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
+ * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
* -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
* -# @ref NECol2ImKernel (if NCHW data layout)
*
@@ -158,10 +158,10 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -175,13 +175,13 @@ public:
const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
@@ -202,24 +202,24 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
+ * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
*/
void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
- * @param[in] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input,
+ * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
* @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
@@ -230,8 +230,8 @@ private:
int gemm_3d_depth = 1, bool skip_im2col = false);
/** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
*
- * @param[in] input_info Input tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] gemm_3d_depth Depth of GEMM 3D
* @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
index e3661eef30..cea8782354 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ b/src/core/NEON/kernels/NECol2ImKernel.cpp
@@ -43,10 +43,6 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
{
//Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
// Validate configured output
if(output->total_size() != 0)
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
index a32f0bbdae..84187332f8 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -269,6 +269,13 @@ inline int8x16_t finalize_quantization_floating_point(int32x4x4_t &in_s32, int32
return out_s8;
}
+template <typename T>
+struct VectorTyper
+{
+ using stype = T;
+ using vtype = typename wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128>;
+};
+
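
A standalone analogue of VectorTyper, written with plain NEON typedefs instead of wrapper::traits::neon_bitvector_t, to show what the trait resolves to for the two supported element types; this is an illustrative assumption, not the library trait.

    #include <arm_neon.h>
    #include <type_traits>

    // Maps the scalar element type to its 128-bit NEON vector type, as the kernel's trait does.
    template <typename T>
    struct ExampleVectorTyper
    {
        using stype = T;
        using vtype = typename std::conditional<std::is_signed<T>::value, int8x16_t, uint8x16_t>::type;
    };

    static_assert(std::is_same<ExampleVectorTyper<uint8_t>::vtype, uint8x16_t>::value, "QASYMM8 stores through uint8x16_t");
    static_assert(std::is_same<ExampleVectorTyper<int8_t>::vtype, int8x16_t>::value, "QASYMM8_SIGNED stores through int8x16_t");
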
inline Window get_win_vector_sum(const Window &window)
{
Window win_vector_sum(window);
@@ -300,9 +307,10 @@ inline Iterator get_bias_it(const Window &window, const ITensor *bias)
return bias_it;
}
-template <bool has_a_offset, bool has_b_offset, bool has_bias, bool is_bounded_relu, bool is_fixed_point>
+template <typename VT, bool has_a_offset, bool has_b_offset, bool has_bias, bool is_bounded_relu, bool is_fixed_point>
inline void run_offset_contribution_output_stage_window(const int32_t *vector_sum_col_ptr, const int32_t *vector_sum_row_ptr, const int32_t *bias_ptr, Iterator mm_result_it, Iterator out_it,
- const int32x4_t result_offset_s32, const int32x4_t result_shift_s32, uint8x16_t min_u8, uint8x16_t max_u8,
+ const int32x4_t result_offset_s32, const int32x4_t result_shift_s32,
+ typename VT::vtype min_vec, typename VT::vtype max_vec,
int32_t a_offset, int32_t b_offset, int32_t k_offset,
int32_t multiplier, int32_t shift, int32_t offset, int32_t min_bound, int32_t max_bound,
int window_step_x, int window_start_x, int window_end_x)
@@ -346,11 +354,13 @@ inline void run_offset_contribution_output_stage_window(const int32_t *vector_su
if(is_fixed_point)
{
- vst1q_u8(out_it.ptr() + x, finalize_quantization<is_bounded_relu>(in_s32, multiplier, shift, result_offset_s32, min_u8, max_u8));
+ wrapper::vstore(reinterpret_cast<typename VT::stype *>(out_it.ptr() + x),
+ finalize_quantization<is_bounded_relu>(in_s32, multiplier, shift, result_offset_s32, min_vec, max_vec));
}
else
{
- vst1q_u8(out_it.ptr() + x, finalize_quantization_floating_point<is_bounded_relu>(in_s32, result_shift_s32, min_u8, max_u8));
+ wrapper::vstore(reinterpret_cast<typename VT::stype *>(out_it.ptr() + x),
+ finalize_quantization_floating_point<is_bounded_relu>(in_s32, result_shift_s32, min_vec, max_vec));
}
}
// Compute left-over elements
@@ -370,7 +380,9 @@ inline void run_offset_contribution_output_stage_window(const int32_t *vector_su
if(is_fixed_point)
{
// Finalize and store the result
- *(out_it.ptr() + x) = finalize_quantization<is_bounded_relu>(in_value, multiplier, shift, offset, static_cast<uint8_t>(min_bound), static_cast<uint8_t>(max_bound));
+ *reinterpret_cast<typename VT::stype *>(out_it.ptr() + x) = finalize_quantization<is_bounded_relu>(in_value, multiplier, shift, offset,
+ static_cast<typename VT::stype>(min_bound),
+ static_cast<typename VT::stype>(max_bound));
}
else
{
@@ -380,9 +392,10 @@ inline void run_offset_contribution_output_stage_window(const int32_t *vector_su
// Bound and store the result
if(is_bounded_relu)
{
- in_value = static_cast<uint8_t>(std::max<int32_t>(min_bound, std::min<int32_t>(max_bound, in_value)));
+ in_value = static_cast<typename VT::stype>(std::max<int32_t>(min_bound, std::min<int32_t>(max_bound, in_value)));
}
- *(out_it.ptr() + x) = static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, in_value)));
+ *reinterpret_cast<typename VT::stype *>(out_it.ptr() + x) = static_cast<typename VT::stype>(std::max<int32_t>(static_cast<int32_t>(std::numeric_limits<typename VT::stype>::lowest()),
+ std::min<int32_t>(static_cast<int32_t>(std::numeric_limits<typename VT::stype>::max()), in_value)));
}
}
}
@@ -463,12 +476,15 @@ inline void run_offset_contribution_output_stage_window_symm(const int32_t *vect
}
}
-template <bool is_gemm3d, bool is_bounded_relu, bool is_fixed_point>
+template <typename T, bool is_gemm3d, bool is_bounded_relu, bool is_fixed_point>
void run_offset_contribution_output_stage(const Window &window,
const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output,
int32_t a_offset, int32_t b_offset, int32_t k_offset, bool slide_vector_sum_col,
GEMMLowpOutputStageInfo output_stage)
{
+ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+ using Typer = VectorTyper<T>;
+
const int height_input = is_gemm3d ? mm_result->info()->dimension(1) : 0;
const int depth_input = is_gemm3d ? mm_result->info()->dimension(2) : 1;
@@ -478,10 +494,10 @@ void run_offset_contribution_output_stage(const Window &window,
const int32_t min_bound = output_stage.gemmlowp_min_bound;
const int32_t max_bound = output_stage.gemmlowp_max_bound;
- const int32x4_t result_offset_s32 = vdupq_n_s32(offset);
- const int32x4_t result_shift_s32 = vdupq_n_s32(is_fixed_point ? shift : -shift);
- const uint8x16_t min_u8 = vdupq_n_u8(static_cast<uint8_t>(min_bound));
- const uint8x16_t max_u8 = vdupq_n_u8(static_cast<uint8_t>(max_bound));
+ const int32x4_t result_offset_s32 = vdupq_n_s32(offset);
+ const int32x4_t result_shift_s32 = vdupq_n_s32(is_fixed_point ? shift : -shift);
+ const auto min_vec = wrapper::vdup_n(static_cast<T>(min_bound), ExactTagType{});
+ const auto max_vec = wrapper::vdup_n(static_cast<T>(max_bound), ExactTagType{});
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
@@ -517,11 +533,13 @@ void run_offset_contribution_output_stage(const Window &window,
const auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_id * vector_sum_col_batch_offset);
const auto vector_sum_row_ptr = reinterpret_cast<const int32_t *>(vector_sum_row_it.ptr() + batch_id * sum_row_stride_y)
+ id.y() + (id.z() % depth_input) * height_input;
- run_offset_contribution_output_stage_window<true, true, true, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, vector_sum_row_ptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it,
- out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, true, true, true, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, vector_sum_row_ptr, reinterpret_cast<const int32_t *>(bias_it.ptr()),
+ mm_result_it,
+ out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_col_it, vector_sum_row_it, bias_it, mm_result_it, out_it);
}
@@ -533,10 +551,11 @@ void run_offset_contribution_output_stage(const Window &window,
const auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_id * vector_sum_col_batch_offset);
const auto vector_sum_row_ptr = reinterpret_cast<const int32_t *>(vector_sum_row_it.ptr() + batch_id * sum_row_stride_y)
+ id.y() + (id.z() % depth_input) * height_input;
- run_offset_contribution_output_stage_window<true, true, false, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, vector_sum_row_ptr, nullptr, mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, true, true, false, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, vector_sum_row_ptr, nullptr, mm_result_it, out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_col_it, vector_sum_row_it, mm_result_it, out_it);
}
@@ -557,10 +576,12 @@ void run_offset_contribution_output_stage(const Window &window,
const int batch_id = id.z() / depth_input;
const auto vector_sum_row_ptr = reinterpret_cast<const int32_t *>(vector_sum_row_it.ptr() + batch_id * sum_row_stride_y)
+ id.y() + (id.z() % depth_input) * height_input;
- run_offset_contribution_output_stage_window<false, true, true, is_bounded_relu, is_fixed_point>(nullptr, vector_sum_row_ptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, false, true, true, is_bounded_relu, is_fixed_point>(nullptr, vector_sum_row_ptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it,
+ out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_row_it, bias_it, mm_result_it, out_it);
}
@@ -571,10 +592,11 @@ void run_offset_contribution_output_stage(const Window &window,
const int batch_id = id.z() / depth_input;
const auto vector_sum_row_ptr = reinterpret_cast<const int32_t *>(vector_sum_row_it.ptr() + batch_id * sum_row_stride_y)
+ id.y() + (id.z() % depth_input) * height_input;
- run_offset_contribution_output_stage_window<false, true, false, is_bounded_relu, is_fixed_point>(nullptr, vector_sum_row_ptr, nullptr, mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, false, true, false, is_bounded_relu, is_fixed_point>(nullptr, vector_sum_row_ptr, nullptr, mm_result_it, out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_row_it, mm_result_it, out_it);
}
@@ -595,10 +617,12 @@ void run_offset_contribution_output_stage(const Window &window,
{
const int batch_id = id.z() / depth_input;
const auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_id * vector_sum_col_batch_offset);
- run_offset_contribution_output_stage_window<true, false, true, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, nullptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, true, false, true, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, nullptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it,
+ out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_col_it, bias_it, mm_result_it, out_it);
}
@@ -608,10 +632,11 @@ void run_offset_contribution_output_stage(const Window &window,
{
const int batch_id = id.z() / depth_input;
const auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_id * vector_sum_col_batch_offset);
- run_offset_contribution_output_stage_window<true, false, false, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, nullptr, nullptr, mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, true, false, false, is_bounded_relu, is_fixed_point>(vector_sum_col_ptr, nullptr, nullptr, mm_result_it, out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
vector_sum_col_it, mm_result_it, out_it);
}
@@ -623,10 +648,11 @@ void run_offset_contribution_output_stage(const Window &window,
Iterator bias_it = get_bias_it(collapsed_window, bias);
execute_window_loop(collapsed_window, [&](const Coordinates &)
{
- run_offset_contribution_output_stage_window<false, false, true, is_bounded_relu, is_fixed_point>(nullptr, nullptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, false, false, true, is_bounded_relu, is_fixed_point>(nullptr, nullptr, reinterpret_cast<const int32_t *>(bias_it.ptr()), mm_result_it, out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
bias_it, mm_result_it, out_it);
}
@@ -634,10 +660,11 @@ void run_offset_contribution_output_stage(const Window &window,
{
execute_window_loop(collapsed_window, [&](const Coordinates &)
{
- run_offset_contribution_output_stage_window<false, false, false, is_bounded_relu, is_fixed_point>(nullptr, nullptr, nullptr, mm_result_it, out_it,
- result_offset_s32, result_shift_s32, min_u8, max_u8, a_offset, b_offset, k_offset,
- multiplier, shift, offset, min_bound, max_bound,
- window_step_x, window_start_x, window_end_x);
+ run_offset_contribution_output_stage_window<Typer, false, false, false, is_bounded_relu, is_fixed_point>(nullptr, nullptr, nullptr, mm_result_it, out_it,
+ result_offset_s32, result_shift_s32,
+ min_vec, max_vec, a_offset, b_offset, k_offset,
+ multiplier, shift, offset, min_bound, max_bound,
+ window_step_x, window_start_x, window_end_x);
},
mm_result_it, out_it);
}
@@ -844,24 +871,36 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *mm_result,
NEGEMMLowpOffsetContributionOutputStageKernel::NEGEMMLowpOffsetContributionOutputStageFunction
get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row, const ITensor *output, GEMMLowpOutputStageInfo output_stage)
{
- static std::map<uint8_t, NEGEMMLowpOffsetContributionOutputStageKernel::NEGEMMLowpOffsetContributionOutputStageFunction> map_function =
- {
- { 0, &run_offset_contribution_output_stage<false, false, false> },
- { 1, &run_offset_contribution_output_stage<true, false, false> },
- { 2, &run_offset_contribution_output_stage<false, true, false> },
- { 3, &run_offset_contribution_output_stage<true, true, false> },
- { 4, &run_offset_contribution_output_stage<false, false, true> },
- { 5, &run_offset_contribution_output_stage<true, false, true> },
- { 6, &run_offset_contribution_output_stage<false, true, true> },
- { 7, &run_offset_contribution_output_stage<true, true, true> },
- { 8, &run_offset_contribution_output_stage_symm<false, false, false> },
- { 9, &run_offset_contribution_output_stage_symm<true, false, false> },
- { 10, &run_offset_contribution_output_stage_symm<false, true, false> },
- { 11, &run_offset_contribution_output_stage_symm<true, true, false> },
- { 12, &run_offset_contribution_output_stage_symm<false, false, true> },
- { 13, &run_offset_contribution_output_stage_symm<true, false, true> },
- { 14, &run_offset_contribution_output_stage_symm<false, true, true> },
- { 15, &run_offset_contribution_output_stage_symm<true, true, true> }
+ static std::map<uint8_t, NEGEMMLowpOffsetContributionOutputStageKernel::NEGEMMLowpOffsetContributionOutputStageFunction> map_function_qasymm =
+ {
+ { 0, &run_offset_contribution_output_stage<uint8_t, false, false, false> },
+ { 1, &run_offset_contribution_output_stage<uint8_t, true, false, false> },
+ { 2, &run_offset_contribution_output_stage<uint8_t, false, true, false> },
+ { 3, &run_offset_contribution_output_stage<uint8_t, true, true, false> },
+ { 4, &run_offset_contribution_output_stage<uint8_t, false, false, true> },
+ { 5, &run_offset_contribution_output_stage<uint8_t, true, false, true> },
+ { 6, &run_offset_contribution_output_stage<uint8_t, false, true, true> },
+ { 7, &run_offset_contribution_output_stage<uint8_t, true, true, true> },
+ { 8, &run_offset_contribution_output_stage<int8_t, false, false, false> },
+ { 9, &run_offset_contribution_output_stage<int8_t, true, false, false> },
+ { 10, &run_offset_contribution_output_stage<int8_t, false, true, false> },
+ { 11, &run_offset_contribution_output_stage<int8_t, true, true, false> },
+ { 12, &run_offset_contribution_output_stage<int8_t, false, false, true> },
+ { 13, &run_offset_contribution_output_stage<int8_t, true, false, true> },
+ { 14, &run_offset_contribution_output_stage<int8_t, false, true, true> },
+ { 15, &run_offset_contribution_output_stage<int8_t, true, true, true> },
+ };
+
+ static std::map<uint8_t, NEGEMMLowpOffsetContributionOutputStageKernel::NEGEMMLowpOffsetContributionOutputStageFunction> map_function_qsymm =
+ {
+ { 0, &run_offset_contribution_output_stage_symm<false, false, false> },
+ { 1, &run_offset_contribution_output_stage_symm<true, false, false> },
+ { 2, &run_offset_contribution_output_stage_symm<false, true, false> },
+ { 3, &run_offset_contribution_output_stage_symm<true, true, false> },
+ { 4, &run_offset_contribution_output_stage_symm<false, false, true> },
+ { 5, &run_offset_contribution_output_stage_symm<true, false, true> },
+ { 6, &run_offset_contribution_output_stage_symm<false, true, true> },
+ { 7, &run_offset_contribution_output_stage_symm<true, true, true> }
};
// Check if input is a 3D reinterpretation
@@ -877,12 +916,23 @@ get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row,
const bool is_fixed_point = output_stage.type != GEMMLowpOutputStageType::QUANTIZE_DOWN;
// Check if symmetric per-channel execution
- const bool is_symm = output->info()->data_type() == DataType::QASYMM8_SIGNED;
+ const bool is_signed = output->info()->data_type() == DataType::QASYMM8_SIGNED;
+
+ // Check if symmetric per-channel execution
+ const bool is_symm = output_stage.is_quantized_per_channel;
// key acts as a bitset, setting the first bit on reinterpret_as_3d,
// the second on is_bounded_relu, and the third on is_fixed_point.
- uint8_t key = (reinterpret_as_3d ? 1UL : 0UL) | ((is_bounded_relu ? 1UL : 0UL) << 1) | ((is_fixed_point ? 1UL : 0UL) << 2) | ((is_symm ? 1UL : 0UL) << 3);
- return map_function.find(key)->second;
+ uint8_t key = (reinterpret_as_3d ? 1UL : 0UL) | ((is_bounded_relu ? 1UL : 0UL) << 1) | ((is_fixed_point ? 1UL : 0UL) << 2);
+ if(is_symm)
+ {
+ return map_function_qsymm.find(key)->second;
+ }
+ else
+ {
+ key |= ((is_signed ? 1UL : 0UL) << 3);
+ return map_function_qasymm.find(key)->second;
+ }
}
} // namespace
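
A minimal sketch of how the dispatch key is assembled after this change; the function name is illustrative and the lookup maps themselves are elided.

    #include <cstdint>

    // Bits 0-2 are unchanged (3D reinterpretation, bounded relu, fixed point).
    // Bit 3 is only set for the asymmetric map and selects the int8_t
    // (QASYMM8_SIGNED) specializations; the symmetric per-channel map keeps a 3-bit key.
    uint8_t make_dispatch_key(bool reinterpret_as_3d, bool is_bounded_relu, bool is_fixed_point, bool is_signed, bool is_symm)
    {
        uint8_t key = (reinterpret_as_3d ? 1U : 0U)
                    | ((is_bounded_relu ? 1U : 0U) << 1)
                    | ((is_fixed_point ? 1U : 0U) << 2);
        if(!is_symm)
        {
            key |= ((is_signed ? 1U : 0U) << 3); // e.g. signed + fixed point, no 3D, no relu -> 0b1100 = 12
        }
        return key;
    }
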
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 0641d6cfa3..f57b94d70b 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -49,8 +49,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
bool has_bias, const Size2D &dilation, unsigned int num_groups)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(input->data_type()) && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Number of groups greater than one are not supported on NEON");
@@ -382,6 +382,7 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<float16_t, false, true> : &NEIm2ColKernel::run_im2col<float16_t, true, true>;
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ case DataType::QASYMM8_SIGNED:
case DataType::QASYMM8:
_func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, true> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, true>;
break;
@@ -403,7 +404,10 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
case DataType::QASYMM8:
- _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<qasymm8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<uint8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
+ break;
+ case DataType::QASYMM8_SIGNED:
+ _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_im2col<int8_t, false, false> : &NEIm2ColKernel::run_im2col<qasymm8_t, true, false>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index 649316442e..aa43ad587e 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -49,7 +49,9 @@ TensorShape get_output_shape(const ITensorInfo *input, bool has_bias)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
{
//Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
+ DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
+ DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
if(biases != nullptr)
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index a730749b8b..bb9620b293 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -59,7 +59,9 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I
Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1,
+ DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
+ DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
if(biases != nullptr)
@@ -114,11 +116,12 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
{
// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo iqinfo = input->info()->quantization_info();
- const QuantizationInfo wqinfo = weights->info()->quantization_info();
- const QuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info();
- const UniformQuantizationInfo uiqinfo = iqinfo.uniform();
- const UniformQuantizationInfo uoqinfo = oqinfo.uniform();
+ const QuantizationInfo iqinfo = input->info()->quantization_info();
+ const QuantizationInfo wqinfo = weights->info()->quantization_info();
+ const QuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info();
+ const UniformQuantizationInfo uiqinfo = iqinfo.uniform();
+ const UniformQuantizationInfo uoqinfo = oqinfo.uniform();
+ const DataType data_type = input->info()->data_type();
input->info()->set_quantization_info(QuantizationInfo(uiqinfo.scale, -uiqinfo.offset));
if(!is_data_type_quantized_per_channel(weights->info()->data_type()))
@@ -128,23 +131,28 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
}
// Merge activation with output stage
- int min_activation = 0;
- int max_activation = 255;
+ PixelValue type_min = 0;
+ PixelValue type_max = 0;
+ std::tie(type_min, type_max) = get_min_max(data_type);
+ int min_activation = type_min.get<int>();
+ int max_activation = type_max.get<int>();
if(supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = quantize_qasymm8(act_info.a(), uoqinfo);
- const int b_const_int = quantize_qasymm8(act_info.b(), uoqinfo);
+ const bool is_quantized_signed = is_data_type_quantized_asymmetric_signed(data_type);
+ const int a_const_int = is_quantized_signed ? quantize_qasymm8_signed(act_info.a(), uoqinfo) : quantize_qasymm8(act_info.a(), uoqinfo);
+ const int b_const_int = is_quantized_signed ? quantize_qasymm8_signed(act_info.b(), uoqinfo) : quantize_qasymm8(act_info.b(), uoqinfo);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? uoqinfo.offset : b_const_int;
- max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
+ max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? max_activation : a_const_int;
}
GEMMLowpOutputStageInfo output_info;
- output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
- output_info.gemmlowp_offset = uoqinfo.offset;
- output_info.gemmlowp_min_bound = min_activation;
- output_info.gemmlowp_max_bound = max_activation;
+ output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ output_info.gemmlowp_offset = uoqinfo.offset;
+ output_info.gemmlowp_min_bound = min_activation;
+ output_info.gemmlowp_max_bound = max_activation;
+ output_info.is_quantized_per_channel = (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL);
quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info);
_mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info));
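
A minimal sketch of the fused-activation bound selection above for a QASYMM8_SIGNED output; quantize_signed is a simplified stand-in for quantize_qasymm8_signed with a uniform scale/offset pair.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Simplified stand-in for quantize_qasymm8_signed(value, UniformQuantizationInfo)
    static int8_t quantize_signed(float value, float scale, int32_t offset)
    {
        const int32_t q = static_cast<int32_t>(std::lround(value / scale)) + offset;
        return static_cast<int8_t>(std::max(-128, std::min(127, q)));
    }

    // Bounds fed to the GEMMLowp output stage for a signed output:
    //   RELU            -> [oq_offset, 127]                                 (type max from get_min_max kept)
    //   BOUNDED_RELU    -> [oq_offset, quantize_signed(a, ...)]
    //   LU_BOUNDED_RELU -> [quantize_signed(b, ...), quantize_signed(a, ...)]
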
@@ -163,8 +171,9 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col)
{
- const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
- const bool is_activation_enabled = act_info.enabled();
+ const DataType data_type = input->data_type();
+ const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+ const bool is_activation_enabled = act_info.enabled();
// Create GEMMInfo structure
const GEMMInfo gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
@@ -181,8 +190,11 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
const UniformQuantizationInfo uoqinfo = oqinfo.uniform();
// Merge activation with output stage
- int min_activation = 0;
- int max_activation = 255;
+ PixelValue type_min = 0;
+ PixelValue type_max = 0;
+ std::tie(type_min, type_max) = get_min_max(data_type);
+ int min_activation = type_min.get<int>();
+ int max_activation = type_max.get<int>();
const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
@@ -190,18 +202,20 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
};
if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0)
{
- const int a_const_int = quantize_qasymm8(act_info.a(), uoqinfo);
- const int b_const_int = quantize_qasymm8(act_info.b(), uoqinfo);
+ const bool is_quantized_signed = is_data_type_quantized_asymmetric_signed(data_type);
+ const int a_const_int = is_quantized_signed ? quantize_qasymm8_signed(act_info.a(), uoqinfo) : quantize_qasymm8(act_info.a(), uoqinfo);
+ const int b_const_int = is_quantized_signed ? quantize_qasymm8_signed(act_info.b(), uoqinfo) : quantize_qasymm8(act_info.b(), uoqinfo);
min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? uoqinfo.offset : b_const_int;
- max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
+ max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? max_activation : a_const_int;
}
GEMMLowpOutputStageInfo output_info;
- output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
- output_info.gemmlowp_offset = uoqinfo.offset;
- output_info.gemmlowp_min_bound = min_activation;
- output_info.gemmlowp_max_bound = max_activation;
+ output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ output_info.gemmlowp_offset = uoqinfo.offset;
+ output_info.gemmlowp_min_bound = min_activation;
+ output_info.gemmlowp_max_bound = max_activation;
+ output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info));
// Perform validation step on GEMMLowp
@@ -387,8 +401,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON");
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 5b9d0551e2..e36cb3d399 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -280,9 +280,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE, "Bias addition not supported in NEGEMMLowpMatrixMultiplyCore for output S32");
ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),
"The product AB is defined only if the number of columns in A is equal to the number of rows in B");
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index fef4510405..afefee77be 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -122,53 +122,53 @@ SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
}
template <>
-SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint16_t> &src)
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<int8_t> &src)
{
const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = dequantize_qasymm16(src[i], quantization_info);
+ dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
}
return dst;
}
template <>
-SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint16_t> &src)
{
- SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
- const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+ const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
+ SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = quantize_qasymm8(src[i], qinfo);
+ dst[i] = dequantize_qasymm16(src[i], quantization_info);
}
return dst;
}
template <>
-SimpleTensor<int8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
+SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
- SimpleTensor<int8_t> dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
+ SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
const UniformQuantizationInfo &qinfo = quantization_info.uniform();
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = quantize_qasymm8_signed(src[i], qinfo);
+ dst[i] = quantize_qasymm8(src[i], qinfo);
}
return dst;
}
template <>
-SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<int8_t> &src)
+SimpleTensor<int8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
- const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
- SimpleTensor<float> dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
+ SimpleTensor<int8_t> dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
+ const UniformQuantizationInfo &qinfo = quantization_info.uniform();
for(int i = 0; i < src.num_elements(); ++i)
{
- dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
+ dst[i] = quantize_qasymm8_signed(src[i], qinfo);
}
return dst;
}
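
The specializations above are reordered and extended for int8_t, but the mapping itself is the usual asymmetric one, value = scale * (q - offset). A library-free sketch of that round trip for QASYMM8_SIGNED, assuming round-to-nearest (the library's default rounding policy):

#include <algorithm>
#include <cmath>
#include <cstdint>

// q = clamp(round(x / scale) + offset, -128, 127)    (quantize)
// x = scale * (q - offset)                           (dequantize)
std::int8_t quantize_signed(float x, float scale, int offset)
{
    const int q = static_cast<int>(std::lround(x / scale)) + offset;
    return static_cast<std::int8_t>(std::min(127, std::max(-128, q)));
}

float dequantize_signed(std::int8_t q, float scale, int offset)
{
    return scale * static_cast<float>(static_cast<int>(q) - offset);
}
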
@@ -354,6 +354,15 @@ std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, flo
return std::pair<int, int> { min_bound, max_bound };
}
+std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &quant_info, float min, float max)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be less than or equal to max");
+
+ const int min_bound = quantize_qasymm8_signed(min, quant_info.uniform());
+ const int max_bound = quantize_qasymm8_signed(max, quant_info.uniform());
+ return std::pair<int, int> { min_bound, max_bound };
+}
+
std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id)
{
ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be less than or equal to max");
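
The new helper mirrors get_quantized_bounds() for the signed domain: it maps a float interval into QASYMM8_SIGNED values before test tensors are filled. A short usage sketch; the quantization parameters and the wrapper function are made up for illustration:

#include "arm_compute/core/QuantizationInfo.h"
#include "tests/validation/Helpers.h"
#include <utility>

using namespace arm_compute;
using namespace arm_compute::test::validation;

// Map the float interval [-1, 1] into the QASYMM8_SIGNED domain before generating
// random test data. With scale 1/64 and offset -10 this yields { -74, 54 }.
std::pair<int, int> signed_fill_bounds()
{
    const QuantizationInfo qinfo(1.f / 64.f, -10);
    return get_quantized_qasymm8_signed_bounds(qinfo, -1.f, 1.f);
}
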
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 3227a98b05..b481b52443 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -262,6 +262,14 @@ void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &sh
*/
std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max);
+/** Helper function to compute asymmetric quantized signed min and max bounds
+ *
+ * @param[in] quant_info Quantization info to be used for conversion
+ * @param[in] min Floating point minimum value to be quantized
+ * @param[in] max Floating point maximum value to be quantized
+ */
+std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &quant_info, float min, float max);
+
/** Helper function to compute symmetric quantized min and max bounds
*
* @param[in] quant_info Quantization info to be used for conversion
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index c2a0cb56a2..1d7805d024 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -462,6 +462,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>
}
TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ QuantizedActivationFunctionsDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
TEST_SUITE(QSYMM8_PER_CHANNEL)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerReducedDataset(),
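
For the new QASYMM8_SIGNED suite above, QuantizationInfo(2.f / 255.f, 10) gives the int8 tensors a representable float range of roughly [-1.08, 0.92]. A standalone check of that arithmetic, assuming the usual dequantization value = scale * (q - offset):

#include <cstdio>

int main()
{
    const float scale  = 2.f / 255.f;
    const int   offset = 10;
    // QASYMM8_SIGNED stores values in [-128, 127]; dequantized value = scale * (q - offset).
    std::printf("min representable: %f\n", scale * (-128 - offset)); // ~ -1.082
    std::printf("max representable: %f\n", scale * ( 127 - offset)); // ~  0.918
    return 0;
}
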
diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h
index c5cddc28db..3c4b625ac6 100644
--- a/tests/validation/fixtures/ConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/ConvolutionLayerFixture.h
@@ -52,7 +52,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class ConvolutionValidationGenericFixture : public framework::Fixture
{
public:
- using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+ using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value
+ || std::is_same<typename std::decay<T>::type, int8_t>::value,
+ int32_t, T >::type;
public:
template <typename...>
@@ -84,6 +86,13 @@ protected:
library->fill(tensor, distribution, i);
break;
}
+ case DataType::QASYMM8_SIGNED:
+ {
+ std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+ std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+ library->fill(tensor, distribution, i);
+ break;
+ }
case DataType::QSYMM8_PER_CHANNEL:
{
int min_bound = 128;
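
In the fixture, TBias now resolves to int32_t for both uint8_t and int8_t inputs, and signed tensors are filled from the bounds returned by the new helper. One portability note: std::uniform_int_distribution is only specified for int-width and wider integer types, so a conservative fill draws in int and narrows afterwards. A sketch of that approach (not the fixture's code):

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Fill a buffer with QASYMM8_SIGNED values drawn from a pre-computed quantized range.
// Drawing in int and narrowing avoids instantiating std::uniform_int_distribution<int8_t>,
// which the standard does not guarantee to work.
std::vector<std::int8_t> fill_qasymm8_signed(std::size_t n, int min_bound, int max_bound, unsigned seed)
{
    std::mt19937 gen(seed);
    std::uniform_int_distribution<int> dist(min_bound, max_bound);
    std::vector<std::int8_t> out(n);
    for(auto &v : out)
    {
        v = static_cast<std::int8_t>(dist(gen));
    }
    return out;
}
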
diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp
index 6cdba09c75..7a699c5f86 100644
--- a/tests/validation/reference/ActivationLayer.cpp
+++ b/tests/validation/reference/ActivationLayer.cpp
@@ -66,6 +66,17 @@ SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src
}
template <>
+SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+{
+ const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
+
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = activation_layer<float>(src_tmp, info);
+ SimpleTensor<int8_t> dst = convert_to_asymmetric<int8_t>(dst_tmp, dst_qinfo);
+ return dst;
+}
+
+template <>
SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
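
The int8_t reference activation added above follows the same pattern as the existing uint8_t one: dequantize to float, run the float reference, requantize with the destination quantization info. A library-free sketch of that detour for a single value, assuming BOUNDED_RELU, f(x) = min(a, max(0, x)), and round-to-nearest:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Reference-style activation on a single QASYMM8_SIGNED value:
// dequantize -> float activation -> requantize with the output quantization.
std::int8_t bounded_relu_qasymm8_signed(std::int8_t q, float in_scale, int in_offset,
                                        float out_scale, int out_offset, float upper_bound)
{
    const float x = in_scale * static_cast<float>(static_cast<int>(q) - in_offset); // dequantize
    const float y = std::min(upper_bound, std::max(0.f, x));                        // BOUNDED_RELU
    const int   r = static_cast<int>(std::lround(y / out_scale)) + out_offset;      // requantize
    return static_cast<std::int8_t>(std::min(127, std::max(-128, r)));
}
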
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 6ac5df93b3..6168f10741 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
#define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
+#include "arm_compute/core/utils/misc/Requires.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
@@ -94,10 +95,8 @@ inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<TW> &wei
}
// 3D convolution for QASYMM8 type
-template < typename T, typename TW, typename TB, typename std::enable_if < std::is_same<T, uint8_t>::value &&(std::is_same<TW, uint8_t>::value
- || std::is_same<TW, int8_t>::value)
- &&std::is_same<TB, int32_t>::value,
- int >::type = 0 >
+template < typename T, typename TW, typename TB, REQUIRES_TA((std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) &&(std::is_same<TW, uint8_t>::value
+ || std::is_same<TW, int8_t>::value)) >
inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
int i_offset, int w_offset, int b_offset, int o_offset,
int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1, int filter_id = 0)
@@ -172,7 +171,8 @@ inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<TW> &wei
acc += (*b_ptr);
// Quantize down
- acc = validation::quantize_down_scale_by_fixedpoint(acc, output_multiplier, output_shift, output_offset, 0, 255);
+ acc = validation::quantize_down_scale_by_fixedpoint(acc, output_multiplier, output_shift, output_offset,
+ std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max());
// Store the result
*out_ptr = acc;
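
Clamping with std::numeric_limits<T> keeps the quantize-down step correct for both element types: [0, 255] for uint8_t as before, [-128, 127] for int8_t. A compile-time check of that assumption:

#include <cstdint>
#include <limits>

static_assert(std::numeric_limits<std::uint8_t>::lowest() == 0 && std::numeric_limits<std::uint8_t>::max() == 255,
              "QASYMM8 accumulators are clamped to [0, 255]");
static_assert(std::numeric_limits<std::int8_t>::lowest() == -128 && std::numeric_limits<std::int8_t>::max() == 127,
              "QASYMM8_SIGNED accumulators are clamped to [-128, 127]");
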
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 4d2c1acb6f..c9ad8d38b9 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -41,10 +41,6 @@ namespace validation
{
namespace reference
{
-namespace
-{
-} // namespace
-
template <typename T, typename TW, typename TB>
SimpleTensor<T> convolution_layer_nchw(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const PadStrideInfo &info,
const Size2D &dilation, unsigned int num_groups)
@@ -141,6 +137,8 @@ template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &sr
const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
+template SimpleTensor<int8_t> convolution_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
+ const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
} // namespace reference
} // namespace validation
} // namespace test
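
convolution_layer() is defined in this translation unit, so the int8_t combination needs the explicit instantiation added above; without it, callers in the test binaries would fail to link. A generic illustration of the pattern (not the library's code):

#include <cstdint>

// Header side: declaration only.
template <typename T>
T scale_by_two(T value);

// Source side: definition plus one explicit instantiation per supported type.
template <typename T>
T scale_by_two(T value)
{
    return static_cast<T>(value * 2);
}

template std::int8_t  scale_by_two<std::int8_t>(std::int8_t);   // newly supported signed type
template std::uint8_t scale_by_two<std::uint8_t>(std::uint8_t); // previously supported type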