author    Giorgio Arena <giorgio.arena@arm.com>  2020-02-07 13:46:45 +0000
committer Giorgio Arena <giorgio.arena@arm.com>  2020-03-02 15:51:39 +0000
commit    1856ff7ebb29e04c3549b74d7ced336111cbf05e (patch)
tree      c94654f0d8535930a81712bf7aadffd757c82577
parent    3c4bf0c4eab5ead756c472f17ddf008b882cc905 (diff)
download  ComputeLibrary-1856ff7ebb29e04c3549b74d7ced336111cbf05e.tar.gz
COMPMID-3097 Fuse activation with fully connected layer CL
Change-Id: I447030e69b9e565f2f81529a41af8c5e7ece7ecf
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2702
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
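The caller-visible effect of this patch is that the activation rides along in FullyConnectedLayerInfo instead of requiring a separate CLActivationLayer. A minimal usage sketch against the post-patch API (shapes, data type, and the ReLU6 bound are illustrative, not taken from this commit):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative shapes: 128 inputs, 64 outputs.
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

    // The fused activation is carried by FullyConnectedLayerInfo, so no
    // separate activation function is configured or run.
    FullyConnectedLayerInfo fc_info{};
    fc_info.activation_info = ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

    CLFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst, fc_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    fc.run();
    CLScheduler::get().sync();
    return 0;
}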
-rw-r--r--  arm_compute/core/PixelValue.h | 18
-rw-r--r--  arm_compute/core/Types.h | 87
-rw-r--r--  arm_compute/graph/nodes/FullyConnectedLayerNode.h | 9
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h | 65
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h | 52
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp | 19
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 9
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 7
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 7
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp | 9
-rw-r--r--  src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 9
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp | 11
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 7
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp | 7
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 7
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 7
-rw-r--r--  src/graph/mutators/NodeFusionMutator.cpp | 7
-rw-r--r--  src/graph/nodes/FullyConnectedLayer.cpp | 7
-rw-r--r--  src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 42
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 8
-rw-r--r--  tests/validation/CL/FullyConnectedLayer.cpp | 46
-rw-r--r--  tests/validation/CL/GEMMLowp.cpp | 17
-rw-r--r--  tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp | 25
-rw-r--r--  tests/validation/NEON/FullyConnectedLayer.cpp | 37
-rw-r--r--  tests/validation/NEON/GEMMLowp.cpp | 38
-rw-r--r--  tests/validation/fixtures/FullyConnectedLayerFixture.h | 27
26 files changed, 312 insertions(+), 272 deletions(-)
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index c5f6608163..31bc55098a 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -41,11 +41,11 @@ public:
}
/** Initialize the union with a pixel value of a chosen datatype
*
- * @param[in] v int value.
+ * @param[in] v value.
* @param[in] datatype DataType in which @p v has to be stored
* @param[in] qinfo (Optional) QuantizationInfo to apply to @p v in case of quantized data types
*/
- PixelValue(int64_t v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
+ PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
: PixelValue()
{
switch(datatype)
@@ -57,13 +57,13 @@ public:
value.s8 = static_cast<int8_t>(v);
break;
case DataType::QASYMM8:
- value.u8 = quantize_qasymm8(static_cast<uint8_t>(v), qinfo);
+ value.u8 = quantize_qasymm8(static_cast<float>(v), qinfo);
break;
case DataType::QASYMM8_SIGNED:
- value.s8 = quantize_qasymm8_signed(static_cast<int8_t>(v), qinfo);
+ value.s8 = quantize_qasymm8_signed(static_cast<float>(v), qinfo);
break;
case DataType::QSYMM8:
- value.s8 = quantize_qsymm8(static_cast<int8_t>(v), qinfo);
+ value.s8 = quantize_qsymm8(static_cast<float>(v), qinfo);
break;
case DataType::U16:
value.u16 = static_cast<uint16_t>(v);
@@ -72,10 +72,10 @@ public:
value.s16 = static_cast<int16_t>(v);
break;
case DataType::QASYMM16:
- value.u16 = quantize_qasymm16(static_cast<uint16_t>(v), qinfo);
+ value.u16 = quantize_qasymm16(static_cast<float>(v), qinfo);
break;
case DataType::QSYMM16:
- value.s16 = quantize_qsymm16(static_cast<int16_t>(v), qinfo);
+ value.s16 = quantize_qsymm16(static_cast<float>(v), qinfo);
break;
case DataType::U32:
value.u32 = static_cast<uint32_t>(v);
@@ -96,10 +96,8 @@ public:
value.f32 = static_cast<float>(v);
break;
case DataType::F64:
- value.f64 = static_cast<double>(v);
- break;
default:
- value.s64 = v;
+ value.f64 = v;
break;
}
}
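The constructor switch from int64_t to double matters for quantized types: a real-valued bound is now handed to quantize_qasymm8() and friends as a float instead of being truncated to an integer first, and F64 becomes the fall-through storage. A small sketch of the difference (scale and zero point are illustrative):

#include <cstdint>

#include "arm_compute/core/PixelValue.h"

using namespace arm_compute;

// With scale 0.05f and zero point 10, -0.25 quantizes to round(-0.25 / 0.05) + 10 = 5.
// The old int64_t overload could only have received the truncated value 0, which
// quantizes to 10 instead.
uint8_t quantized_lower_bound()
{
    PixelValue lower(-0.25, DataType::QASYMM8, QuantizationInfo(0.05f, 10));
    return lower.get<uint8_t>(); // 5
}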
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 2030b171c6..cf689d757c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -799,39 +799,6 @@ private:
DimensionRoundingType _round_type;
};
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
- DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
- bool transpose_weights{ true }; /**< Transpose weights if true. */
- bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
- bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
- bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
- /** Sets the weights trained data layout
- *
- * @param[in] layout Data layout that the weights were trained with
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
- {
- weights_trained_layout = layout;
- return *this;
- }
- /** Sets the transpose weights flag
- *
- * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
- {
- transpose_weights = should_transpose_weights;
- return *this;
- }
-};
-
/** PriorBox layer info */
class PriorBoxLayerInfo final
{
@@ -1674,6 +1641,40 @@ private:
bool _enabled = { false };
};
+/** Fully connected layer info */
+struct FullyConnectedLayerInfo
+{
+ DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
+ bool transpose_weights{ true }; /**< Transpose weights if true. */
+ bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
+ bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
+ bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
+ ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
+
+ /** Sets the weights trained data layout
+ *
+ * @param[in] layout Data layout that the weights were trained with
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
+ {
+ weights_trained_layout = layout;
+ return *this;
+ }
+ /** Sets the transpose weights flag
+ *
+ * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+ {
+ transpose_weights = should_transpose_weights;
+ return *this;
+ }
+};
+
/** Normalization Layer Information class */
class NormalizationLayerInfo
{
@@ -1944,16 +1945,16 @@ enum class GEMMLowpOutputStageType
/** GEMMLowp output stage info */
struct GEMMLowpOutputStageInfo
{
- GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
- int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
- int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
- int32_t gemmlowp_min_bound{ 0 }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
- int32_t gemmlowp_max_bound{ 0 }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
- std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
+ int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+ int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
+ int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+ int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
+ std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multipliers used for quantizing to QASYMM8 */
+ std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage shifts used for quantizing to QASYMM8 */
+ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
+ DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
};
/** GEMM LHS (Left Hand Side) matrix information */
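The new int32 defaults for gemmlowp_min_bound/gemmlowp_max_bound mean "no clamping" without a sentinel value; a fused activation narrows them to the quantized clamp interval. A hypothetical helper (not part of the patch) showing that mapping for a QASYMM8 output:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

// Hypothetical helper: derive the gemmlowp saturation bounds from a fused
// activation. With no activation the bounds keep their new int32 defaults,
// which the kernels interpret as "no clamp".
GEMMLowpOutputStageInfo make_output_stage(const ActivationLayerInfo &act, const UniformQuantizationInfo &oq)
{
    GEMMLowpOutputStageInfo info{};
    info.type             = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.output_data_type = DataType::QASYMM8;
    if(act.enabled())
    {
        if(act.activation() == ActivationLayerInfo::ActivationFunction::RELU)
        {
            info.gemmlowp_min_bound = quantize_qasymm8(0.f, oq); // lower clamp at real 0
        }
        else if(act.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
        {
            info.gemmlowp_min_bound = quantize_qasymm8(0.f, oq);
            info.gemmlowp_max_bound = quantize_qasymm8(act.a(), oq); // upper clamp at a()
        }
    }
    return info;
}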
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index 41a7bc9946..10c310dda2 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,11 @@ public:
FullyConnectedLayerNode(unsigned int num_outputs,
QuantizationInfo out_quant_info = QuantizationInfo(),
FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Sets fused activation
+ *
+ * @param[in] fused_activation Fused activation to set
+ */
+ void set_fused_activation(ActivationLayerInfo fused_activation);
/** Computes weights descriptor
*
* @warning Works for inputs with 1D batch space
@@ -83,6 +88,8 @@ public:
TensorDescriptor configure_output(size_t idx) const override;
void accept(INodeVisitor &v) override;
+ static constexpr NodeType node_type = NodeType::FullyConnectedLayer;
+
private:
unsigned int _num_outputs;
QuantizationInfo _out_quant_info;
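The new static node_type constant gives the fusion machinery a compile-time tag for locating candidate nodes. A hedged sketch of the lookup pattern it enables (the collect() helper is hypothetical):

#include <vector>

#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INode.h"

using namespace arm_compute::graph;

// Hypothetical helper: gather all nodes of a given class by comparing each
// node's runtime type against the class's static node_type tag.
template <typename NodeT>
std::vector<NodeT *> collect(Graph &g)
{
    std::vector<NodeT *> found;
    for(auto &node : g.nodes())
    {
        if(node != nullptr && node->type() == NodeT::node_type)
        {
            found.push_back(static_cast<NodeT *>(node.get()));
        }
    }
    return found;
}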
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index f453879fd8..564135eed8 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,24 +68,25 @@ public:
* @param[in] result_offset Offset to be added to each element of the input matrix
* @param[in] result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
@@ -128,25 +129,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
@@ -189,25 +190,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
@@ -230,24 +231,25 @@ public:
* @param[out] output Output tensor. Data type supported: QASYMM8
* @param[in] multiplier Float multiplier to be multiplied to each element of the input matrix
* @param[in] offset Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
*
@@ -288,24 +290,25 @@ public:
* @param[out] output Output tensor. Data type supported: QSYMM16
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
*
* @param[in] input Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor info. Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */
\ No newline at end of file
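With the new defaults, callers that previously passed min = 0, max = 0 to mean "no clamping" can simply omit the arguments. A hedged configuration sketch (multiplier, shift, and offset values are placeholders, not taken from this patch):

#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"

using namespace arm_compute;

// Omitting min/max now means "no clamping" (full int32 range) rather than
// the old 0/0 sentinel; explicit bounds implement a fused bounded ReLU.
void configure_output_stage(const ICLTensor *gemm_out, const ICLTensor *bias, ICLTensor *dst)
{
    CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint stage;
    stage.configure(gemm_out, bias, dst,
                    /* result_fixedpoint_multiplier */ 1073741824,
                    /* result_shift                 */ 1,
                    /* result_offset_after_shift    */ 10);
}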
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index ca2cbbc268..283b052917 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -68,24 +68,25 @@ public:
* @param[in] result_offset Offset to be added to each element of the input matrix
* @param[in] result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
@@ -128,24 +129,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
*
@@ -187,24 +189,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
*
@@ -245,24 +248,25 @@ public:
* @param[out] output Output tensor. Data type supported: QSYMM16
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor info. Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
index 5550003f33..b9563553b8 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -113,22 +113,9 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
ARM_COMPUTE_RETURN_ERROR_ON(output_stage.output_data_type != output->data_type());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mm_result, output);
- PixelValue min_val{};
- PixelValue max_val{};
- std::tie(min_val, max_val) = get_min_max(output->data_type());
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
- }
- else
- {
- // Output will be configured as depending on the chosen output data type in the output stage
- PixelValue min_val{};
- PixelValue max_val{};
- std::tie(min_val, max_val) = get_min_max(output_stage.output_data_type);
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
}
+ ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_stage.gemmlowp_multipliers.size() != output_stage.gemmlowp_shifts.size(), "per channel quantization info is incorrect");
return Status{};
@@ -248,8 +235,8 @@ void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *m
PixelValue min_val{};
PixelValue max_val{};
std::tie(min_val, max_val) = get_min_max(output->info()->data_type());
- build_opts.add_option_if((min != min_val.get<int32_t>()) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != max_val.get<int32_t>()) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > min_val.get<int32_t>()), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < max_val.get<int32_t>()), "-DMAX_BOUND=" + support::cpp11::to_string(max));
std::string kernel_name("gemmlowp_offset_contribution");
kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type);
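The build-option logic now emits the clamp macros only when the requested bounds actually narrow the output type's representable range, so the int32 defaults compile to a kernel with no clamping code at all. The same predicate, isolated as a sketch (hard-coding the QASYMM8 range for clarity; the kernel itself queries get_min_max() of the output type):

#include <cstdint>

// A MIN_BOUND/MAX_BOUND macro is added to the OpenCL build options only when
// the bound bites; with min = INT32_MIN and max = INT32_MAX both return false.
bool needs_min_bound(int32_t min) { return min > 0; }    // 0 is the lowest QASYMM8 value
bool needs_max_bound(int32_t max) { return max < 255; }  // 255 is the highest QASYMM8 value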
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
index 8720123366..1bc7fe3946 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,8 +45,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 32767);
- ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -137,8 +136,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const
CLBuildOptions build_opts;
build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
- build_opts.add_option_if((min != -32768) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != 32767) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > -32768), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < 32767), "-DMAX_BOUND=" + support::cpp11::to_string(max));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
index 8a5ce9fa87..e207fcb1b0 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 127);
- ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I
build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.add_option_if((min != -128) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != 127) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > -128), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < 127), "-DMAX_BOUND=" + support::cpp11::to_string(max));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index dc04fed96e..7601d7ee77 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
index ae096f295c..dd1be748f5 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,8 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -132,8 +131,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::configure(const ICLTe
CLBuildOptions build_opts;
build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(multiplier));
build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(offset));
- build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 8175f60275..7a22239a7c 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,8 +41,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -135,8 +134,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i
build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(result_offset));
build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(result_mult_int));
build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
- build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
- build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+ build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+ build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
index 5d2df6d2c9..31414e3f3f 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -778,15 +778,8 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32);
- if(output->data_type() == DataType::QASYMM8)
+ if(output->data_type() != DataType::QASYMM8)
{
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < 0);
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 127);
- ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < -128);
ARM_COMPUTE_RETURN_ERROR_ON(mm_result->dimension(0) > 1 && output_stage.gemmlowp_multipliers.size() > 1 && b_offset != 0);
}
ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
@@ -914,7 +907,7 @@ get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row,
std::tie(type_min, type_max) = get_min_max(output->info()->data_type());
int32_t type_min_int = type_min.get<int32_t>();
int32_t type_max_int = type_max.get<int32_t>();
- const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound == type_min_int && output_stage.gemmlowp_max_bound == type_max_int);
+ const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound <= type_min_int && output_stage.gemmlowp_max_bound >= type_max_int);
// Check if we need to perform fixed point requantization
const bool is_fixed_point = output_stage.type != GEMMLowpOutputStageType::QUANTIZE_DOWN;
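The NEON kernels make the equivalent decision at configure time, selecting between templated run<true>/run<false> paths so the unclamped case pays no per-element cost. The relaxed predicate, as a sketch generalized over the output type's range:

#include <cstdint>

// Clamping is needed only if [min, max] lies strictly inside the output
// type's range; the old exact-equality test missed bounds wider than it.
bool is_bounded_relu(int32_t min, int32_t max, int32_t type_min, int32_t type_max)
{
    return !(min <= type_min && max >= type_max);
}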
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
index bc513e6618..058007139d 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,8 +46,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 32767);
- ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -213,7 +212,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const
INEKernel::configure(win_config.second);
// Check if we need to clamp the result using min and max
- const bool is_bounded_relu = ((min != max) && !(min == -32768 && max == 32767));
+ const bool is_bounded_relu = !(min <= -32768 && max >= 32767);
_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<false>;
}
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
index d24089d615..b8ca17ec3d 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,8 +46,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 127);
- ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -222,7 +221,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I
INEKernel::configure(win_config.second);
// Check if we need to clamp the result using min and max
- const bool is_bounded_relu = ((min != max) && !(min == -128 && max == 127));
+ const bool is_bounded_relu = !(min <= -128 && max >= 127);
_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<false>;
}
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index bb0b86404e..4a9d2f7481 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,8 +46,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if exist
if(bias != nullptr)
@@ -224,7 +223,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
INEKernel::configure(win_config.second);
// Check if we need to clamp the result using min and max
- const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255));
+ const bool is_bounded_relu = !(min <= 0 && max >= 255);
_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<false>;
}
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index a221bd7925..a68e4e7efb 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,8 +43,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
- ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+ ARM_COMPUTE_RETURN_ERROR_ON(min > max);
// Check biases if they exist
if(bias != nullptr)
@@ -324,7 +323,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp
INEKernel::configure(win_config.second);
// Check if we need to clamp the result using min and max
- const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255));
+ const bool is_bounded_relu = !(min <= 0 && max >= 255);
_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<false>;
}
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index b7f081dc42..151a8bfa03 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -301,6 +301,10 @@ void NodeFusionMutator::mutate(Graph &g)
{
return true;
};
+ auto cl_target_prec = [](INode & n)
+ {
+ return n.assigned_target() == Target::CL;
+ };
auto qs8_prec = [&g](INode & n)
{
ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
@@ -318,6 +322,7 @@ void NodeFusionMutator::mutate(Graph &g)
detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
+ detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
}
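For context, detail::fuse_layer only rewrites the graph when its precondition callable accepts the candidate node, so cl_target_prec limits FullyConnectedLayer + Activation fusion to nodes assigned to the CL backend, matching the CL-only scope of this patch. A hedged sketch of how such a predicate gates a fusion pass (helper and control flow simplified, not the real detail::fuse_layer):

    // Simplified, hypothetical gate; prec is e.g. cl_target_prec above.
    template <typename Prec, typename Fuser>
    void fuse_if(INode &node, Prec &&prec, Fuser &&fuser)
    {
        if(prec(node))
        {
            fuser(node); // absorb the downstream ActivationLayerNode
        }
        // otherwise the standalone activation node is kept (e.g. NEON targets)
    }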
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index 80fce7b8a1..34c432a1ce 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,11 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant
_outputs.resize(1, NullTensorID);
}
+void FullyConnectedLayerNode::set_fused_activation(ActivationLayerInfo fused_activation)
+{
+ _info.activation_info = fused_activation;
+}
+
TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
unsigned int num_outputs,
FullyConnectedLayerInfo fc_info,
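This setter is the hook the fusion pass uses to push the absorbed activation into the node's FullyConnectedLayerInfo; a hypothetical equivalent of what detail::fuse_node_with_activation is expected to do with it:

    // Hypothetical helper; activation_info() is assumed to expose the
    // ActivationLayerNode's ActivationLayerInfo.
    void transfer_activation(FullyConnectedLayerNode &fc_node, const ActivationLayerNode &act_node)
    {
        fc_node.set_fused_activation(act_node.activation_info());
    }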
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index dcaa12645e..9b7de8df1b 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -41,7 +41,7 @@ using namespace arm_compute::utils::cast;
namespace
{
Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output,
- GEMMLowpOutputStageInfo &gemmlowp_output_stage)
+ GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
{
gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
gemmlowp_output_stage.gemmlowp_offset = 0;
@@ -53,13 +53,14 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
// Configure output stage for quantized case
if(is_data_type_quantized_asymmetric(data_type))
{
- const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
- const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output.quantization_info().uniform();
+ const QuantizationInfo oq_info = output.quantization_info();
+ const UniformQuantizationInfo iq_unif = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
+ const UniformQuantizationInfo oq_unif = oq_info.uniform();
- const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info;
+ const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
- const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
+ const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
int output_multiplier = 0;
int output_shift = 0;
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
@@ -68,6 +69,27 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
PixelValue type_max{};
std::tie(type_min, type_max) = get_min_max(data_type);
+ if(activation_info.enabled())
+ {
+ switch(activation_info.activation())
+ {
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ type_min = PixelValue(oq_unif.offset);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ type_min = PixelValue(oq_unif.offset);
+ type_max = PixelValue(activation_info.a(), data_type, oq_info);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ type_min = PixelValue(activation_info.b(), data_type, oq_info);
+ type_max = PixelValue(activation_info.a(), data_type, oq_info);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Activation function not supported.");
+ break;
+ }
+ }
+
// Set the GEMMLowp output stage info
gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
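A worked example of the switch above, under assumed numbers (illustrative, not from the patch): QASYMM8 output with scale 1/255 and zero point 10, fused BOUNDED_RELU with a = 0.5f. ReLU-style lower bounds reuse the zero point directly (real 0.0), while real-valued bounds are quantized with the output QuantizationInfo:

    // Assumed output quantization: scale = 1/255, offset (zero point) = 10.
    // Fused activation: BOUNDED_RELU with a = 0.5f.
    //
    // type_min = PixelValue(10);                    // real 0.0 -> zero point
    // type_max = PixelValue(0.5f, QASYMM8, oq_info) // round(0.5 * 255) + 10 = 138
    //
    // The GEMMLowp output stage then clamps every result to [10, 138].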
@@ -84,7 +106,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
- ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+ ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info));
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
@@ -144,7 +166,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem
void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
- construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage);
+ construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info);
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
@@ -155,7 +177,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
gemmlowp_output_stage, // gemmlowp_output_stage
fc_info.fp_mixed_precision, // fp_mixed_precision
true, // broadcast_bias
- ActivationLayerInfo()); // activation_info
+ fc_info.activation_info); // activation_info
if(_is_quantized)
{
@@ -313,6 +335,8 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
+ && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
bool is_fc_after_conv = true;
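The new guard is dense; an equivalent, more readable formulation (a sketch, not the patch's wording) says: when the input is quantized and an activation is fused, only the ReLU family is accepted:

    // Equivalent sketch of the quantized fused-activation guard above.
    const auto act            = fc_info.activation_info.activation();
    const bool is_relu_family = act == ActivationLayerInfo::ActivationFunction::RELU
                             || act == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
                             || act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU;
    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled()
                                && is_data_type_quantized(input->data_type())
                                && !is_relu_family);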
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 682812b1c8..5398050533 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -333,8 +333,12 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0];
- int min_activation = 0;
- int max_activation = 0;
+ PixelValue min_val{};
+ PixelValue max_val{};
+ std::tie(min_val, max_val) = get_min_max(output->info()->data_type());
+
+ auto min_activation = min_val.get<int32_t>();
+ auto max_activation = max_val.get<int32_t>();
const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
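Note the changed default above: after the kernel changes earlier in this patch, min == max no longer acts as a "no clamping" sentinel, so the clamp window must now be spelled out as the output type's full representable range. A small sketch of the pattern, assuming a QASYMM8 output:

    // Sketch: seed the clamp window with the type's full range (QASYMM8 assumed).
    PixelValue min_val{};
    PixelValue max_val{};
    std::tie(min_val, max_val) = get_min_max(DataType::QASYMM8);
    const auto min_activation = min_val.get<int32_t>(); // 0
    const auto max_activation = max_val.get<int32_t>(); // 255
    // A supported fused activation can then narrow [0, 255] further.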
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index e57dd4e7b1..357d77d03a 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -67,6 +67,23 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo",
QuantizationInfo(1.f / 255.f, 10),
QuantizationInfo(1.1f, 10),
});
+
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)
+});
+
+const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)
+});
} // namespace
TEST_SUITE(CL)
@@ -174,16 +191,18 @@ using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTens
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -191,14 +210,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
@@ -212,13 +233,15 @@ using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
+ combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+ ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
+ combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+ ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -226,7 +249,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>,
TEST_SUITE_END() /* QASYMM8 */
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData))
+ combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData),
+ ActivationFunctionsQuantizedDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index eb42c4c659..94621b4393 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -150,7 +150,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale)
const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
3)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
2)
@@ -229,7 +229,7 @@ TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale
TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
@@ -310,7 +310,7 @@ TEST_SUITE_END() // BoundedReLu
TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint
TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint)
const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
* framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true });
@@ -379,7 +379,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint)
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
@@ -389,7 +389,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f
1073741825)
* framework::dataset::make("result_shift", -3,
-2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
254601602)
@@ -404,26 +404,21 @@ using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Wrong output data type
}),
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
})),
framework::dataset::make("Min",{ -205,
- -60000,
-180,
})),
framework::dataset::make("Max",{ 205,
- 60000,
180,
})),
- framework::dataset::make("Expected", { true, false, false })),
+ framework::dataset::make("Expected", { true, false })),
a_info, b_info, output_info, min, max, expected)
{
// Lock tensors
diff --git a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
index 4c7ef81572..1ef2fb9559 100644
--- a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
});
-const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo());
} // namespace
TEST_SUITE(GC)
@@ -107,16 +108,18 @@ using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<GCTens
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -124,14 +127,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>,
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(GCAccessor(_target), _reference, rel_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(GCAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index fae116aa9f..f66b0ceb8a 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -71,6 +71,8 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo",
QuantizationInfo(1.f / 256.f, 10),
QuantizationInfo(1.1f, 10),
});
+
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo());
} // namespace
TEST_SUITE(NEON)
@@ -179,16 +181,18 @@ using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor
TEST_SUITE(Float)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F16)))
+ framework::dataset::make("DataType", DataType::F16)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
@@ -197,14 +201,16 @@ TEST_SUITE_END()
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
- framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+ framework::dataset::make("DataType", DataType::F32)),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
@@ -217,31 +223,34 @@ using NEFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData))
+ QuantizationData),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(
combine(datasets::LargeFullyConnectedLayerDataset(),
FullyConnectedParameters),
framework::dataset::make("DataType", DataType::QASYMM8)),
- QuantizationData))
+ QuantizationData),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE_END()
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
combine(datasets::SmallFullyConnectedLayerDataset(),
FullyConnectedParameters),
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- QuantizationData))
+ QuantizationData),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 10f2284914..de30bd5451 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -169,7 +169,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale)
const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
3)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
2)
@@ -181,26 +181,21 @@ using NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
}),
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
TensorInfo(TensorShape(20U), 1, DataType::S32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
})),
framework::dataset::make("Min",{ 0,
- 8,
13,
})),
framework::dataset::make("Max",{ 205,
- 300,
180,
})),
- framework::dataset::make("Expected", { true, false, false })),
+ framework::dataset::make("Expected", { true, false })),
a_info, b_info, output_info, min, max, expected)
{
// Lock tensors
@@ -287,7 +282,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
@@ -303,26 +298,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
}),
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
TensorInfo(TensorShape(20U), 1, DataType::S32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
})),
framework::dataset::make("Min",{ 0,
- 8,
13,
})),
framework::dataset::make("Max",{ 205,
- 300,
180,
})),
- framework::dataset::make("Expected", { true, false, false })),
+ framework::dataset::make("Expected", { true, false })),
a_info, b_info, output_info, min, max, expected)
{
// Lock tensors
@@ -414,7 +404,7 @@ TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint)
const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
- * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
@@ -427,31 +417,26 @@ using NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture =
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::F32), // Invalid input data type
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),
}),
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
TensorInfo(TensorShape(20U), 1, DataType::S32),
TensorInfo(TensorShape(21U), 1, DataType::S32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
})),
framework::dataset::make("Min",{ -110,
- -130,
-113,
-113,
})),
framework::dataset::make("Max",{ 87,
- 140,
97,
97,
})),
- framework::dataset::make("Expected", { false, false, false, true })),
+ framework::dataset::make("Expected", { false, false, true })),
a_info, b_info, output_info, min, max, expected)
{
// Lock tensors
@@ -527,7 +512,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint)
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
2)
@@ -536,7 +521,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f
1073741825)
* framework::dataset::make("result_shift", -3,
-2)
- * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+ * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
254601602)
@@ -551,26 +536,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
- TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
}),
framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
- TensorInfo(TensorShape(21U), 1, DataType::S32),
TensorInfo(TensorShape(20U), 1, DataType::S32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
- TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
})),
framework::dataset::make("Min",{ -205,
- -60000,
-180,
})),
framework::dataset::make("Max",{ 205,
- 60000,
180,
})),
- framework::dataset::make("Expected", { true, false, false })),
+ framework::dataset::make("Expected", { true, false })),
a_info, b_info, output_info, min, max, expected)
{
// Lock tensors
diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index 7f0ceadea1..6952b226da 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -34,6 +34,7 @@
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/FullyConnectedLayer.h"
#include "tests/validation/reference/Utils.h"
@@ -55,7 +56,7 @@ public:
public:
template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
- DataType data_type, QuantizationInfo quantization_info)
+ DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info)
{
ARM_COMPUTE_UNUSED(weights_shape);
ARM_COMPUTE_UNUSED(bias_shape);
@@ -63,6 +64,7 @@ public:
_data_type = data_type;
_bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
_quantization_info = quantization_info;
+ _activation_info = activation_info;
_target = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
_reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape);
@@ -130,6 +132,7 @@ protected:
FullyConnectedLayerInfo fc_info;
fc_info.transpose_weights = transpose_weights;
fc_info.are_weights_reshaped = !reshape_weights;
+ fc_info.activation_info = _activation_info;
// Create and configure function.
FunctionType fc;
@@ -199,14 +202,15 @@ protected:
fill(weights, 1);
fill(bias, 2);
- return reference::fully_connected_layer<T>(src, weights, bias, output_shape);
+ return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape), _activation_info, _quantization_info);
}
- TensorType _target{};
- SimpleTensor<T> _reference{};
- DataType _data_type{};
- DataType _bias_data_type{};
- QuantizationInfo _quantization_info{};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ DataType _bias_data_type{};
+ QuantizationInfo _quantization_info{};
+ ActivationLayerInfo _activation_info{};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -214,11 +218,12 @@ class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidatio
{
public:
template <typename...>
- void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type)
+ void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
+ ActivationLayerInfo activation_info)
{
FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
reshape_weights, data_type,
- QuantizationInfo());
+ QuantizationInfo(), activation_info);
}
};
@@ -228,11 +233,11 @@ class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayer
public:
template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
- QuantizationInfo quantization_info)
+ QuantizationInfo quantization_info, ActivationLayerInfo activation_info)
{
FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
reshape_weights, data_type,
- quantization_info);
+ quantization_info, activation_info);
}
};
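With these changes the fixtures validate fused runs against a composed reference, activation_layer(fully_connected_layer(x)); a hypothetical direct invocation of the updated setup() signature (shapes illustrative):

    // Hypothetical use of the non-quantized fixture's new entry point.
    FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, float> fixture;
    fixture.setup(TensorShape(8U), TensorShape(8U, 16U), TensorShape(16U), TensorShape(16U),
                  true  /* transpose_weights */, false /* reshape_weights */,
                  DataType::F32,
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));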
} // namespace validation