aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2020-02-07 13:46:45 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> 2020-03-02 15:51:39 +0000
commit: 1856ff7ebb29e04c3549b74d7ced336111cbf05e (patch)
tree: c94654f0d8535930a81712bf7aadffd757c82577 /arm_compute
parent: 3c4bf0c4eab5ead756c472f17ddf008b882cc905 (diff)
download: ComputeLibrary-1856ff7ebb29e04c3549b74d7ced336111cbf05e.tar.gz
COMPMID-3097 Fuse activation with fully connected layer CL
Change-Id: I447030e69b9e565f2f81529a41af8c5e7ece7ecf
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2702
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/PixelValue.h | 18
-rw-r--r-- arm_compute/core/Types.h | 87
-rw-r--r-- arm_compute/graph/nodes/FullyConnectedLayerNode.h | 9
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h | 65
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h | 52
5 files changed, 122 insertions, 109 deletions
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index c5f6608163..31bc55098a 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -41,11 +41,11 @@ public:
}
/** Initialize the union with a pixel value of chosen datatype
*
- * @param[in] v int value.
+ * @param[in] v value.
* @param[in] datatype DataType that @p v have to be stored
* @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v
*/
- PixelValue(int64_t v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
+ PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
: PixelValue()
{
switch(datatype)
@@ -57,13 +57,13 @@ public:
value.s8 = static_cast<int8_t>(v);
break;
case DataType::QASYMM8:
- value.u8 = quantize_qasymm8(static_cast<uint8_t>(v), qinfo);
+ value.u8 = quantize_qasymm8(static_cast<float>(v), qinfo);
break;
case DataType::QASYMM8_SIGNED:
- value.s8 = quantize_qasymm8_signed(static_cast<int8_t>(v), qinfo);
+ value.s8 = quantize_qasymm8_signed(static_cast<float>(v), qinfo);
break;
case DataType::QSYMM8:
- value.s8 = quantize_qsymm8(static_cast<int8_t>(v), qinfo);
+ value.s8 = quantize_qsymm8(static_cast<float>(v), qinfo);
break;
case DataType::U16:
value.u16 = static_cast<uint16_t>(v);
@@ -72,10 +72,10 @@ public:
value.s16 = static_cast<int16_t>(v);
break;
case DataType::QASYMM16:
- value.u16 = quantize_qasymm16(static_cast<uint16_t>(v), qinfo);
+ value.u16 = quantize_qasymm16(static_cast<float>(v), qinfo);
break;
case DataType::QSYMM16:
- value.s16 = quantize_qsymm16(static_cast<int16_t>(v), qinfo);
+ value.s16 = quantize_qsymm16(static_cast<float>(v), qinfo);
break;
case DataType::U32:
value.u32 = static_cast<uint32_t>(v);
@@ -96,10 +96,8 @@ public:
value.f32 = static_cast<float>(v);
break;
case DataType::F64:
- value.f64 = static_cast<double>(v);
- break;
default:
- value.s64 = v;
+ value.f64 = v;
break;
}
}
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 2030b171c6..cf689d757c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -799,39 +799,6 @@ private:
DimensionRoundingType _round_type;
};
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
- DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
- bool transpose_weights{ true }; /**< Transpose weights if true. */
- bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
- bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
- bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
- /** Sets the weights trained data layout
- *
- * @param[in] layout Data layout that the weights were trained with
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
- {
- weights_trained_layout = layout;
- return *this;
- }
- /** Sets the transpose weights flag
- *
- * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
- {
- transpose_weights = should_transpose_weights;
- return *this;
- }
-};
-
/** PriorBox layer info */
class PriorBoxLayerInfo final
{
@@ -1674,6 +1641,40 @@ private:
bool _enabled = { false };
};
+/** Fully connected layer info */
+struct FullyConnectedLayerInfo
+{
+ DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
+ bool transpose_weights{ true }; /**< Transpose weights if true. */
+ bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
+ bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
+ bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
+ ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
+
+ /** Sets the weights trained data layout
+ *
+ * @param[in] layout Data layout that the weights were trained with
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
+ {
+ weights_trained_layout = layout;
+ return *this;
+ }
+ /** Sets the transpose weights flag
+ *
+ * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+ {
+ transpose_weights = should_transpose_weights;
+ return *this;
+ }
+};
+
/** Normalization Layer Information class */
class NormalizationLayerInfo
{
@@ -1944,16 +1945,16 @@ enum class GEMMLowpOutputStageType
/** GEMMLowp output stage info */
struct GEMMLowpOutputStageInfo
{
- GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
- int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
- int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
- int32_t gemmlowp_min_bound{ 0 }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
- int32_t gemmlowp_max_bound{ 0 }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
- std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
+ int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+ int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
+ int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+ int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate up the output result before converting back to QASYMM8 */
+ std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage shifts used for quantizing to QASYMM8 */
+ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
+ DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
};
/** GEMM LHS (Left Hand Side) matrix information */
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index 41a7bc9946..10c310dda2 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,11 @@ public:
FullyConnectedLayerNode(unsigned int num_outputs,
QuantizationInfo out_quant_info = QuantizationInfo(),
FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Sets fused activation
+ *
+ * @param[in] fused_activation Fused activation to set
+ */
+ void set_fused_activation(ActivationLayerInfo fused_activation);
/** Computes weights descriptor
*
* @warning Works for inputs with 1D batch space
@@ -83,6 +88,8 @@ public:
TensorDescriptor configure_output(size_t idx) const override;
void accept(INodeVisitor &v) override;
+ static constexpr NodeType node_type = NodeType::FullyConnectedLayer;
+
private:
unsigned int _num_outputs;
QuantizationInfo _out_quant_info;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index f453879fd8..564135eed8 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,24 +68,25 @@ public:
* @param[in] result_offset Offset to be added to each element of the input matrix
* @param[in] result_mult_int Value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
@@ -128,25 +129,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
@@ -189,25 +190,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
- int min = 0, int max = 0);
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
@@ -230,24 +231,25 @@ public:
* @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8
* @param[in] multiplier Float multiplier to be multiplied to each element of the input matrix
* @param[in] offset Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
*
@@ -288,24 +290,25 @@ public:
* @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
*
* @param[in] input Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */ \ No newline at end of file
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index ca2cbbc268..283b052917 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -68,24 +68,25 @@ public:
* @param[in] result_offset Offset to be added to each element of the input matrix
* @param[in] result_mult_int Value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
@@ -128,24 +129,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
*
@@ -187,24 +189,25 @@ public:
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
*
* @param[in] input Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
*
@@ -245,24 +248,25 @@ public:
* @param[out] output Output tensor. Data type supported: QSYMM16
* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*/
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+ int max = std::numeric_limits<int32_t>::max());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
*
* @param[in] input Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
* @param[in] output Output tensor info. Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.