From 1856ff7ebb29e04c3549b74d7ced336111cbf05e Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Fri, 7 Feb 2020 13:46:45 +0000
Subject: COMPMID-3097 Fuse activation with fully connected layer CL

Change-Id: I447030e69b9e565f2f81529a41af8c5e7ece7ecf
Signed-off-by: Giorgio Arena
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2702
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
---
 arm_compute/core/PixelValue.h                      | 18 ++---
 arm_compute/core/Types.h                           | 87 +++++++++++-----------
 arm_compute/graph/nodes/FullyConnectedLayerNode.h  |  9 ++-
 .../runtime/CL/functions/CLGEMMLowpOutputStage.h   | 65 ++++++++--------
 .../runtime/NEON/functions/NEGEMMLowpOutputStage.h | 52 +++++++------
 ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 19 +----
 ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp |  9 +--
 ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp |  7 +-
 ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp |  7 +-
 ...pQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp |  9 +--
 ...GEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp |  9 +--
 ...GEMMLowpOffsetContributionOutputStageKernel.cpp | 11 +--
 ...tizeDownInt32ToInt16ScaleByFixedPointKernel.cpp |  7 +-
 ...ntizeDownInt32ToInt8ScaleByFixedPointKernel.cpp |  7 +-
 ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp |  7 +-
 ...GEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp |  7 +-
 src/graph/mutators/NodeFusionMutator.cpp           |  7 +-
 src/graph/nodes/FullyConnectedLayer.cpp            |  7 +-
 src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 42 ++++++++---
 .../CL/functions/CLGEMMConvolutionLayer.cpp        |  8 +-
 tests/validation/CL/FullyConnectedLayer.cpp        | 46 +++++++++---
 tests/validation/CL/GEMMLowp.cpp                   | 17 ++---
 .../GLES_COMPUTE/FullyConnectedLayer.cpp           | 25 ++++---
 tests/validation/NEON/FullyConnectedLayer.cpp      | 37 +++++----
 tests/validation/NEON/GEMMLowp.cpp                 | 38 +++-------
 .../fixtures/FullyConnectedLayerFixture.h          | 27 ++++---
 26 files changed, 312 insertions(+), 272 deletions(-)

diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index c5f6608163..31bc55098a 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -41,11 +41,11 @@ public:
     }
     /** Initialize the union with a pixel value of chosen datatype
      *
-     * @param[in] v        int value.
+     * @param[in] v        value.
      * @param[in] datatype DataType that @p v has to be stored with
      * @param[in] qinfo    (Optional) QuantizationInfo to apply in case of quantized data types to @p v
      */
-    PixelValue(int64_t v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
+    PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
         : PixelValue()
     {
         switch(datatype)
@@ -57,13 +57,13 @@ public:
             value.s8 = static_cast<int8_t>(v);
             break;
         case DataType::QASYMM8:
-            value.u8 = quantize_qasymm8(static_cast<uint8_t>(v), qinfo);
+            value.u8 = quantize_qasymm8(static_cast<float>(v), qinfo);
             break;
         case DataType::QASYMM8_SIGNED:
-            value.s8 = quantize_qasymm8_signed(static_cast<int8_t>(v), qinfo);
+            value.s8 = quantize_qasymm8_signed(static_cast<float>(v), qinfo);
             break;
         case DataType::QSYMM8:
-            value.s8 = quantize_qsymm8(static_cast<int8_t>(v), qinfo);
+            value.s8 = quantize_qsymm8(static_cast<float>(v), qinfo);
             break;
         case DataType::U16:
             value.u16 = static_cast<uint16_t>(v);
@@ -72,10 +72,10 @@ public:
             value.s16 = static_cast<int16_t>(v);
             break;
         case DataType::QASYMM16:
-            value.u16 = quantize_qasymm16(static_cast<uint16_t>(v), qinfo);
+            value.u16 = quantize_qasymm16(static_cast<float>(v), qinfo);
             break;
         case DataType::QSYMM16:
-            value.s16 = quantize_qsymm16(static_cast<int16_t>(v), qinfo);
+            value.s16 = quantize_qsymm16(static_cast<float>(v), qinfo);
             break;
         case DataType::U32:
             value.u32 = static_cast<uint32_t>(v);
@@ -96,10 +96,8 @@ public:
             value.f32 = static_cast<float>(v);
             break;
         case DataType::F64:
-            value.f64 = static_cast<double>(v);
-            break;
         default:
-            value.s64 = v;
+            value.f64 = v;
             break;
         }
     }
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 2030b171c6..cf689d757c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -799,39 +799,6 @@ private:
     DimensionRoundingType _round_type;
 };
 
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
-    DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
-    bool       transpose_weights{ true };                  /**< Transpose weights if true. */
-    bool       are_weights_reshaped{ false };              /**< Reshape the weights tensor if false. */
-    bool       retain_internal_weights{ false };           /**< Retain internal reshaped weights. */
-    bool       fp_mixed_precision{ false };                /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
-    /** Sets the weights trained data layout
-     *
-     * @param[in] layout Data layout that the weights were trained with
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
-    {
-        weights_trained_layout = layout;
-        return *this;
-    }
-    /** Sets the transpose weights flag
-     *
-     * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
-    {
-        transpose_weights = should_transpose_weights;
-        return *this;
-    }
-};
-
 /** PriorBox layer info */
 class PriorBoxLayerInfo final
 {
@@ -1674,6 +1641,40 @@ private:
     bool _enabled = { false };
 };
 
+/** Fully connected layer info */
+struct FullyConnectedLayerInfo
+{
+    DataLayout          weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
+    bool                transpose_weights{ true };                  /**< Transpose weights if true. */
+    bool                are_weights_reshaped{ false };              /**< Reshape the weights tensor if false. */
+    bool                retain_internal_weights{ false };           /**< Retain internal reshaped weights. */
+    bool                fp_mixed_precision{ false };                /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
+    ActivationLayerInfo activation_info{};                          /**< Fused activation to apply after the matrix multiplication. */
+
+    /** Sets the weights trained data layout
+     *
+     * @param[in] layout Data layout that the weights were trained with
+     *
+     * @return Updated object
+     */
+    FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
+    {
+        weights_trained_layout = layout;
+        return *this;
+    }
+    /** Sets the transpose weights flag
+     *
+     * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+     *
+     * @return Updated object
+     */
+    FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+    {
+        transpose_weights = should_transpose_weights;
+        return *this;
+    }
+};
+
 /** Normalization Layer Information class */
 class NormalizationLayerInfo
 {
@@ -1944,16 +1945,16 @@ enum class GEMMLowpOutputStageType
 /** GEMMLowp output stage info */
 struct GEMMLowpOutputStageInfo
 {
-    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
-    int32_t                 gemmlowp_offset{ 0 };                  /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_multiplier{ 0 };              /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_shift{ 0 };                   /**< GEMMLowp output stage shift used for quantizing to uint8 */
-    int32_t                 gemmlowp_min_bound{ 0 };               /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
-    int32_t                 gemmlowp_max_bound{ 0 };               /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_multipliers{};                /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_shifts{};                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    bool                    is_quantized_per_channel{ false };     /**< GEMMLowp quantized per-channel flag */
-    DataType                output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
+    int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
+    int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+    int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate up the output result before converting back to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multipliers used for quantizing to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage shifts used for quantizing to QASYMM8 */
+    bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
+    DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
 };
 
 /** GEMM LHS (Left Hand Side) matrix information */
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index 41a7bc9946..10c310dda2 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,11 @@ public:
     FullyConnectedLayerNode(unsigned int num_outputs, QuantizationInfo out_quant_info = QuantizationInfo(), FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+    /** Sets fused activation
+     *
+     * @param[in] fused_activation Fused activation to set
+     */
+    void set_fused_activation(ActivationLayerInfo fused_activation);
     /** Computes weights descriptor
      *
      * @warning Works for inputs with 1D batch space
@@ -83,6 +88,8 @@ public:
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+    static constexpr NodeType node_type = NodeType::FullyConnectedLayer;
+
 private:
     unsigned int     _num_outputs;
     QuantizationInfo _out_quant_info;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index f453879fd8..564135eed8 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,24 +68,25 @@ public:
      * @param[in]  result_offset   Offset to be added to each element of the input matrix
      * @param[in]  result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
-     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
@@ -128,25 +129,25 @@ public:
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
-                   int min = 0, int max = 0);
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
@@ -189,25 +190,25 @@ public:
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
-     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
-                   int min = 0, int max = 0);
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
-     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
@@ -230,24 +231,25 @@ public:
      * @param[out] output     Output tensor. Data type supported: QASYMM8
      * @param[in]  multiplier Float multiplier to be multiplied to each element of the input matrix
      * @param[in]  offset     Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max        (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
  *
@@ -288,24 +290,25 @@ public:
      * @param[out] output                       Output tensor. Data type supported: QSYMM16
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
      *
      * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor info. Data type supported: QSYMM16
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */
\ No newline at end of file
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index ca2cbbc268..283b052917 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -68,24 +68,25 @@ public:
      * @param[in]  result_offset   Offset to be added to each element of the input matrix
      * @param[in]  result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
-     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
@@ -128,25 +129,25 @@ public:
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
  *
@@ -187,24 +189,25 @@ public:
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
     *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
  *
@@ -245,24 +248,25 @@ public:
      * @param[out] output                       Output tensor. Data type supported: QSYMM16
      * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
      *
     * @param[in] input  Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
      * @param[in] output Output tensor info. Data type supported: QSYMM16
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
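Note on the new defaults used throughout the two headers above: a bound is now only meaningful when it actually tightens the output type's representable range, which is what lets the kernels in the following files skip clamping entirely for the default std::numeric_limits<int32_t> values. A minimal standalone sketch of that predicate (illustrative names only, not library code):

    // clamp_demo.cpp - mirrors the "min > type_min" / "max < type_max" checks
    // that the updated kernels use to decide whether to emit MIN_BOUND/MAX_BOUND.
    #include <cstdint>
    #include <iostream>
    #include <limits>

    // True when the requested [min, max] interval is narrower than the
    // output data type's natural range [type_min, type_max].
    bool needs_clamp(int32_t min, int32_t max, int32_t type_min, int32_t type_max)
    {
        return (min > type_min) || (max < type_max);
    }

    int main()
    {
        const int32_t lowest  = std::numeric_limits<int32_t>::lowest();
        const int32_t highest = std::numeric_limits<int32_t>::max();

        // Default bounds against a QASYMM8 output ([0, 255]): no clamping needed.
        std::cout << needs_clamp(lowest, highest, 0, 255) << '\n'; // prints 0
        // A fused bounded ReLU that quantizes to, say, [0, 133]: clamping needed.
        std::cout << needs_clamp(0, 133, 0, 255) << '\n';          // prints 1
        return 0;
    }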
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
index 5550003f33..b9563553b8 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
@@ -113,22 +113,9 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
         ARM_COMPUTE_RETURN_ERROR_ON(output_stage.output_data_type != output->data_type());
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mm_result, output);
-        PixelValue min_val{};
-        PixelValue max_val{};
-        std::tie(min_val, max_val) = get_min_max(output->data_type());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
-    }
-    else
-    {
-        // Output will be configured as depending on the chosen output data type in the output stage
-        PixelValue min_val{};
-        PixelValue max_val{};
-        std::tie(min_val, max_val) = get_min_max(output_stage.output_data_type);
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
     }
+    ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_stage.gemmlowp_multipliers.size() != output_stage.gemmlowp_shifts.size(), "per channel quantization info is incorrect");
 
     return Status{};
@@ -248,8 +235,8 @@ void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *m
     PixelValue min_val{};
     PixelValue max_val{};
     std::tie(min_val, max_val) = get_min_max(output->info()->data_type());
-    build_opts.add_option_if((min != min_val.get<int32_t>()) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != max_val.get<int32_t>()) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > min_val.get<int32_t>()), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < max_val.get<int32_t>()), "-DMAX_BOUND=" + support::cpp11::to_string(max));
 
     std::string kernel_name("gemmlowp_offset_contribution");
     kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type);
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
index 8720123366..1bc7fe3946 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -45,8 +45,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 32767);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -137,8 +136,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const
     CLBuildOptions build_opts;
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
-    build_opts.add_option_if((min != -32768) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 32767) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > -32768), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 32767), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
index 8a5ce9fa87..e207fcb1b0 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 127);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
     build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
-    build_opts.add_option_if((min != -128) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 127) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > -128), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 127), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index dc04fed96e..7601d7ee77 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
     build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
-    build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
index ae096f295c..dd1be748f5 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -43,8 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -132,8 +131,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::configure(const ICLTe
     CLBuildOptions build_opts;
     build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(multiplier));
     build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(offset));
-    build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 8175f60275..7a22239a7c 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -41,8 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -135,8 +134,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(result_offset)); build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(result_mult_int)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift)); - build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 5d2df6d2c9..31414e3f3f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -778,15 +778,8 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32); - if(output->data_type() == DataType::QASYMM8) + if(output->data_type() != DataType::QASYMM8) { - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 255); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < 0); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 127); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < -128); ARM_COMPUTE_RETURN_ERROR_ON(mm_result->dimension(0) > 1 && output_stage.gemmlowp_multipliers.size() > 1 && b_offset != 0); } ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound); @@ -914,7 +907,7 @@ get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row, std::tie(type_min, type_max) = get_min_max(output->info()->data_type()); int32_t type_min_int = type_min.get(); int32_t type_max_int = type_max.get(); - const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound == type_min_int && output_stage.gemmlowp_max_bound == type_max_int); + const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound <= type_min_int && output_stage.gemmlowp_max_bound >= type_max_int); // Check if we need to perform fixed point requantization const bool is_fixed_point = output_stage.type != GEMMLowpOutputStageType::QUANTIZE_DOWN; diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index bc513e6618..058007139d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ 
b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 32767); - ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -213,7 +212,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == -32768 && max == 32767)); + const bool is_bounded_relu = !(min <= -32768 && max >= 32767); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run : &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index d24089d615..b8ca17ec3d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 127); - ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -222,7 +221,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == -128 && max == 127)); + const bool is_bounded_relu = !(min <= -128 && max >= 127); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index bb0b86404e..4a9d2f7481 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -224,7 +223,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255)); + const bool is_bounded_relu = !(min <= 0 && max >= 255); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index a221bd7925..a68e4e7efb 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,8 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -324,7 +323,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255)); + const bool is_bounded_relu = !(min <= 0 && max >= 255); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run; } diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index b7f081dc42..151a8bfa03 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -301,6 +301,10 @@ void NodeFusionMutator::mutate(Graph &g) { return true; }; + auto cl_target_prec = [](INode & n) + { + return n.assigned_target() == Target::CL; + }; auto qs8_prec = [&g](INode & n) { ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr); @@ -318,6 +322,7 @@ void NodeFusionMutator::mutate(Graph &g) detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations); detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations); detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations); detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization); detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index 80fce7b8a1..34c432a1ce 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,11 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant _outputs.resize(1, NullTensorID); } +void FullyConnectedLayerNode::set_fused_activation(ActivationLayerInfo fused_activation) +{ + _info.activation_info = fused_activation; +} + TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, FullyConnectedLayerInfo fc_info, diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index dcaa12645e..9b7de8df1b 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -41,7 +41,7 @@ using namespace arm_compute::utils::cast; namespace { Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output, - GEMMLowpOutputStageInfo &gemmlowp_output_stage) + GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info) { gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; gemmlowp_output_stage.gemmlowp_offset = 0; @@ -53,13 +53,14 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn // Configure output stage for quantized case if(is_data_type_quantized_asymmetric(data_type)) { - const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output.quantization_info().uniform(); + const QuantizationInfo oq_info = output.quantization_info(); + const UniformQuantizationInfo iq_unif = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform(); + const UniformQuantizationInfo oq_unif = oq_info.uniform(); - const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info; + const auto output_quant_info = (output.total_size() == 0) ?
iq_unif : oq_unif; - const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; + const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); @@ -68,6 +69,27 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn PixelValue type_max{}; std::tie(type_min, type_max) = get_min_max(data_type); + if(activation_info.enabled()) + { + switch(activation_info.activation()) + { + case ActivationLayerInfo::ActivationFunction::RELU: + type_min = PixelValue(oq_unif.offset); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + type_min = PixelValue(oq_unif.offset); + type_max = PixelValue(activation_info.a(), data_type, oq_info); + break; + case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: + type_min = PixelValue(activation_info.b(), data_type, oq_info); + type_max = PixelValue(activation_info.a(), data_type, oq_info); + break; + default: + ARM_COMPUTE_ERROR("Activation function not supported."); + break; + } + } + // Set the GEMMLowp output stage info gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; @@ -84,7 +106,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info) { GEMMLowpOutputStageInfo gemmlowp_output_stage; - ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); + ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info)); const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped @@ -144,7 +166,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr mem void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info) { GEMMLowpOutputStageInfo gemmlowp_output_stage; - construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage); + construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info); const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped @@ -155,7 +177,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor gemmlowp_output_stage, // gemmlowp_output_stage fc_info.fp_mixed_precision, // fp_mixed_precision true, // broadcast_bias - ActivationLayerInfo()); // activation_info + fc_info.activation_info); // activation_info if(_is_quantized) { @@ -313,6 +335,8 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU 
+ && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; bool is_fc_after_conv = true; diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 682812b1c8..5398050533 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -333,8 +333,12 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; - int min_activation = 0; - int max_activation = 0; + PixelValue min_val{}; + PixelValue max_val{}; + std::tie(min_val, max_val) = get_min_max(output->info()->data_type()); + + auto min_activation = min_val.get<int32_t>(); + auto max_activation = max_val.get<int32_t>(); const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp index e57dd4e7b1..357d77d03a 100644 --- a/tests/validation/CL/FullyConnectedLayer.cpp +++ b/tests/validation/CL/FullyConnectedLayer.cpp @@ -67,6 +67,23 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(1.1f, 10), }); + +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH) +}); + +const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f) +}); } // namespace TEST_SUITE(CL) @@ -174,16 +191,18 @@ using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) +
framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -191,14 +210,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture, framework:: TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); @@ -212,13 +233,15 @@ using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData)) + combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData), + ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData)) + combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData), + ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -226,7 +249,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture, TEST_SUITE_END() /* QASYMM8 */ TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData)) + combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData), + 
ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index eb42c4c659..94621b4393 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -150,7 +150,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale) const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) @@ -229,7 +229,7 @@ TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint) const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) @@ -310,7 +310,7 @@ TEST_SUITE_END() // BoundedReLu TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint) const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true }); @@ -379,7 +379,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint) const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", -32768) * 
framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) @@ -389,7 +389,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f 1073741825) * framework::dataset::make("result_shift", -3, -2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) @@ -404,26 +404,21 @@ using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Wrong output data type }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16), TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), })), framework::dataset::make("Min",{ -205, - -60000, -180, })), framework::dataset::make("Max",{ 205, - 60000, 180, })), - framework::dataset::make("Expected", { true, false, false })), + framework::dataset::make("Expected", { true, false })), a_info, b_info, output_info, min, max, expected) { // Lock tensors diff --git a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp index 4c7ef81572..1ef2fb9559 100644 --- a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp +++ b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -56,7 +56,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo()); } // namespace TEST_SUITE(GC) @@ -107,16 +108,18 @@ using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<GCTensor, GCAccessor, GCFullyConnectedLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -124,14 +127,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half>, TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(GCAccessor(_target), _reference, rel_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(GCAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp index fae116aa9f..f66b0ceb8a 100644 --- a/tests/validation/NEON/FullyConnectedLayer.cpp +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -71,6 +71,8 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f /
256.f, 10), QuantizationInfo(1.1f, 10), }); + +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo()); } // namespace TEST_SUITE(NEON) @@ -179,16 +181,18 @@ using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); @@ -197,14 +201,16 @@ TEST_SUITE_END() #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); @@ -217,31 +223,34 @@ using NEFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine( +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), - QuantizationData)) + QuantizationData), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge,
NEFullyConnectedLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine( +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine( combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), - QuantizationData)) + QuantizationData), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine( +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine( combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - QuantizationData)) + QuantizationData), + ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 10f2284914..de30bd5451 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -169,7 +169,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale) const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) @@ -181,26 +181,21 @@ using NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32 // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), TensorInfo(TensorShape(20U), 1, DataType::S32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), })), framework::dataset::make("Min",{ 0, - 8, 13, })), framework::dataset::make("Max",{ 205, - 300, 180, })), - framework::dataset::make("Expected", { true, false, false })), + framework::dataset::make("Expected", { true, false })), a_info, b_info, output_info, min, max, expected) { // Lock tensors @@ -287,7 +282,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint) const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * 
framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) @@ -303,26 +298,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), TensorInfo(TensorShape(20U), 1, DataType::S32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), })), framework::dataset::make("Min",{ 0, - 8, 13, })), framework::dataset::make("Max",{ 205, - 300, 180, })), - framework::dataset::make("Expected", { true, false, false })), + framework::dataset::make("Expected", { true, false })), a_info, b_info, output_info, min, max, expected) { // Lock tensors @@ -414,7 +404,7 @@ TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint) const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) @@ -427,31 +417,26 @@ using NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture = // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::F32), // Invalid input data type - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), TensorInfo(TensorShape(20U), 1, DataType::S32), TensorInfo(TensorShape(21U), 1, DataType::S32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), 
TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), })), framework::dataset::make("Min",{ -110, - -130, -113, -113, })), framework::dataset::make("Max",{ 87, - 140, 97, 97, })), - framework::dataset::make("Expected", { false, false, false, true })), + framework::dataset::make("Expected", { false, false, true })), a_info, b_info, output_info, min, max, expected) { // Lock tensors @@ -527,7 +512,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint) const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) @@ -536,7 +521,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f 1073741825) * framework::dataset::make("result_shift", -3, -2) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); + * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) @@ -551,26 +536,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), TensorInfo(TensorShape(20U), 1, DataType::S32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16), TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), })), framework::dataset::make("Min",{ -205, - -60000, -180, })), framework::dataset::make("Max",{ 205, - 60000, 180, })), - framework::dataset::make("Expected", { true, false, false })), + framework::dataset::make("Expected", { true, false })), a_info, b_info, output_info, min, max, expected) { // Lock tensors diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h index 7f0ceadea1..6952b226da 100644 --- a/tests/validation/fixtures/FullyConnectedLayerFixture.h +++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h @@ -34,6 +34,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/FullyConnectedLayer.h" #include "tests/validation/reference/Utils.h" @@ -55,7 +56,7 @@ public: public: template void 
setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, - DataType data_type, QuantizationInfo quantization_info) + DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info) { ARM_COMPUTE_UNUSED(weights_shape); ARM_COMPUTE_UNUSED(bias_shape); @@ -63,6 +64,7 @@ public: _data_type = data_type; _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; _quantization_info = quantization_info; + _activation_info = activation_info; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape); @@ -130,6 +132,7 @@ protected: FullyConnectedLayerInfo fc_info; fc_info.transpose_weights = transpose_weights; fc_info.are_weights_reshaped = !reshape_weights; + fc_info.activation_info = _activation_info; // Create and configure function. FunctionType fc; @@ -199,14 +202,15 @@ protected: fill(weights, 1); fill(bias, 2); - return reference::fully_connected_layer(src, weights, bias, output_shape); + return reference::activation_layer(reference::fully_connected_layer(src, weights, bias, output_shape), _activation_info, _quantization_info); } - TensorType _target{}; - SimpleTensor _reference{}; - DataType _data_type{}; - DataType _bias_data_type{}; - QuantizationInfo _quantization_info{}; + TensorType _target{}; + SimpleTensor _reference{}; + DataType _data_type{}; + DataType _bias_data_type{}; + QuantizationInfo _quantization_info{}; + ActivationLayerInfo _activation_info{}; }; template @@ -214,11 +218,12 @@ class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidatio { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, + ActivationLayerInfo activation_info) { FullyConnectedLayerValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, - QuantizationInfo()); + QuantizationInfo(), activation_info); } }; @@ -228,11 +233,11 @@ class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayer public: template void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, - QuantizationInfo quantization_info) + QuantizationInfo quantization_info, ActivationLayerInfo activation_info) { FullyConnectedLayerValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, - quantization_info); + quantization_info, activation_info); } }; } // namespace validation -- cgit v1.2.1
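
Note on the mechanism: the hunks above fold the fused activation into the GEMMLowp output stage's [min, max] clamp instead of launching a separate activation kernel, and the kernels now treat clamping as a no-op whenever the requested bounds cover the whole representable range. The idea condenses to the standalone sketch below. This is illustrative only: UniformQuantization, fold_relu_bounds and relu_kind are names invented for the example, the QASYMM8 (uint8) range is hard-coded for brevity, and none of this is Compute Library API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Output quantization: real_value = (quantized - offset) * scale
struct UniformQuantization
{
    float   scale;
    int32_t offset;
};

// Quantize a real value and saturate it to the uint8 representable range.
static int32_t quantize(float real_value, const UniformQuantization &q)
{
    const int32_t q_val = static_cast<int32_t>(std::lround(real_value / q.scale)) + q.offset;
    return std::min(std::max(q_val, 0), 255);
}

// Fold a ReLU-family activation into the output-stage clamp.
// relu_kind: 0 = ReLU (floor at real 0), 1 = bounded ReLU (clamp to [0, a]),
//            2 = lower/upper bounded ReLU (clamp to [b, a])
static void fold_relu_bounds(int relu_kind, float a, float b, const UniformQuantization &oq,
                             int32_t &min_bound, int32_t &max_bound)
{
    min_bound = 0;   // default: the full uint8 range, i.e. no clamping
    max_bound = 255;
    switch(relu_kind)
    {
        case 0: min_bound = oq.offset; break;                            // real 0 quantizes to the offset
        case 1: min_bound = oq.offset; max_bound = quantize(a, oq); break;
        case 2: min_bound = quantize(b, oq); max_bound = quantize(a, oq); break;
        default: break;
    }
}

int main()
{
    const UniformQuantization oq{ 0.05f, 10 };
    int32_t min_bound = 0, max_bound = 0;
    fold_relu_bounds(1, 6.f, 0.f, oq, min_bound, max_bound); // bounded ReLU with a = 6.0
    // Same test the patched NEON kernels use: only clamp if part of the range is cut off.
    const bool is_bounded_relu = !(min_bound <= 0 && max_bound >= 255);
    std::printf("min=%d max=%d bounded=%d\n", min_bound, max_bound, is_bounded_relu);
    return 0;
}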
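From the caller's side, the fusion is requested through FullyConnectedLayerInfo rather than by configuring a separate activation layer. A rough usage sketch, assuming an initialised CL scheduler and allocated tensors (both elided here):

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

using namespace arm_compute;

void configure_fused_fc(CLTensor &input, CLTensor &weights, CLTensor &bias, CLTensor &output)
{
    // Request a bounded ReLU (clamp to [0, 6]) fused into the FC output stage.
    FullyConnectedLayerInfo fc_info;
    fc_info.activation_info = ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

    CLFullyConnectedLayer fc;
    fc.configure(&input, &weights, &bias, &output, fc_info);
}

As the new check in CLFullyConnectedLayer::validate() shows, quantized inputs only accept RELU, BOUNDED_RELU and LU_BOUNDED_RELU, since only those map onto a [min, max] clamp of the requantized output.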