From b66aa3b0f0fd81ae4eb383734045a55351776c7e Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Fri, 10 Jan 2020 14:44:13 +0000 Subject: COMPMID-2759 add support for QASYMM8_SIGNED to CLFullyConnectedLayer Change-Id: I7092390b01a56065a442be0d14e2f9bfce2cdc9c Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/2583 Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h | 7 ++----- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 16 +++++++++++----- tests/validation/CL/FullyConnectedLayer.cpp | 15 ++++++++++++--- tests/validation/fixtures/FullyConnectedLayerFixture.h | 3 ++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 39c3c2b85b..cbd28603fc 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -26,14 +26,12 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -117,7 +115,6 @@ private: * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric) - * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ @@ -136,7 +133,7 @@ public: CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. + * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. The weights must be 2 dimensional. * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. @@ -152,7 +149,7 @@ public: FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor info. The weights must be 2 dimensional. 
* If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index ad0714ed15..dcaa12645e 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,8 +48,10 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn gemmlowp_output_stage.gemmlowp_multiplier = 0; gemmlowp_output_stage.gemmlowp_shift = 0; + const auto data_type = input.data_type(); + // Configure output stage for quantized case - if(is_data_type_quantized_asymmetric(input.data_type())) + if(is_data_type_quantized_asymmetric(data_type)) { const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); @@ -62,14 +64,18 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn int output_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); + PixelValue type_min{}; + PixelValue type_max{}; + std::tie(type_min, type_max) = get_min_max(data_type); + // Set the GEMMLowp output stage info gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; gemmlowp_output_stage.gemmlowp_shift = output_shift; - gemmlowp_output_stage.gemmlowp_min_bound = 0; - gemmlowp_output_stage.gemmlowp_max_bound = 255; gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier); gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift); + type_min.get(gemmlowp_output_stage.gemmlowp_min_bound); + type_max.get(gemmlowp_output_stage.gemmlowp_max_bound); } return Status{}; @@ -304,7 +310,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn FullyConnectedLayerInfo fc_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp index 091d9411b7..e57dd4e7b1 100644 --- a/tests/validation/CL/FullyConnectedLayer.cpp +++ b/tests/validation/CL/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,7 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
     DataType::F16,
     DataType::F32,
     DataType::QASYMM8,
+    DataType::QASYMM8_SIGNED,
 });
 
 const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
@@ -222,8 +223,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>,
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() /* QASYMM8 */
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() /* QASYMM8_SIGNED */
+TEST_SUITE_END() /* Quantized */
 TEST_SUITE_END()
 TEST_SUITE_END()

diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index ff6ac17744..7f0ceadea1 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -49,7 +49,8 @@ template <typename TensorType, typename AccessorType, typename FunctionType, ty
 class FullyConnectedLayerValidationGenericFixture : public framework::Fixture
 {
-    using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value || std::is_same<typename std::decay<T>::type, int8_t>::value, int32_t, T >::type;
+    using TDecay = typename std::decay<T>::type;
+    using TBias  = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
 
 public:
     template <typename...>
-- 
cgit v1.2.1
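
A note on the functional core of this change, for readers skimming the diff: construct_gemmlowp_output_stage() previously hardcoded the GEMMLowp output stage's saturation bounds to 0 and 255, which is only correct for QASYMM8. The patch derives the bounds from the input data type via PixelValue and get_min_max(), so QASYMM8 still clamps to [0, 255] while QASYMM8_SIGNED clamps to [-128, 127]. The sketch below illustrates that selection logic in standalone C++; the DataType enum and the clamp_bounds() helper are illustrative stand-ins, not the Compute Library API.

    #include <cstdint>
    #include <initializer_list>
    #include <iostream>
    #include <limits>
    #include <utility>

    // Stand-in for arm_compute::DataType, reduced to the two cases at hand.
    enum class DataType { QASYMM8, QASYMM8_SIGNED };

    // Mirrors the effect of get_min_max(data_type) in the patch: the clamp
    // bounds of the quantized output stage follow the tensor's storage type.
    std::pair<int32_t, int32_t> clamp_bounds(DataType dt)
    {
        if(dt == DataType::QASYMM8_SIGNED)
        {
            return { std::numeric_limits<int8_t>::min(),   // -128
                     std::numeric_limits<int8_t>::max() }; //  127
        }
        return { std::numeric_limits<uint8_t>::min(),   // 0   (the old hardcoded value)
                 std::numeric_limits<uint8_t>::max() }; // 255 (the old hardcoded value)
    }

    int main()
    {
        for(DataType dt : { DataType::QASYMM8, DataType::QASYMM8_SIGNED })
        {
            const std::pair<int32_t, int32_t> bounds = clamp_bounds(dt);
            std::cout << bounds.first << " .. " << bounds.second << '\n';
        }
        return 0; // prints "0 .. 255" then "-128 .. 127"
    }

The rest of the patch is plumbing around that: validate() now accepts DataType::QASYMM8_SIGNED, the test suite instantiates the existing quantized fixture with int8_t, and the fixture's TBias alias already promoted both uint8_t and int8_t accumulators to int32_t, so only the std::decay refactoring was needed there.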