diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2020-02-07 13:46:45 +0000 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2020-03-02 15:51:39 +0000 |
commit | 1856ff7ebb29e04c3549b74d7ced336111cbf05e (patch) | |
tree | c94654f0d8535930a81712bf7aadffd757c82577 /src | |
parent | 3c4bf0c4eab5ead756c472f17ddf008b882cc905 (diff) | |
download | ComputeLibrary-1856ff7ebb29e04c3549b74d7ced336111cbf05e.tar.gz |
COMPMID-3097 Fuse activation with fully connected layer CL
Change-Id: I447030e69b9e565f2f81529a41af8c5e7ece7ecf
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2702
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
15 files changed, 86 insertions, 77 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp index 5550003f33..b9563553b8 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -113,22 +113,9 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto ARM_COMPUTE_RETURN_ERROR_ON(output_stage.output_data_type != output->data_type()); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mm_result, output); - PixelValue min_val{}; - PixelValue max_val{}; - std::tie(min_val, max_val) = get_min_max(output->data_type()); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>()); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound); - } - else - { - // Output will be configured as depending on the chosen output data type in the output stage - PixelValue min_val{}; - PixelValue max_val{}; - std::tie(min_val, max_val) = get_min_max(output_stage.output_data_type); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>()); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound); } + ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_stage.gemmlowp_multipliers.size() != output_stage.gemmlowp_shifts.size(), "per channel quantization info is incorrect"); return Status{}; @@ -248,8 +235,8 @@ void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *m PixelValue min_val{}; PixelValue max_val{}; std::tie(min_val, max_val) = get_min_max(output->info()->data_type()); - build_opts.add_option_if((min != min_val.get<int32_t>()) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != max_val.get<int32_t>()) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > min_val.get<int32_t>()), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < max_val.get<int32_t>()), "-DMAX_BOUND=" + support::cpp11::to_string(max)); std::string kernel_name("gemmlowp_offset_contribution"); kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type); diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index 8720123366..1bc7fe3946 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,8 +45,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 32767); - ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -137,8 +136,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const CLBuildOptions build_opts; build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift)); - build_opts.add_option_if((min != -32768) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 32767) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > -32768), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 32767), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index 8a5ce9fa87..e207fcb1b0 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 127); - ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift)); build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.add_option_if((min != -128) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 127) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > -128), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 127), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index dc04fed96e..7601d7ee77 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift)); build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp index ae096f295c..dd1be748f5 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,8 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -132,8 +131,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::configure(const ICLTe CLBuildOptions build_opts; build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(multiplier)); build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(offset)); - build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index 8175f60275..7a22239a7c 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,8 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -135,8 +134,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(result_offset)); build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(result_mult_int)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift)); - build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min)); - build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max)); + build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min)); + build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); // Create kernel diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp index 5d2df6d2c9..31414e3f3f 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp @@ -778,15 +778,8 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32); - if(output->data_type() == DataType::QASYMM8) + if(output->data_type() != DataType::QASYMM8) { - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 255); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < 0); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 127); - ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < -128); ARM_COMPUTE_RETURN_ERROR_ON(mm_result->dimension(0) > 1 && output_stage.gemmlowp_multipliers.size() > 1 && b_offset != 0); } ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound); @@ -914,7 +907,7 @@ get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row, std::tie(type_min, type_max) = get_min_max(output->info()->data_type()); int32_t type_min_int = type_min.get<int32_t>(); int32_t type_max_int = type_max.get<int32_t>(); - const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound == type_min_int && output_stage.gemmlowp_max_bound == type_max_int); + const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound <= type_min_int && output_stage.gemmlowp_max_bound >= type_max_int); // Check if we need to perform fixed point requantization const bool is_fixed_point = output_stage.type != GEMMLowpOutputStageType::QUANTIZE_DOWN; diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index bc513e6618..058007139d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 32767); - ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -213,7 +212,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == -32768 && max == 32767)); + const bool is_bounded_relu = !(min <= -32768 && max >= 32767); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<false>; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp index d24089d615..b8ca17ec3d 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 127); - ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -222,7 +221,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == -128 && max == 127)); + const bool is_bounded_relu = !(min <= -128 && max >= 127); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<false>; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index bb0b86404e..4a9d2f7481 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -224,7 +223,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255)); + const bool is_bounded_relu = !(min <= 0 && max >= 255); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<false>; } diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index a221bd7925..a68e4e7efb 100644 --- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,8 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON(max > 255); - ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max); + ARM_COMPUTE_RETURN_ERROR_ON(min > max); // Check biases if exist if(bias != nullptr) @@ -324,7 +323,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp INEKernel::configure(win_config.second); // Check if we need to clamp the result using min and max - const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255)); + const bool is_bounded_relu = !(min <= 0 && max >= 255); _func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<false>; } diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index b7f081dc42..151a8bfa03 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -301,6 +301,10 @@ void NodeFusionMutator::mutate(Graph &g) { return true; }; + auto cl_target_prec = [](INode & n) + { + return n.assigned_target() == Target::CL; + }; auto qs8_prec = [&g](INode & n) { ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr); @@ -318,6 +322,7 @@ void NodeFusionMutator::mutate(Graph &g) detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations); detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations); detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations); + detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations); detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization); detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization); } diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index 80fce7b8a1..34c432a1ce 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,11 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant _outputs.resize(1, NullTensorID); } +void FullyConnectedLayerNode::set_fused_activation(ActivationLayerInfo fused_activation) +{ + _info.activation_info = fused_activation; +} + TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, FullyConnectedLayerInfo fc_info, diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index dcaa12645e..9b7de8df1b 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -41,7 +41,7 @@ using namespace arm_compute::utils::cast; namespace { Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output, - GEMMLowpOutputStageInfo &gemmlowp_output_stage) + GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info) { gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; gemmlowp_output_stage.gemmlowp_offset = 0; @@ -53,13 +53,14 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn // Configure output stage for quantized case if(is_data_type_quantized_asymmetric(data_type)) { - const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output.quantization_info().uniform(); + const QuantizationInfo oq_info = output.quantization_info(); + const UniformQuantizationInfo iq_unif = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform(); + const UniformQuantizationInfo oq_unif = oq_info.uniform(); - const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info; + const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif; - const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; + const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); @@ -68,6 +69,27 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn PixelValue type_max{}; std::tie(type_min, type_max) = get_min_max(data_type); + if(activation_info.enabled()) + { + switch(activation_info.activation()) + { + case ActivationLayerInfo::ActivationFunction::RELU: + type_min = PixelValue(oq_unif.offset); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + type_min = PixelValue(oq_unif.offset); + type_max = PixelValue(activation_info.a(), data_type, oq_info); + break; + case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: + type_min = PixelValue(activation_info.b(), data_type, oq_info); + type_max = PixelValue(activation_info.a(), data_type, oq_info); + break; + default: + ARM_COMPUTE_ERROR("Activation function not supported."); + break; + } + } + // Set the GEMMLowp output stage info gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; @@ -84,7 +106,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info) { GEMMLowpOutputStageInfo gemmlowp_output_stage; - ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); + ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info)); const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped @@ -144,7 +166,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info) { GEMMLowpOutputStageInfo gemmlowp_output_stage; - construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage); + construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info); const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped @@ -155,7 +177,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor gemmlowp_output_stage, // gemmlowp_output_stage fc_info.fp_mixed_precision, // fp_mixed_precision true, // broadcast_bias - ActivationLayerInfo()); // activation_info + fc_info.activation_info); // activation_info if(_is_quantized) { @@ -313,6 +335,8 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU + && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; bool is_fc_after_conv = true; diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 682812b1c8..5398050533 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -333,8 +333,12 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; - int min_activation = 0; - int max_activation = 0; + PixelValue min_val{}; + PixelValue max_val{}; + std::tie(min_val, max_val) = get_min_max(output->info()->data_type()); + + auto min_activation = min_val.get<int32_t>(); + auto max_activation = max_val.get<int32_t>(); const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, |