From ae58bdf3b58739e105a24e3640d0245e81cea5ee Mon Sep 17 00:00:00 2001
From: Manuel Bottini
Date: Thu, 17 Jun 2021 17:18:45 +0100
Subject: Port NEGEMMLowp Part 1

Details:
Port NEGEMMLowpQuantizeDownInt32ScaleKernel to CpuGemmLowpQuantizeDownInt32ScaleKernel
Port NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
Port NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
Port NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel to CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
Port NEGEMMLowpOutputStage functions to CpuGemmLowpOutputStage operators

Partially Resolves: COMPMID-4403

Change-Id: I6d5f45e43f35d731d564ed3b5c0e804d2a318fb1
Signed-off-by: Manuel Bottini
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5833
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../NEON/functions/NEGEMMLowpOutputStage.cpp       | 165 ++++-----------
 .../NEON/functions/NELSTMLayerQuantized.cpp        |  15 +-
 .../cpu/operators/CpuGemmLowpOutputStage.cpp       | 147 ++++++++++++++++++
 src/runtime/cpu/operators/CpuGemmLowpOutputStage.h |  90 +++++++++++
 4 files changed, 277 insertions(+), 140 deletions(-)
 create mode 100644 src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp
 create mode 100644 src/runtime/cpu/operators/CpuGemmLowpOutputStage.h

(limited to 'src/runtime')

diff --git a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
index 807785a534..8351cc66d0 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,57 +25,23 @@
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Validate.h"
-#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
+#include "src/runtime/cpu/operators/CpuGemmLowpOutputStage.h"
 
 namespace arm_compute
 {
-NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
-
-void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
-                                                                    int result_offset_after_shift, int min, int max)
-{
-    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
-    _kernel = std::move(k);
-}
-
-Status NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
-{
-    return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
-}
-
-NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
-
-void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
-                                                                   int result_offset_after_shift, int min, int max)
-{
-    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
-    _kernel = std::move(k);
-}
-
-Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
+struct NEGEMMLowpOutputStage::Impl
 {
-    return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
-}
-
-NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
-
-void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max)
-{
-    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, min, max);
-    _kernel = std::move(k);
-}
-
-Status NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
+    const ITensor                                *src{ nullptr };
+    const ITensor                                *bias{ nullptr };
+    ITensor                                      *dst{ nullptr };
+    ITensorPack                                   run_pack{};
+    std::unique_ptr<cpu::CpuGemmLowpOutputStage>  op{ nullptr };
+};
+
+NEGEMMLowpOutputStage::NEGEMMLowpOutputStage()
+    : _impl(std::make_unique<Impl>())
 {
-    return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max);
 }
-
 NEGEMMLowpOutputStage::~NEGEMMLowpOutputStage() = default;
 
 void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
@@ -83,104 +49,27 @@ void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias,
     // Perform validate step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpOutputStage::validate(input->info(), bias != nullptr ? bias->info() : nullptr, output->info(), info));
+    _impl->src  = input;
+    _impl->bias = bias;
+    _impl->dst  = output;
+    _impl->op   = std::make_unique<cpu::CpuGemmLowpOutputStage>();
+    _impl->op->configure(input->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info);
 
-    switch(info.type)
+    _impl->run_pack =
     {
-        case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
-        {
-            switch(info.output_data_type)
-            {
-                case DataType::QASYMM8:
-                {
-                    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>();
-                    k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                    _kernel = std::move(k);
-                    break;
-                }
-                case DataType::QASYMM8_SIGNED:
-                {
-                    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
-                    k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                    _kernel = std::move(k);
-                    break;
-                }
-                case DataType::QSYMM16:
-                {
-                    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
-                    k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                    _kernel = std::move(k);
-                    break;
-                }
-                default:
-                {
-                    ARM_COMPUTE_ERROR("Unsupported output data type.");
-                    break;
-                }
-            }
-            break;
-        }
-        case GEMMLowpOutputStageType::QUANTIZE_DOWN:
-        {
-            switch(info.output_data_type)
-            {
-                case DataType::QASYMM8:
-                case DataType::QASYMM8_SIGNED:
-                {
-                    auto k = std::make_unique<NEGEMMLowpQuantizeDownInt32ScaleKernel>();
-                    k->configure(input, bias, output, &info);
-                    _kernel = std::move(k);
-                    break;
-                }
-                default:
-                {
-                    ARM_COMPUTE_ERROR("Unsupported output data type.");
-                    break;
-                }
-            }
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("Unsupported GEMMLowpOutputStage type.");
-    }
+        { TensorType::ACL_SRC, _impl->src },
+        { TensorType::ACL_BIAS, _impl->bias },
+        { TensorType::ACL_DST, _impl->dst }
+    };
 }
 
 Status NEGEMMLowpOutputStage::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::UNKNOWN, "NEGEMMLowpQuantizeDownScaleByFixedPoint cannot be used with UNKNOWN output data type.");
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16);
-
-    ARM_COMPUTE_RETURN_ERROR_ON((info.type != GEMMLowpOutputStageType::QUANTIZE_DOWN) && (info.type != GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT));
+    return cpu::CpuGemmLowpOutputStage::validate(input, bias, output, info);
+}
 
-    switch(info.type)
-    {
-        case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
-        {
-            switch(output->data_type())
-            {
-                case DataType::QASYMM8:
-                    return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                case DataType::QASYMM8_SIGNED:
-                    return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                case DataType::QSYMM16:
-                    return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
-                default:
-                    return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type.");
-            }
-        }
-        case GEMMLowpOutputStageType::QUANTIZE_DOWN:
-        {
-            switch(output->data_type())
-            {
-                case DataType::QASYMM8:
-                case DataType::QASYMM8_SIGNED:
-                    return NEGEMMLowpQuantizeDownInt32ScaleKernel::validate(input, bias, output, &info);
-                default:
-                    return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type.");
-            }
-        }
-        default:
-            return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported GEMMLowpOutputStage type.");
-    }
+void NEGEMMLowpOutputStage::run()
+{
+    _impl->op->run(_impl->run_pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index 5c0f19a15c..a56e16864a 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -152,7 +152,13 @@ void NELSTMLayerQuantized::configure(const ITensor *input,
     quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     _memory_group.manage(&_output_lowp);
-    _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
+
+    GEMMLowpOutputStageInfo info;
+    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+    info.gemmlowp_multiplier = output_multiplier;
+    info.gemmlowp_shift      = output_shift;
+    info.output_data_type    = DataType::QSYMM16;
+    _output_stage.configure(&_output_highp, &_bias, &_output_lowp, info);
 
     _output_highp.allocator()->allocate();
     _bias.allocator()->allocate();
@@ -358,7 +364,12 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
     ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
 
     // _output_stage
-    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&output_highp, &bias_concatenated, &output_lowp));
+    GEMMLowpOutputStageInfo info;
+    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+    info.gemmlowp_multiplier = output_multiplier;
+    info.gemmlowp_shift      = output_shift;
+    info.output_data_type    = DataType::QSYMM16;
+    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&output_highp, &bias_concatenated, &output_lowp, info));
 
     TensorInfo input_gate_input;
     TensorInfo forget_gate_input;
diff --git a/src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp b/src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp
new file mode 100644
index 0000000000..e17f854a21
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuGemmLowpOutputStage.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
+#include "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
+#include "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+void CpuGemmLowpOutputStage::configure(ITensorInfo *src, ITensorInfo *bias, ITensorInfo *dst, const GEMMLowpOutputStageInfo &info)
+{
+    // Perform validate step
+    ARM_COMPUTE_ERROR_THROW_ON(CpuGemmLowpOutputStage::validate(src, bias, dst, info));
+
+    switch(info.type)
+    {
+        case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
+        {
+            switch(info.output_data_type)
+            {
+                case DataType::QASYMM8:
+                {
+                    auto k = std::make_unique<kernels::CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>();
+                    k->configure(src, bias, dst, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                    _kernel = std::move(k);
+                    break;
+                }
+                case DataType::QASYMM8_SIGNED:
+                {
+                    auto k = std::make_unique<kernels::CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
+                    k->configure(src, bias, dst, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                    _kernel = std::move(k);
+                    break;
+                }
+                case DataType::QSYMM16:
+                {
+                    auto k = std::make_unique<kernels::CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
+                    k->configure(src, bias, dst, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                    _kernel = std::move(k);
+                    break;
+                }
+                default:
+                {
+                    ARM_COMPUTE_ERROR("Unsupported output data type.");
+                    break;
+                }
+            }
+            break;
+        }
+        case GEMMLowpOutputStageType::QUANTIZE_DOWN:
+        {
+            switch(info.output_data_type)
+            {
+                case DataType::QASYMM8:
+                case DataType::QASYMM8_SIGNED:
+                {
+                    auto k = std::make_unique<kernels::CpuGemmLowpQuantizeDownInt32ScaleKernel>();
+                    k->configure(src, bias, dst, &info);
+                    _kernel = std::move(k);
+                    break;
+                }
+                default:
+                {
+                    ARM_COMPUTE_ERROR("Unsupported output data type.");
+                    break;
+                }
+            }
+            break;
+        }
+        default:
+            ARM_COMPUTE_ERROR("Unsupported GEMMLowpOutputStage type.");
+    }
+}
+
+Status CpuGemmLowpOutputStage::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() == DataType::UNKNOWN, "CpuGemmLowpOutputStage cannot be used with UNKNOWN output data type.");
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON((info.type != GEMMLowpOutputStageType::QUANTIZE_DOWN) && (info.type != GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT));
+
+    switch(info.type)
+    {
+        case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
+        {
+            switch(dst->data_type())
+            {
+                case DataType::QASYMM8:
+                    return kernels::CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(src, bias, dst, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                case DataType::QASYMM8_SIGNED:
+                    return kernels::CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(src, bias, dst, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                case DataType::QSYMM16:
+                    return kernels::CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(src, bias, dst, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                default:
+                    return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type.");
+            }
+        }
+        case GEMMLowpOutputStageType::QUANTIZE_DOWN:
+        {
+            switch(dst->data_type())
+            {
+                case DataType::QASYMM8:
+                case DataType::QASYMM8_SIGNED:
+                    return kernels::CpuGemmLowpQuantizeDownInt32ScaleKernel::validate(src, bias, dst, &info);
+                default:
+                    return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type.");
+            }
+        }
+        default:
+            return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported GEMMLowpOutputStage type.");
+    }
+}
+
+void CpuGemmLowpOutputStage::run(ITensorPack &tensors)
+{
+    NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors);
+}
+} // namespace cpu
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/cpu/operators/CpuGemmLowpOutputStage.h b/src/runtime/cpu/operators/CpuGemmLowpOutputStage.h
new file mode 100644
index 0000000000..848aaea3e7
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuGemmLowpOutputStage.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_GEMMLOWP_OUTPUTSTAGE_H
+#define ARM_COMPUTE_CPU_GEMMLOWP_OUTPUTSTAGE_H
+
+#include "arm_compute/core/Types.h"
+#include "src/runtime/cpu/ICpuOperator.h"
+
+/** This file contains all available output stages for GEMMLowp.
+ *
+ * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore),
+ * and processes it to obtain the final ASYMM8 value.
+ *
+ * More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md
+ */
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Basic function to execute GEMMLowpQuantizeDown kernels.
+ *
+ * This function calls the following kernels:
+ *
+ * -# @ref kernels::CpuGemmLowpQuantizeDownInt32ScaleKernel
+ * -# @ref kernels::CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
+ * -# @ref kernels::CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
+ * -# @ref kernels::CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
+*/
+class CpuGemmLowpOutputStage : public ICpuOperator
+{
+public:
+    /** Constructor */
+    CpuGemmLowpOutputStage() = default;
+    /** Default destructor */
+    ~CpuGemmLowpOutputStage() = default;
+    /** Initialise the kernel's inputs, output
+     *
+     * Valid data layouts:
+     * - All
+     *
+     * Valid data type configurations:
+     * |src0           |src1          |dst           |
+     * |:--------------|:-------------|:-------------|
+     * |S32            |S32           |QASYMM8       |
+     * |S32            |S32           |QASYMM8_SIGNED|
+     * |S32            |S32           |QSYMM16       |
+     *
+     * @param[in]  src  Input tensor info. Data type supported: S32
+     * @param[in]  bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                  Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] dst  Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16
+     * @param[in]  info GEMMLowp output stage metadata.
+     */
+    void configure(ITensorInfo *src, ITensorInfo *bias, ITensorInfo *dst, const GEMMLowpOutputStageInfo &info);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * Similar to CpuGemmLowpOutputStage::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &info);
+
+    // Inherited methods overridden:
+    void run(ITensorPack &tensors) override;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_GEMMLOWP_OUTPUTSTAGE_H */
--
cgit v1.2.1
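
Editor's note (not part of the patch): after this change, every output-stage variant of the public runtime function is selected through GEMMLowpOutputStageInfo rather than a dedicated function class, as the NELSTMLayerQuantized hunks above demonstrate. The following is a minimal sketch of the retained public API; the tensor shapes and the requantization multiplier/shift/bounds are placeholder values, not taken from the patch.

// Configure-then-allocate usage of NEGEMMLowpOutputStage (QUANTIZE_DOWN_FIXEDPOINT to QASYMM8).
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // S32 accumulators in, QASYMM8 out; shapes are placeholders.
    Tensor acc{}, bias{}, dst{};
    acc.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::S32));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8));

    GEMMLowpOutputStageInfo info{};
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = 1073741824; // placeholder fixed-point multiplier
    info.gemmlowp_shift      = 1;          // placeholder right shift
    info.gemmlowp_offset     = 0;
    info.gemmlowp_min_bound  = 0;
    info.gemmlowp_max_bound  = 255;
    info.output_data_type    = DataType::QASYMM8;

    NEGEMMLowpOutputStage output_stage{};
    output_stage.configure(&acc, &bias, &dst, info); // configure before allocating backing memory

    acc.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    output_stage.run(); // now forwards to cpu::CpuGemmLowpOutputStage through the stored tensor pack
    return 0;
}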
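Editor's note (not part of the patch): the new operator itself is memory-less; configure() consumes only ITensorInfo objects and the actual tensors are handed over at run time in an ITensorPack, mirroring the run_pack built in NEGEMMLowpOutputStage::configure() above. A hedged sketch of driving it directly follows; CpuGemmLowpOutputStage lives under src/, so this is only possible from inside the library tree, and run_output_stage_directly is a hypothetical helper.

// Direct operator usage: stateless configure() on infos, run() on an ITensorPack.
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "src/runtime/cpu/operators/CpuGemmLowpOutputStage.h"

using namespace arm_compute;

void run_output_stage_directly(ITensor *acc, ITensor *bias, ITensor *dst,
                               const GEMMLowpOutputStageInfo &info)
{
    // Only tensor metadata is consumed here; no memory is captured.
    cpu::CpuGemmLowpOutputStage op{};
    op.configure(acc->info(), bias->info(), dst->info(), info);

    // Memory is bound at run time, matching the pack layout used by the runtime function.
    ITensorPack pack =
    {
        { TensorType::ACL_SRC, acc },
        { TensorType::ACL_BIAS, bias },
        { TensorType::ACL_DST, dst }
    };
    op.run(pack);
}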