diff options
Diffstat (limited to 'src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp | 272 |
1 files changed, 48 insertions, 224 deletions
diff --git a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp index 9ae5d5121c..3dd8c5f101 100644 --- a/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,249 +23,73 @@ */ #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "support/MemorySupport.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" -namespace arm_compute -{ -void CLGEMMLowpQuantizeDownInt32ToUint8Scale::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min, int max) -{ - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_offset = result_offset; - info.gemmlowp_multiplier = result_mult_int; - info.gemmlowp_shift = result_shift; - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>(); - k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, &info); - _kernel = std::move(k); -} - -void CLGEMMLowpQuantizeDownInt32ToUint8Scale::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, - int result_mult_int, - int result_shift, int min, int max) -{ - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_offset = result_offset; - info.gemmlowp_multiplier = result_mult_int; - info.gemmlowp_shift = result_shift; - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>(); - k->configure(compile_context, input, bias, output, &info); - _kernel = std::move(k); -} - -Status CLGEMMLowpQuantizeDownInt32ToUint8Scale::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max) -{ - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - - return CLGEMMLowpQuantizeDownInt32ScaleKernel::validate(input, bias, output, &info); -} - -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min, int max) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); -} - -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min, int max) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>(); - k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); - _kernel = std::move(k); -} - -Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, - int min, int max) -{ - return CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max); -} - -void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min, int max) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>(); - k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); - _kernel = std::move(k); -} +#include "src/core/CL/ICLKernel.h" +#include "src/gpu/cl/operators/ClGemmLowpOutputStage.h" -void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min, int max) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>(); - k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); - _kernel = std::move(k); -} +#include <algorithm> -Status CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, - int min, int max) +namespace arm_compute { - return CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max); -} - -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - float multiplier, int offset, - int min, int max) +struct CLGEMMLowpOutputStage::Impl { - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_offset = offset; - info.gemmlowp_real_multiplier = multiplier; - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>(); - k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, &info); - _kernel = std::move(k); -} + const ICLTensor *src{nullptr}; + const ICLTensor *bias{nullptr}; + ICLTensor *dst{nullptr}; + std::unique_ptr<opencl::ClGemmLowpOutputStage> op{nullptr}; + ITensorPack run_pack{}; +}; -void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - float multiplier, int offset, - int min, int max) +CLGEMMLowpOutputStage::CLGEMMLowpOutputStage() : _impl(std::make_unique<Impl>()) { - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_offset = offset; - info.gemmlowp_real_multiplier = multiplier; - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>(); - k->configure(compile_context, input, bias, output, &info); - _kernel = std::move(k); } +CLGEMMLowpOutputStage::CLGEMMLowpOutputStage(CLGEMMLowpOutputStage &&) = default; +CLGEMMLowpOutputStage &CLGEMMLowpOutputStage::operator=(CLGEMMLowpOutputStage &&) = default; +CLGEMMLowpOutputStage::~CLGEMMLowpOutputStage() = default; -Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, - int min, int max) +void CLGEMMLowpOutputStage::configure(const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const GEMMLowpOutputStageInfo &info) { - GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo(); - info.gemmlowp_min_bound = min; - info.gemmlowp_max_bound = max; - return CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(input, bias, output, &info); -} - -void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, - int min, int max) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, min, max); + configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, info); } -void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, - int result_fixedpoint_multiplier, int result_shift, - int min, int max) +void CLGEMMLowpOutputStage::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const GEMMLowpOutputStageInfo &info) { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>(); - k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, min, max); - _kernel = std::move(k); -} + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); -Status CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, - int min, int max) -{ - return CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max); -} + _impl->src = input; + _impl->bias = bias; + _impl->dst = output; -void CLGEMMLowpOutputStage::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, info); + _impl->op = std::make_unique<opencl::ClGemmLowpOutputStage>(); + _impl->op->configure(compile_context, input->info(), bias != nullptr ? bias->info() : nullptr, output->info(), + info); + _impl->run_pack = {{ACL_SRC, _impl->src}, {ACL_BIAS, _impl->bias}, {ACL_DST, _impl->dst}}; } -void CLGEMMLowpOutputStage::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info) +Status CLGEMMLowpOutputStage::validate(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const GEMMLowpOutputStageInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - switch(info.type) - { - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - { - switch(info.output_data_type) - { - case DataType::QASYMM8: - { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>(); - k->configure(compile_context, input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - _kernel = std::move(k); - break; - } - case DataType::QASYMM8_SIGNED: - { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>(); - k->configure(compile_context, input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - _kernel = std::move(k); - break; - } - case DataType::QSYMM16: - { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>(); - k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - _kernel = std::move(k); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported output data type."); - } - break; - } - case GEMMLowpOutputStageType::QUANTIZE_DOWN: - { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>(); - k->configure(compile_context, input, bias, output, &info); - _kernel = std::move(k); - break; - } - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: - { - auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>(); - k->configure(compile_context, input, bias, output, &info); - _kernel = std::move(k); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported GEMMLowpOutputStage type."); - } + return opencl::ClGemmLowpOutputStage::validate(input, bias, output, info); } -Status CLGEMMLowpOutputStage::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info) +void CLGEMMLowpOutputStage::run() { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16); - - switch(info.type) - { - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - { - switch(output->data_type()) - { - case DataType::QASYMM8: - return CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - case DataType::QASYMM8_SIGNED: - return CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - case DataType::QSYMM16: - return CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, info.gemmlowp_min_bound, info.gemmlowp_max_bound); - default: - return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported output data type."); - } - } - case GEMMLowpOutputStageType::QUANTIZE_DOWN: - return CLGEMMLowpQuantizeDownInt32ScaleKernel::validate(input, bias, output, &info); - case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT: - return CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::validate(input, bias, output, &info); - default: - return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported GEMMLowpOutputStage type."); - } + _impl->op->run(_impl->run_pack); } -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute |