From 00f4dfce66c0e1cb820b443f111b7c01846d4d5b Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 10 Mar 2021 09:55:14 +0000 Subject: Port Arm(R) Neon(TM) Dequantization to new API Partially resolves: COMPMID-4193 Change-Id: Icf9afdd009f7fd03ccfff3da47ca8f8cc87e2d3a Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5245 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park --- Android.bp | 3 +- .../runtime/NEON/functions/NEDequantizationLayer.h | 30 +- .../NEON/functions/NEGenerateProposalsLayer.h | 2 +- docs/00_introduction.dox | 2 +- src/core/NEON/NEKernels.h | 1 - .../NEON/kernels/NEDequantizationLayerKernel.cpp | 410 --------------------- .../NEON/kernels/NEDequantizationLayerKernel.h | 76 ---- src/core/cpu/kernels/CpuDequantizationKernel.cpp | 401 ++++++++++++++++++++ src/core/cpu/kernels/CpuDequantizationKernel.h | 65 ++++ .../NEON/functions/NEDequantizationLayer.cpp | 36 +- src/runtime/cpu/operators/CpuDequantization.cpp | 54 +++ src/runtime/cpu/operators/CpuDequantization.h | 64 ++++ 12 files changed, 643 insertions(+), 501 deletions(-) delete mode 100644 src/core/NEON/kernels/NEDequantizationLayerKernel.cpp delete mode 100644 src/core/NEON/kernels/NEDequantizationLayerKernel.h create mode 100644 src/core/cpu/kernels/CpuDequantizationKernel.cpp create mode 100644 src/core/cpu/kernels/CpuDequantizationKernel.h create mode 100644 src/runtime/cpu/operators/CpuDequantization.cpp create mode 100644 src/runtime/cpu/operators/CpuDequantization.h diff --git a/Android.bp b/Android.bp index abaf824caa..aefdbf65ab 100644 --- a/Android.bp +++ b/Android.bp @@ -192,7 +192,6 @@ cc_library_static { "src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp", "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp", "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp", - "src/core/NEON/kernels/NEDequantizationLayerKernel.cpp", "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp", "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp", "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp", @@ -332,6 +331,7 @@ cc_library_static { "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp", "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp", "src/core/cpu/kernels/CpuCopyKernel.cpp", + "src/core/cpu/kernels/CpuDequantizationKernel.cpp", "src/core/cpu/kernels/CpuElementwiseKernel.cpp", "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp", "src/core/cpu/kernels/CpuFillKernel.cpp", @@ -657,6 +657,7 @@ cc_library_static { "src/runtime/cpu/operators/CpuAdd.cpp", "src/runtime/cpu/operators/CpuConcatenate.cpp", "src/runtime/cpu/operators/CpuCopy.cpp", + "src/runtime/cpu/operators/CpuDequantization.cpp", "src/runtime/cpu/operators/CpuElementwise.cpp", "src/runtime/cpu/operators/CpuElementwiseUnary.cpp", "src/runtime/cpu/operators/CpuFill.cpp", diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index f52d709c74..a18566e6ca 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H #define ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include namespace arm_compute { @@ -34,10 +35,22 @@ namespace arm_compute class ITensor; class ITensorInfo; -/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ -class NEDequantizationLayer : public INESimpleFunctionNoBorder +/** Basic function to run @ref cpu::CpuDequantization that dequantizes an input tensor */ +class NEDequantizationLayer : public IFunction { public: + /** Default Constructor */ + NEDequantizationLayer(); + /** Default Destructor */ + ~NEDequantizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer(const NEDequantizationLayer &) = delete; + /** Default move constructor */ + NEDequantizationLayer(NEDequantizationLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDequantizationLayer &operator=(const NEDequantizationLayer &) = delete; + /** Default move assignment operator */ + NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default; /** Configure the kernel. * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. @@ -52,6 +65,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEDEQUANTIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index 979b3ba83e..22c6ba2ed6 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -50,7 +50,7 @@ class NEComputeAllAnchorsKernel; * -# @ref NEReshapeLayer x 2 * -# @ref NEBoundingBoxTransform * -# @ref NEPadLayerKernel - * -# @ref NEDequantizationLayerKernel x 2 + * -# @ref NEDequantizationLayer x 2 * -# @ref NEQuantizationLayer * And the following CPP kernels: * -# @ref CPPBoxWithNonMaximaSuppressionLimit diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 68c64be53f..5912383e7c 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -1296,7 +1296,7 @@ v17.09 Public major release - Most machine learning functions support both fixed point 8 and 16 bit (QS8, QS16) for both Neon and OpenCL. - New Neon kernels / functions: - arm_compute::NEGEMMAssemblyBaseKernel arm_compute::NEGEMMAArch64Kernel - - @ref NEDequantizationLayerKernel / @ref NEDequantizationLayer + - NEDequantizationLayerKernel / @ref NEDequantizationLayer - NEFloorKernel / @ref NEFloor - @ref NEL2NormalizeLayerKernel / @ref NEL2NormalizeLayer - NEQuantizationLayerKernel @ref NEMinMaxLayerKernel / @ref NEQuantizationLayer diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index 224634fdde..6a7e856d8b 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -40,7 +40,6 @@ #include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h" #include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" #include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp deleted file mode 100644 index c8d923018d..0000000000 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/CPP/Validate.h" -#include "src/core/NEON/NEAsymm.h" -#include "src/core/NEON/NESymm.h" -#include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); - - if(output->tensor_shape().total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::tuple validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32); - - // NEDequantizationLayerKernel doesn't need padding so update_window_and_padding() can be skipped - - return std::make_tuple(Status{}, win); -} - -template -inline void store_result(T *ptr, const float32x4x4_t &v) -{ - ARM_COMPUTE_UNUSED(ptr, v); -} - -template <> -inline void store_result(float *ptr, const float32x4x4_t &v) -{ - wrapper::vstore(ptr, v.val[0]); - wrapper::vstore(ptr + 4, v.val[1]); - wrapper::vstore(ptr + 8, v.val[2]); - wrapper::vstore(ptr + 12, v.val[3]); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template <> -inline void store_result(float16_t *ptr, const float32x4x4_t &v) -{ - wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]))); - wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3]))); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -template -inline void store_result(T *ptr, const float32x4x2_t &v) -{ - ARM_COMPUTE_UNUSED(ptr, v); -} - -template <> -inline void store_result(float *ptr, const float32x4x2_t &v) -{ - wrapper::vstore(ptr, v.val[0]); - wrapper::vstore(ptr + 4, v.val[1]); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template <> -inline void store_result(float16_t *ptr, const float32x4x2_t &v) -{ - wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]))); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -template -void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window) -{ - const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); - const float scale = qinfo.scale; - const int32_t offset = qinfo.offset; - - const int window_step_x = 16; - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win_collapsed); - Iterator out(output, win_collapsed); - - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto in_ptr = reinterpret_cast(in.ptr()); - const auto out_ptr = reinterpret_cast(out.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, scale, offset); - - store_result(reinterpret_cast(out_ptr + x), vdeq); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - auto val = *(in_ptr + x); - *(out_ptr + x) = static_cast(Qasymm8QuantizationHelper::dequantize(val, qinfo)); - } - }, - in, out); -} - -template -void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) -{ - const auto scale = input->info()->quantization_info().scale(); - - const int window_step_x = 16; - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - // Reset first dimension to handle tail calculations manually - Window win(window); - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win); - Iterator out(output, win); - - execute_window_loop(win, [&](const Coordinates & id) - { - const auto in_ptr = reinterpret_cast(in.ptr()); - const auto out_ptr = reinterpret_cast(out.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, scale[id.z()]); - - store_result(reinterpret_cast(out_ptr + x), vdeq); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - int8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()])); - } - }, - in, out); -} - -template -void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) -{ - const auto scale = input->info()->quantization_info().scale(); - - const int window_step_x = 16; - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - // Reset first dimension to handle tail calculations manually - Window win(window); - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win); - Iterator out(output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = reinterpret_cast(in.ptr()); - const auto out_ptr = reinterpret_cast(out.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const float32x4x4_t vscale = - { - { - scale[x + 0], scale[x + 1], scale[x + 2], scale[x + 3], - scale[x + 4], scale[x + 5], scale[x + 6], scale[x + 7], - scale[x + 8], scale[x + 9], scale[x + 10], scale[x + 11], - scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15] - } - }; - const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, vscale); - - store_result(reinterpret_cast(out_ptr + x), vdeq); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - int8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale[x])); - } - }, - in, out); -} - -template -void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window) -{ - const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); - const float scale = qinfo.scale; - - const int window_step_x = 16; - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win_collapsed); - Iterator out(output, win_collapsed); - - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto in_ptr = reinterpret_cast(in.ptr()); - const auto out_ptr = reinterpret_cast(out.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize(vin, scale); - - store_result(reinterpret_cast(out_ptr + x), vdeq); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - int8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize(val, scale)); - } - }, - in, out); -} - -template -void run_dequantization_qsymm16(const ITensor *input, ITensor *output, const Window &window) -{ - const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); - const float scale = qinfo.scale; - - const int window_step_x = 8; - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win_collapsed); - Iterator out(output, win_collapsed); - - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto in_ptr = reinterpret_cast(in.ptr()); - const auto out_ptr = reinterpret_cast(out.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(in_ptr + x); - const auto vdeq = vdequantize_int16(vin, scale); - - store_result(reinterpret_cast(out_ptr + x), vdeq); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - int16_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast(dequantize_qsymm16(val, scale)); - } - }, - in, out); -} - -template -void run_dequantization_core(const ITensor *input, ITensor *output, const Window &window) -{ - switch(input->info()->data_type()) - { - case DataType::QASYMM8: - run_dequantization_qasymm8(input, output, window); - break; - case DataType::QASYMM8_SIGNED: - run_dequantization_qasymm8(input, output, window); - break; - case DataType::QSYMM8_PER_CHANNEL: - input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc(input, output, window) : run_dequantization_qsymm8_per_channel_nchw(input, output, window); - break; - case DataType::QSYMM8: - run_dequantization_qsymm8(input, output, window); - break; - case DataType::QSYMM16: - run_dequantization_qsymm16(input, output, window); - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } -} -} // namespace - -NEDequantizationLayerKernel::NEDequantizationLayerKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void NEDequantizationLayerKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - INEKernel::configure(std::get<1>(win_config)); -} - -Status NEDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - return Status{}; -} - -void NEDequantizationLayerKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - switch(_output->info()->data_type()) - { - case DataType::F32: - run_dequantization_core(_input, _output, window); - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - run_dequantization_core(_input, _output, window); - break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.h b/src/core/NEON/kernels/NEDequantizationLayerKernel.h deleted file mode 100644 index 9cc71922af..0000000000 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the dequantization layer kernel. */ -class NEDequantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDequantizationLayerKernel"; - } - /** Default constructor */ - NEDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEDequantizationLayerKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/src/core/cpu/kernels/CpuDequantizationKernel.cpp b/src/core/cpu/kernels/CpuDequantizationKernel.cpp new file mode 100644 index 0000000000..cb77d36809 --- /dev/null +++ b/src/core/cpu/kernels/CpuDequantizationKernel.cpp @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/cpu/kernels/CpuDequantizationKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/CPP/Validate.h" +#include "src/core/NEON/NEAsymm.h" +#include "src/core/NEON/NESymm.h" +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + + if(dst->tensor_shape().total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(dst); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); + } + + return Status{}; +} + +template +inline void store_result(T *ptr, const float32x4x4_t &v) +{ + ARM_COMPUTE_UNUSED(ptr, v); +} + +template <> +inline void store_result(float *ptr, const float32x4x4_t &v) +{ + wrapper::vstore(ptr, v.val[0]); + wrapper::vstore(ptr + 4, v.val[1]); + wrapper::vstore(ptr + 8, v.val[2]); + wrapper::vstore(ptr + 12, v.val[3]); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template <> +inline void store_result(float16_t *ptr, const float32x4x4_t &v) +{ + wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]))); + wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3]))); +} +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +template +inline void store_result(T *ptr, const float32x4x2_t &v) +{ + ARM_COMPUTE_UNUSED(ptr, v); +} + +template <> +inline void store_result(float *ptr, const float32x4x2_t &v) +{ + wrapper::vstore(ptr, v.val[0]); + wrapper::vstore(ptr + 4, v.val[1]); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template <> +inline void store_result(float16_t *ptr, const float32x4x2_t &v) +{ + wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]))); +} +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +template +void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window) +{ + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); + const float scale = qinfo.scale; + const int32_t offset = qinfo.offset; + + const int window_step_x = 16; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Create iterators + Iterator in(input, win_collapsed); + Iterator out(output, win_collapsed); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(in.ptr()); + const auto out_ptr = reinterpret_cast(out.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(in_ptr + x); + const auto vdeq = vdequantize(vin, scale, offset); + + store_result(reinterpret_cast(out_ptr + x), vdeq); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + auto val = *(in_ptr + x); + *(out_ptr + x) = static_cast(Qasymm8QuantizationHelper::dequantize(val, qinfo)); + } + }, + in, out); +} + +template +void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window) +{ + const auto scale = input->info()->quantization_info().scale(); + + const int window_step_x = 16; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Reset first dimension to handle tail calculations manually + Window win(window); + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Create iterators + Iterator in(input, win); + Iterator out(output, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto in_ptr = reinterpret_cast(in.ptr()); + const auto out_ptr = reinterpret_cast(out.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(in_ptr + x); + const auto vdeq = vdequantize(vin, scale[id.z()]); + + store_result(reinterpret_cast(out_ptr + x), vdeq); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[id.z()])); + } + }, + in, out); +} + +template +void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window) +{ + const auto scale = input->info()->quantization_info().scale(); + + const int window_step_x = 16; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Reset first dimension to handle tail calculations manually + Window win(window); + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Create iterators + Iterator in(input, win); + Iterator out(output, win); + + execute_window_loop(win, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(in.ptr()); + const auto out_ptr = reinterpret_cast(out.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const float32x4x4_t vscale = + { + { + scale[x + 0], scale[x + 1], scale[x + 2], scale[x + 3], + scale[x + 4], scale[x + 5], scale[x + 6], scale[x + 7], + scale[x + 8], scale[x + 9], scale[x + 10], scale[x + 11], + scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15] + } + }; + const auto vin = wrapper::vloadq(in_ptr + x); + const auto vdeq = vdequantize(vin, vscale); + + store_result(reinterpret_cast(out_ptr + x), vdeq); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale[x])); + } + }, + in, out); +} + +template +void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window) +{ + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); + const float scale = qinfo.scale; + + const int window_step_x = 16; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Create iterators + Iterator in(input, win_collapsed); + Iterator out(output, win_collapsed); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(in.ptr()); + const auto out_ptr = reinterpret_cast(out.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(in_ptr + x); + const auto vdeq = vdequantize(vin, scale); + + store_result(reinterpret_cast(out_ptr + x), vdeq); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + int8_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize(val, scale)); + } + }, + in, out); +} + +template +void run_dequantization_qsymm16(const ITensor *input, ITensor *output, const Window &window) +{ + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); + const float scale = qinfo.scale; + + const int window_step_x = 8; + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + // Collapse window and reset first dimension to handle tail calculations manually + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + // Create iterators + Iterator in(input, win_collapsed); + Iterator out(output, win_collapsed); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(in.ptr()); + const auto out_ptr = reinterpret_cast(out.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(in_ptr + x); + const auto vdeq = vdequantize_int16(vin, scale); + + store_result(reinterpret_cast(out_ptr + x), vdeq); + } + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + int16_t val = *(in_ptr + x); + *(out_ptr + x) = static_cast(dequantize_qsymm16(val, scale)); + } + }, + in, out); +} + +template +void run_dequantization_core(const ITensor *input, ITensor *output, const Window &window) +{ + switch(input->info()->data_type()) + { + case DataType::QASYMM8: + run_dequantization_qasymm8(input, output, window); + break; + case DataType::QASYMM8_SIGNED: + run_dequantization_qasymm8(input, output, window); + break; + case DataType::QSYMM8_PER_CHANNEL: + input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc(input, output, window) : run_dequantization_qsymm8_per_channel_nchw(input, output, window); + break; + case DataType::QSYMM8: + run_dequantization_qsymm8(input, output, window); + break; + case DataType::QSYMM16: + run_dequantization_qsymm16(input, output, window); + break; + default: + ARM_COMPUTE_ERROR("Unsupported data type."); + } +} +} // namespace + +void CpuDequantizationKernel::configure(const ITensorInfo *src, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst)); + + // Configure kernel window + Window win = calculate_max_window(*src, Steps()); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*dst, src->tensor_shape(), 1, DataType::F32); + + ICpuKernel::configure(win); +} + +Status CpuDequantizationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst)); + return Status{}; +} + +void CpuDequantizationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); + + switch(dst->info()->data_type()) + { + case DataType::F32: + run_dequantization_core(src, dst, window); + break; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + run_dequantization_core(src, dst, window); + break; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + default: + ARM_COMPUTE_ERROR("Unsupported data type."); + } +} +const char *CpuDequantizationKernel::name() const +{ + return "CpuDequantizationKernel"; +} +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/core/cpu/kernels/CpuDequantizationKernel.h b/src/core/cpu/kernels/CpuDequantizationKernel.h new file mode 100644 index 0000000000..e4525d5c76 --- /dev/null +++ b/src/core/cpu/kernels/CpuDequantizationKernel.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPU_DEQUANTIZATIONKERNEL_H +#define ARM_COMPUTE_CPU_DEQUANTIZATIONKERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/cpu/ICpuKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +/** Interface for the dequantization layer kernel. */ +class CpuDequantizationKernel : public ICpuKernel +{ +public: + /** Default constructor */ + CpuDequantizationKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDequantizationKernel); + /** Set input, output tensors. + * + * @param[in] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] dst Destination tensor info with the same dimensions of input. Data type supported: F16/F32. + */ + void configure(const ITensorInfo *src, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref CpuDequantizationKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] dst Destination tensor info. Data types supported: F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; +}; +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif /*ARM_COMPUTE_CPU_DEQUANTIZATIONKERNEL_H */ diff --git a/src/runtime/NEON/functions/NEDequantizationLayer.cpp b/src/runtime/NEON/functions/NEDequantizationLayer.cpp index a345840f4f..210fbe0eb2 100644 --- a/src/runtime/NEON/functions/NEDequantizationLayer.cpp +++ b/src/runtime/NEON/functions/NEDequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,43 @@ #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/Tensor.h" +#include "src/runtime/cpu/operators/CpuDequantization.h" namespace arm_compute { +struct NEDequantizationLayer::Impl +{ + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +NEDequantizationLayer::NEDequantizationLayer() + : _impl(std::make_unique()) +{ +} +NEDequantizationLayer::~NEDequantizationLayer() = default; + void NEDequantizationLayer::configure(const ITensor *input, ITensor *output) { - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = std::make_unique(); + _impl->op->configure(input->info(), output->info()); } Status NEDequantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return NEDequantizationLayerKernel::validate(input, output); + return cpu::CpuDequantization::validate(input, output); +} + +void NEDequantizationLayer::run() +{ + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/cpu/operators/CpuDequantization.cpp b/src/runtime/cpu/operators/CpuDequantization.cpp new file mode 100644 index 0000000000..0a3f602da1 --- /dev/null +++ b/src/runtime/cpu/operators/CpuDequantization.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/runtime/cpu/operators/CpuDequantization.h" + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/cpu/kernels/CpuDequantizationKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +void CpuDequantization::configure(const ITensorInfo *src, ITensorInfo *dst) +{ + auto k = std::make_unique(); + k->configure(src, dst); + _kernel = std::move(k); +} + +Status CpuDequantization::validate(const ITensorInfo *src, const ITensorInfo *dst) +{ + return kernels::CpuDequantizationKernel::validate(src, dst); +} + +void CpuDequantization::run(ITensorPack &tensors) +{ + ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); + prepare(tensors); + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/runtime/cpu/operators/CpuDequantization.h b/src/runtime/cpu/operators/CpuDequantization.h new file mode 100644 index 0000000000..22f8114149 --- /dev/null +++ b/src/runtime/cpu/operators/CpuDequantization.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPU_DEQUANTIZATION_H +#define ARM_COMPUTE_CPU_DEQUANTIZATION_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/experimental/Types.h" +#include "src/core/cpu/ICpuKernel.h" +#include "src/runtime/cpu/ICpuOperator.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +/** Basic function to run @ref kernels::CpuDequantizationKernel that dequantizes an input tensor */ +class CpuDequantization : public ICpuOperator +{ +public: + /** Default Constructor */ + CpuDequantization() = default; + /** Configure the kernel. + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[out] dst Destination tensor info with the same dimensions of input. Data type supported: F16/F32. + */ + void configure(const ITensorInfo *src, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref CpuDequantization + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] dst Destination tensor info. Data type supported: F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *dst); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace cpu +} // namespace arm_compute +#endif /* ARM_COMPUTE_CPU_DEQUANTIZATION_H */ -- cgit v1.2.1