Port OpenCL Dequantization to new API

Partially resolves: COMPMID-4193

Change-Id: I4e14149d5b0a7f9c0dd3bfce800eaddca1e4d885
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5238
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Manuel Bottini <manuel.bottini@arm.com> 2021-03-02 17:40:42 +0000
committer: Manuel Bottini <manuel.bottini@arm.com> 2021-03-11 18:24:52 +0000
commit: 9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d (patch)
tree: 67f94008f7a8afbd95f00a0093d83af009aca87c
parent: 6b0bf9972975aff01e42e5790f7b7c98cd835afb (diff)
download: ComputeLibrary-9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d.tar.gz
12 files changed, 301 insertions, 148 deletions
diff --git a/Android.bp b/Android.bp
index aefdbf65ab..0042dcef4c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -99,7 +99,6 @@ cc_library_static {
         "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp",
         "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp",
         "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp",
-        "src/core/CL/kernels/CLDequantizationLayerKernel.cpp",
         "src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp",
         "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp",
         "src/core/CL/kernels/CLFFTRadixStageKernel.cpp",
@@ -377,6 +376,7 @@ cc_library_static {
         "src/core/gpu/cl/kernels/ClCopyKernel.cpp",
         "src/core/gpu/cl/kernels/ClCropKernel.cpp",
         "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+        "src/core/gpu/cl/kernels/ClDequantizationKernel.cpp",
         "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp",
         "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
         "src/core/gpu/cl/kernels/ClFillKernel.cpp",
@@ -674,6 +674,7 @@ cc_library_static {
         "src/runtime/gpu/cl/operators/ClConcatenate.cpp",
         "src/runtime/gpu/cl/operators/ClCopy.cpp",
         "src/runtime/gpu/cl/operators/ClCrop.cpp",
+        "src/runtime/gpu/cl/operators/ClDequantization.cpp",
         "src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp",
         "src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp",
         "src/runtime/gpu/cl/operators/ClFill.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index b2cf3356f4..7ecf094df3 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,10 @@
 #ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H
 #define ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H
 
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
 
 namespace arm_compute
 {
@@ -35,10 +36,22 @@ class CLCompileContext;
 class ICLTensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */
-class CLDequantizationLayer : public ICLSimpleFunction
+/** Basic function to run @ref opencl::ClDequantization that dequantizes an input tensor */
+class CLDequantizationLayer : public IFunction
 {
 public:
+    /** Default Constructor */
+    CLDequantizationLayer();
+    /** Default Destructor */
+    ~CLDequantizationLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDequantizationLayer(const CLDequantizationLayer &) = delete;
+    /** Default move constructor */
+    CLDequantizationLayer(CLDequantizationLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDequantizationLayer &operator=(const CLDequantizationLayer &) = delete;
+    /** Default move assignment operator */
+    CLDequantizationLayer &operator=(CLDequantizationLayer &&) = default;
     /** Set the input and output tensors.
      *
      * @param[in]  input  Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
@@ -62,6 +75,13 @@ public:
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index e6b0eed6d8..bea470712c 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -40,7 +40,7 @@ namespace arm_compute
 {
 class CLCompileContext;
 class CLBoundingBoxTransformKernel;
-class CLDequantizationLayerKernel;
+class CLDequantizationLayer;
 class CLComputeAllAnchorsKernel;
 class CLPadLayerKernel;
 class CLQuantizationLayer;
@@ -55,7 +55,7 @@ class ITensorInfo;
  * -# @ref CLReshapeLayer x 2
  * -# @ref CLBoundingBoxTransform
  * -# @ref CLPadLayerKernel
- * -# @ref CLDequantizationLayerKernel x 2
+ * -# @ref CLDequantizationLayer x 2
  * -# @ref CLQuantizationLayer
  * And the following CPP functions:
  * -# @ref CPPBoxWithNonMaximaSuppressionLimit
@@ -144,8 +144,8 @@ private:
     std::unique_ptr<CLComputeAllAnchorsKernel>    _compute_anchors_kernel;
     std::unique_ptr<CLBoundingBoxTransformKernel> _bounding_box_kernel;
     std::unique_ptr<CLPadLayerKernel>             _pad_kernel;
-    std::unique_ptr<CLDequantizationLayerKernel>  _dequantize_anchors;
-    std::unique_ptr<CLDequantizationLayerKernel>  _dequantize_deltas;
+    std::unique_ptr<CLDequantizationLayer>        _dequantize_anchors;
+    std::unique_ptr<CLDequantizationLayer>        _dequantize_deltas;
     std::unique_ptr<CLQuantizationLayer>          _quantize_all_proposals;
 
     // CPP functions
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 5912383e7c..7fd3af7c94 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -760,7 +760,7 @@ v19.11 Public major release
  - Added FP16 support for:
     - @ref CLGEMMMatrixMultiplyReshapedKernel
  - Added new data type QASYMM8_PER_CHANNEL support for:
-    - @ref CLDequantizationLayer
+    - CLDequantizationLayer
     - @ref NEDequantizationLayer
  - Added new data type QSYMM8_PER_CHANNEL support for:
     - @ref CLConvolutionLayer
@@ -1306,7 +1306,7 @@ v17.09 Public major release
 
  - New OpenCL kernels / functions:
     - @ref CLDepthwiseConvolutionLayer3x3NCHWKernel @ref CLDepthwiseConvolutionLayer3x3NHWCKernel CLDepthwiseIm2ColKernel CLDepthwiseVectorToTensorKernel CLDepthwiseWeightsReshapeKernel / CLDepthwiseConvolutionLayer3x3 @ref CLDepthwiseConvolutionLayer CLDepthwiseSeparableConvolutionLayer
-    - @ref CLDequantizationLayerKernel / @ref CLDequantizationLayer
+    - CLDequantizationLayerKernel / CLDequantizationLayer
     - @ref CLDirectConvolutionLayerKernel / @ref CLDirectConvolutionLayer
     - CLFlattenLayer
     - CLFloorKernel / @ref CLFloor
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index b93f270859..14733899b3 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -42,7 +42,6 @@
 #include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
 #include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
 #include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
 #include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
 #include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
 #include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.h b/src/core/CL/kernels/CLDequantizationLayerKernel.h
deleted file mode 100644
index 5579b5bc71..0000000000
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dequantization layer kernel. */
-class CLDequantizationLayerKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLDequantizationLayerKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
-    /** Default Move Constructor. */
-    CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
-    /** Default move assignment operator */
-    CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
-    /** Default destructor */
-    ~CLDequantizationLayerKernel() = default;
-    /** Set the input, output, min and max.
-     *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
-     * @param[out] output Destination tensor. Data types supported: F16/F32.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output);
-    /** Set the input, output, min and max.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
-     * @param[out] output          Destination tensor. Data types supported: F16/F32.
-     */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
-     *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
-     * @param[in] output Output tensor info. Data types supported: F16/F32.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
index 3723c651fe..267ac9b2b4 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClDequantizationKernel.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -33,109 +33,107 @@
 #include "src/core/CL/CLValidate.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
 #include "support/StringSupport.h"
 
 namespace arm_compute
 {
+namespace opencl
+{
+namespace kernels
+{
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
 
-    if(output->tensor_shape().total_size() > 0)
+    if(dst->tensor_shape().total_size() > 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
     }
 
     return Status{};
 }
 } // namespace
 
-CLDequantizationLayerKernel::CLDequantizationLayerKernel()
-    : _input(nullptr), _output(nullptr)
+ClDequantizationKernel::ClDequantizationKernel()
 {
 }
 
-void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+void ClDequantizationKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
 {
-    configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLDequantizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
 
     // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, DataType::F32);
-
-    auto padding_info = get_padding_info({ input, output });
+    auto_init_if_empty(*dst, src->tensor_shape(), 1, DataType::F32);
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+    auto padding_info = get_padding_info({ src, dst });
 
-    _input  = input;
-    _output = output;
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
 
-    const int  vec_size_x     = 16 / output->info()->element_size();
-    const int  output_width_x = output->info()->tensor_shape().x();
+    const int  vec_size_x     = 16 / dst->element_size();
+    const int  output_width_x = dst->tensor_shape().x();
     const bool multi_access_x = (output_width_x / vec_size_x > 0);
 
-    const bool  is_quantized_per_channel = is_data_type_quantized_per_channel(input->info()->data_type());
+    const bool  is_quantized_per_channel = is_data_type_quantized_per_channel(src->data_type());
     std::string kernel_name              = "dequantization_layer";
 
     // Create kernel
     CLBuildOptions build_opts;
     if(!is_quantized_per_channel)
     {
-        const UniformQuantizationInfo qinfo   = input->info()->quantization_info().uniform();
-        const int                     qoffset = is_data_type_quantized_asymmetric(input->info()->data_type()) ? qinfo.offset : 0;
+        const UniformQuantizationInfo qinfo   = src->quantization_info().uniform();
+        const int                     qoffset = is_data_type_quantized_asymmetric(src->data_type()) ? qinfo.offset : 0;
         build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
         build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qoffset));
     }
     else
     {
         kernel_name += "_per_channel";
-        kernel_name += input->info()->data_layout() == DataLayout::NCHW ? "_nchw" : "_nhwc";
+        kernel_name += src->data_layout() == DataLayout::NCHW ? "_nchw" : "_nhwc";
     }
 
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
-    build_opts.add_option("-DDATA_TYPE_SRC=" + get_cl_type_from_data_type(input->info()->data_type()));
-    build_opts.add_option("-DDATA_TYPE_DST=" + get_cl_type_from_data_type(output->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_SRC=" + get_cl_type_from_data_type(src->data_type()));
+    build_opts.add_option("-DDATA_TYPE_DST=" + get_cl_type_from_data_type(dst->data_type()));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
 
     // Create kernel name
     _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
 
     // Configure kernel window
-    Window win = calculate_max_window(*output->info());
+    Window win = calculate_max_window(*dst);
     if(multi_access_x)
     {
-        win.set(Window::DimX,
-                Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+        win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
     }
     ICLKernel::configure_internal(win);
 
     // Set output valid region
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
 
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
-Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+Status ClDequantizationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
     return Status{};
 }
 
-void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClDequantizationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
 
-    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(_input->info()->data_type());
+    auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+    auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(src->info()->data_type());
 
     // Collapse windo
     Window new_window = is_quantized_per_channel ? window.collapse_if_possible(ICLKernel::window(), 4) : window.collapse_if_possible(ICLKernel::window(), 3);
@@ -144,16 +142,18 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
     if(is_quantized_per_channel)
     {
         unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters
-        _kernel.setArg(idx++, _input->quantization().scale->cl_buffer());
+        _kernel.setArg(idx++, src->quantization().scale->cl_buffer());
     }
 
     do
     {
         unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, slice);
-        add_3D_tensor_argument(idx, _output, slice);
+        add_3D_tensor_argument(idx, src, slice);
+        add_3D_tensor_argument(idx, dst, slice);
         enqueue(queue, *this, slice, lws_hint());
     }
     while(new_window.slide_window_slice_3D(slice));
 }
+} // namespace kernels
+} // namespace opencl
 } // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClDequantizationKernel.h b/src/core/gpu/cl/kernels/ClDequantizationKernel.h
new file mode 100644
index 0000000000..3ccf90c204
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H
+#define ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the dequantization layer kernel. */
+class ClDequantizationKernel : public IClKernel
+{
+public:
+    /** Default constructor */
+    ClDequantizationKernel();
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDequantizationKernel);
+    /** Initialise the kernel's input and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[out] dst             Destination tensor info. Data types supported: F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClDequantizationKernel
+     *
+     * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] dst Output tensor info. Data types supported: F16/F32.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+    // Inherited methods overridden:
+    void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H */
diff --git a/src/runtime/CL/functions/CLDequantizationLayer.cpp b/src/runtime/CL/functions/CLDequantizationLayer.cpp
index d358813724..e0381f90ae 100644
--- a/src/runtime/CL/functions/CLDequantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLDequantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,10 +23,27 @@
  */
 #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
 
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClDequantization.h"
 
 namespace arm_compute
 {
+struct CLDequantizationLayer::Impl
+{
+    const ICLTensor                          *src{ nullptr };
+    ICLTensor                                *dst{ nullptr };
+    std::unique_ptr<opencl::ClDequantization> op{ nullptr };
+};
+
+CLDequantizationLayer::CLDequantizationLayer()
+    : _impl(std::make_unique<Impl>())
+{
+}
+CLDequantizationLayer::~CLDequantizationLayer() = default;
+
 void CLDequantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -34,13 +51,23 @@ void CLDequantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
 
 void CLDequantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
-    auto k = std::make_unique<CLDequantizationLayerKernel>();
-    k->configure(compile_context, input, output);
-    _kernel = std::move(k);
+    _impl->src = input;
+    _impl->dst = output;
+
+    _impl->op = std::make_unique<opencl::ClDequantization>();
+    _impl->op->configure(compile_context, input->info(), output->info());
 }
 
 Status CLDequantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
-    return CLDequantizationLayerKernel::validate(input, output);
+    return opencl::ClDequantization::validate(input, output);
+}
+
+void CLDequantizationLayer::run()
+{
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index fb698d5b88..81e24dba08 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,9 +25,9 @@
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
 #include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
 #include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
 #include "src/core/CL/kernels/CLPadLayerKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
@@ -43,8 +43,8 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
       _compute_anchors_kernel(std::make_unique<CLComputeAllAnchorsKernel>()),
       _bounding_box_kernel(std::make_unique<CLBoundingBoxTransformKernel>()),
       _pad_kernel(std::make_unique<CLPadLayerKernel>()),
-      _dequantize_anchors(std::make_unique<CLDequantizationLayerKernel>()),
-      _dequantize_deltas(std::make_unique<CLDequantizationLayerKernel>()),
+      _dequantize_anchors(std::make_unique<CLDequantizationLayer>()),
+      _dequantize_deltas(std::make_unique<CLDequantizationLayer>()),
       _quantize_all_proposals(std::make_unique<CLQuantizationLayer>()),
       _cpp_nms(memory_manager),
       _is_nhwc(false),
@@ -261,10 +261,10 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
     if(is_qasymm8)
     {
         TensorInfo all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
-        ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info));
 
         TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
-        ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
 
         TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
         ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
@@ -363,8 +363,8 @@ void CLGenerateProposalsLayer::run()
 
     if(_is_qasymm8)
     {
-        CLScheduler::get().enqueue(*_dequantize_anchors, false);
-        CLScheduler::get().enqueue(*_dequantize_deltas, false);
+        _dequantize_anchors->run();
+        _dequantize_deltas->run();
     }
 
     // Build the boxes
diff --git a/src/runtime/gpu/cl/operators/ClDequantization.cpp b/src/runtime/gpu/cl/operators/ClDequantization.cpp
new file mode 100644
index 0000000000..df3203d2e1
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClDequantization.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClDequantization.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClDequantizationKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClDequantization::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
+{
+    auto k = std::make_unique<kernels::ClDequantizationKernel>();
+    k->configure(compile_context, src, dst);
+    _kernel = std::move(k);
+}
+
+Status ClDequantization::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+    return kernels::ClDequantizationKernel::validate(src, dst);
+}
+
+void ClDequantization::run(ITensorPack &tensors)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
+    CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDequantization.h b/src/runtime/gpu/cl/operators/ClDequantization.h
new file mode 100644
index 0000000000..a696b73d2e
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClDequantization.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEQUANTIZATION_H
+#define ARM_COMPUTE_CL_DEQUANTIZATION_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to run @ref kernels::ClDequantizationKernel that dequantizes an input tensor */
+class ClDequantization : public IClOperator
+{
+public:
+    /** Constructor */
+    ClDequantization() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[out] dst             Destination tensor info with the same dimensions as @p src. Data types supported: F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClDequantization
+     *
+     * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] dst Output tensor info. Data types supported: F16/F32.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+    // Inherited method overridden
+    void run(ITensorPack &tensors) override;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DEQUANTIZATION_H */
author	Manuel Bottini <manuel.bottini@arm.com>	2021-03-02 17:40:42 +0000
committer	Manuel Bottini <manuel.bottini@arm.com>	2021-03-11 18:24:52 +0000
commit	9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d (patch)
tree	67f94008f7a8afbd95f00a0093d83af009aca87c
parent	6b0bf9972975aff01e42e5790f7b7c98cd835afb (diff)
download	ComputeLibrary-9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d.tar.gz