aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2021-03-02 17:40:42 +0000
committerManuel Bottini <manuel.bottini@arm.com>2021-03-11 18:24:52 +0000
commit9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d (patch)
tree67f94008f7a8afbd95f00a0093d83af009aca87c
parent6b0bf9972975aff01e42e5790f7b7c98cd835afb (diff)
downloadComputeLibrary-9e73c93bbd49fdd648d8f8cb77df46e7bbc9526d.tar.gz
Port OpenCL Dequantization to new API
Partially resolves: COMPMID-4193 Change-Id: I4e14149d5b0a7f9c0dd3bfce800eaddca1e4d885 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5238 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp3
-rw-r--r--arm_compute/runtime/CL/functions/CLDequantizationLayer.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h8
-rw-r--r--docs/00_introduction.dox4
-rw-r--r--src/core/CL/CLKernels.h1
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.h79
-rw-r--r--src/core/gpu/cl/kernels/ClDequantizationKernel.cpp (renamed from src/core/CL/kernels/CLDequantizationLayerKernel.cpp)86
-rw-r--r--src/core/gpu/cl/kernels/ClDequantizationKernel.h69
-rw-r--r--src/runtime/CL/functions/CLDequantizationLayer.cpp39
-rw-r--r--src/runtime/CL/functions/CLGenerateProposalsLayer.cpp14
-rw-r--r--src/runtime/gpu/cl/operators/ClDequantization.cpp54
-rw-r--r--src/runtime/gpu/cl/operators/ClDequantization.h62
12 files changed, 301 insertions, 148 deletions
diff --git a/Android.bp b/Android.bp
index aefdbf65ab..0042dcef4c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -99,7 +99,6 @@ cc_library_static {
"src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp",
"src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp",
"src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp",
- "src/core/CL/kernels/CLDequantizationLayerKernel.cpp",
"src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp",
"src/core/CL/kernels/CLFFTDigitReverseKernel.cpp",
"src/core/CL/kernels/CLFFTRadixStageKernel.cpp",
@@ -377,6 +376,7 @@ cc_library_static {
"src/core/gpu/cl/kernels/ClCopyKernel.cpp",
"src/core/gpu/cl/kernels/ClCropKernel.cpp",
"src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClDequantizationKernel.cpp",
"src/core/gpu/cl/kernels/ClElementwiseKernel.cpp",
"src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
"src/core/gpu/cl/kernels/ClFillKernel.cpp",
@@ -674,6 +674,7 @@ cc_library_static {
"src/runtime/gpu/cl/operators/ClConcatenate.cpp",
"src/runtime/gpu/cl/operators/ClCopy.cpp",
"src/runtime/gpu/cl/operators/ClCrop.cpp",
+ "src/runtime/gpu/cl/operators/ClDequantization.cpp",
"src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp",
"src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp",
"src/runtime/gpu/cl/operators/ClFill.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index b2cf3356f4..7ecf094df3 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H
#define ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -35,10 +36,22 @@ class CLCompileContext;
class ICLTensor;
class ITensorInfo;
-/** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */
-class CLDequantizationLayer : public ICLSimpleFunction
+/** Basic function to run @ref opencl::ClDequantization that dequantizes an input tensor */
+class CLDequantizationLayer : public IFunction
{
public:
+ /** Default Constructor */
+ CLDequantizationLayer();
+ /** Default Destructor */
+ ~CLDequantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDequantizationLayer(const CLDequantizationLayer &) = delete;
+ /** Default move constructor */
+ CLDequantizationLayer(CLDequantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDequantizationLayer &operator=(const CLDequantizationLayer &) = delete;
+ /** Default move assignment operator */
+ CLDequantizationLayer &operator=(CLDequantizationLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
@@ -62,6 +75,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDEQUANTIZATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index e6b0eed6d8..bea470712c 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -40,7 +40,7 @@ namespace arm_compute
{
class CLCompileContext;
class CLBoundingBoxTransformKernel;
-class CLDequantizationLayerKernel;
+class CLDequantizationLayer;
class CLComputeAllAnchorsKernel;
class CLPadLayerKernel;
class CLQuantizationLayer;
@@ -55,7 +55,7 @@ class ITensorInfo;
* -# @ref CLReshapeLayer x 2
* -# @ref CLBoundingBoxTransform
* -# @ref CLPadLayerKernel
- * -# @ref CLDequantizationLayerKernel x 2
+ * -# @ref CLDequantizationLayer x 2
* -# @ref CLQuantizationLayer
* And the following CPP functions:
* -# @ref CPPBoxWithNonMaximaSuppressionLimit
@@ -144,8 +144,8 @@ private:
std::unique_ptr<CLComputeAllAnchorsKernel> _compute_anchors_kernel;
std::unique_ptr<CLBoundingBoxTransformKernel> _bounding_box_kernel;
std::unique_ptr<CLPadLayerKernel> _pad_kernel;
- std::unique_ptr<CLDequantizationLayerKernel> _dequantize_anchors;
- std::unique_ptr<CLDequantizationLayerKernel> _dequantize_deltas;
+ std::unique_ptr<CLDequantizationLayer> _dequantize_anchors;
+ std::unique_ptr<CLDequantizationLayer> _dequantize_deltas;
std::unique_ptr<CLQuantizationLayer> _quantize_all_proposals;
// CPP functions
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 5912383e7c..7fd3af7c94 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -760,7 +760,7 @@ v19.11 Public major release
- Added FP16 support for:
- @ref CLGEMMMatrixMultiplyReshapedKernel
- Added new data type QASYMM8_PER_CHANNEL support for:
- - @ref CLDequantizationLayer
+ - CLDequantizationLayer
- @ref NEDequantizationLayer
- Added new data type QSYMM8_PER_CHANNEL support for:
- @ref CLConvolutionLayer
@@ -1306,7 +1306,7 @@ v17.09 Public major release
- New OpenCL kernels / functions:
- @ref CLDepthwiseConvolutionLayer3x3NCHWKernel @ref CLDepthwiseConvolutionLayer3x3NHWCKernel CLDepthwiseIm2ColKernel CLDepthwiseVectorToTensorKernel CLDepthwiseWeightsReshapeKernel / CLDepthwiseConvolutionLayer3x3 @ref CLDepthwiseConvolutionLayer CLDepthwiseSeparableConvolutionLayer
- - @ref CLDequantizationLayerKernel / @ref CLDequantizationLayer
+ - CLDequantizationLayerKernel / CLDequantizationLayer
- @ref CLDirectConvolutionLayerKernel / @ref CLDirectConvolutionLayer
- CLFlattenLayer
- CLFloorKernel / @ref CLFloor
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index b93f270859..14733899b3 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -42,7 +42,6 @@
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.h b/src/core/CL/kernels/CLDequantizationLayerKernel.h
deleted file mode 100644
index 5579b5bc71..0000000000
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dequantization layer kernel. */
-class CLDequantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDequantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLDequantizationLayerKernel() = default;
- /** Set the input, output, min and max.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input, output, min and max.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[in] output Output tensor info. Data types supported: F16/F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
index 3723c651fe..267ac9b2b4 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClDequantizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -33,109 +33,107 @@
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
- if(output->tensor_shape().total_size() > 0)
+ if(dst->tensor_shape().total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
}
return Status{};
}
} // namespace
-CLDequantizationLayerKernel::CLDequantizationLayerKernel()
- : _input(nullptr), _output(nullptr)
+ClDequantizationKernel::ClDequantizationKernel()
{
}
-void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+void ClDequantizationKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLDequantizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
// Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, DataType::F32);
-
- auto padding_info = get_padding_info({ input, output });
+ auto_init_if_empty(*dst, src->tensor_shape(), 1, DataType::F32);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+ auto padding_info = get_padding_info({ src, dst });
- _input = input;
- _output = output;
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
- const int vec_size_x = 16 / output->info()->element_size();
- const int output_width_x = output->info()->tensor_shape().x();
+ const int vec_size_x = 16 / dst->element_size();
+ const int output_width_x = dst->tensor_shape().x();
const bool multi_access_x = (output_width_x / vec_size_x > 0);
- const bool is_quantized_per_channel = is_data_type_quantized_per_channel(input->info()->data_type());
+ const bool is_quantized_per_channel = is_data_type_quantized_per_channel(src->data_type());
std::string kernel_name = "dequantization_layer";
// Create kernel
CLBuildOptions build_opts;
if(!is_quantized_per_channel)
{
- const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
- const int qoffset = is_data_type_quantized_asymmetric(input->info()->data_type()) ? qinfo.offset : 0;
+ const UniformQuantizationInfo qinfo = src->quantization_info().uniform();
+ const int qoffset = is_data_type_quantized_asymmetric(src->data_type()) ? qinfo.offset : 0;
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qoffset));
}
else
{
kernel_name += "_per_channel";
- kernel_name += input->info()->data_layout() == DataLayout::NCHW ? "_nchw" : "_nhwc";
+ kernel_name += src->data_layout() == DataLayout::NCHW ? "_nchw" : "_nhwc";
}
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
- build_opts.add_option("-DDATA_TYPE_SRC=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DDATA_TYPE_DST=" + get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_SRC=" + get_cl_type_from_data_type(src->data_type()));
+ build_opts.add_option("-DDATA_TYPE_DST=" + get_cl_type_from_data_type(dst->data_type()));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
// Create kernel name
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*output->info());
+ Window win = calculate_max_window(*dst);
if(multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
// Set output valid region
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+Status ClDequantizationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
return Status{};
}
-void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClDequantizationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- const bool is_quantized_per_channel = is_data_type_quantized_per_channel(_input->info()->data_type());
+ auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ const bool is_quantized_per_channel = is_data_type_quantized_per_channel(src->info()->data_type());
// Collapse windo
Window new_window = is_quantized_per_channel ? window.collapse_if_possible(ICLKernel::window(), 4) : window.collapse_if_possible(ICLKernel::window(), 3);
@@ -144,16 +142,18 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
if(is_quantized_per_channel)
{
unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters
- _kernel.setArg(idx++, _input->quantization().scale->cl_buffer());
+ _kernel.setArg(idx++, src->quantization().scale->cl_buffer());
}
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
}
while(new_window.slide_window_slice_3D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClDequantizationKernel.h b/src/core/gpu/cl/kernels/ClDequantizationKernel.h
new file mode 100644
index 0000000000..3ccf90c204
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClDequantizationKernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H
+#define ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the dequantization layer kernel. */
+class ClDequantizationKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClDequantizationKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDequantizationKernel);
+ /** Initialise the kernel's input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] dst Destination tensor info. Data types supported: F16/F32.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClDequantizationKernel
+ *
+ * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] dst Output tensor info. Data types supported: F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_DEQUANTIZATION_KERNEL_H */
diff --git a/src/runtime/CL/functions/CLDequantizationLayer.cpp b/src/runtime/CL/functions/CLDequantizationLayer.cpp
index d358813724..e0381f90ae 100644
--- a/src/runtime/CL/functions/CLDequantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLDequantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,27 @@
*/
#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClDequantization.h"
namespace arm_compute
{
+struct CLDequantizationLayer::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClDequantization> op{ nullptr };
+};
+
+CLDequantizationLayer::CLDequantizationLayer()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLDequantizationLayer::~CLDequantizationLayer() = default;
+
void CLDequantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -34,13 +51,23 @@ void CLDequantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
void CLDequantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- auto k = std::make_unique<CLDequantizationLayerKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
+ _impl->src = input;
+ _impl->dst = output;
+
+ _impl->op = std::make_unique<opencl::ClDequantization>();
+ _impl->op->configure(compile_context, input->info(), output->info());
}
Status CLDequantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return CLDequantizationLayerKernel::validate(input, output);
+ return opencl::ClDequantization::validate(input, output);
+}
+
+void CLDequantizationLayer::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index fb698d5b88..81e24dba08 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,9 +25,9 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "src/core/CL/kernels/CLPadLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
@@ -43,8 +43,8 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
_compute_anchors_kernel(std::make_unique<CLComputeAllAnchorsKernel>()),
_bounding_box_kernel(std::make_unique<CLBoundingBoxTransformKernel>()),
_pad_kernel(std::make_unique<CLPadLayerKernel>()),
- _dequantize_anchors(std::make_unique<CLDequantizationLayerKernel>()),
- _dequantize_deltas(std::make_unique<CLDequantizationLayerKernel>()),
+ _dequantize_anchors(std::make_unique<CLDequantizationLayer>()),
+ _dequantize_deltas(std::make_unique<CLDequantizationLayer>()),
_quantize_all_proposals(std::make_unique<CLQuantizationLayer>()),
_cpp_nms(memory_manager),
_is_nhwc(false),
@@ -261,10 +261,10 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
if(is_qasymm8)
{
TensorInfo all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info));
TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
@@ -363,8 +363,8 @@ void CLGenerateProposalsLayer::run()
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(*_dequantize_anchors, false);
- CLScheduler::get().enqueue(*_dequantize_deltas, false);
+ _dequantize_anchors->run();
+ _dequantize_deltas->run();
}
// Build the boxes
diff --git a/src/runtime/gpu/cl/operators/ClDequantization.cpp b/src/runtime/gpu/cl/operators/ClDequantization.cpp
new file mode 100644
index 0000000000..df3203d2e1
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClDequantization.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClDequantization.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClDequantizationKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClDequantization::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
+{
+ auto k = std::make_unique<kernels::ClDequantizationKernel>();
+ k->configure(compile_context, src, dst);
+ _kernel = std::move(k);
+}
+
+Status ClDequantization::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ return kernels::ClDequantizationKernel::validate(src, dst);
+}
+
+void ClDequantization::run(ITensorPack &tensors)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
+ CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDequantization.h b/src/runtime/gpu/cl/operators/ClDequantization.h
new file mode 100644
index 0000000000..a696b73d2e
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClDequantization.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEQUANTIZATION_H
+#define ARM_COMPUTE_CL_DEQUANTIZATION_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to run @ref kernels::ClDequantizationKernel that dequantizes an input tensor */
+class ClDequantization : public IClOperator
+{
+public:
+ /** Constructor */
+ ClDequantization() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] dst Destination tensor info with the same dimensions of @p src. Data type supported: F16/F32.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
+ *
+ * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] dst Output tensor info. Data type supported: F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+ // Inherited method overridden
+ void run(ITensorPack &tensors) override;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DEQUANTIZATION_H */