aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Manuel Bottini <manuel.bottini@arm.com> 2021-03-01 17:39:36 +0000
committer Manuel Bottini <manuel.bottini@arm.com> 2021-03-10 15:45:16 +0000
commit 5a1bf629752720a7ba0c88f34249393f7e52ad3c (patch)
tree 558391e00c108062b77983eede729863fdceccf1
parent 0ded4c40578bc78003756d171f2bbe15f6ac72bc (diff)
download ComputeLibrary-5a1bf629752720a7ba0c88f34249393f7e52ad3c.tar.gz
Port OpenCL Quantization to new API
Partially resolves: COMPMID-4193
Change-Id: Ie8367769c690442a0e30383c67851b50ab7c6742
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5231
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- Android.bp 3
-rw-r--r-- arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h 6
-rw-r--r-- arm_compute/runtime/CL/functions/CLQuantizationLayer.h 33
-rw-r--r-- docs/00_introduction.dox 2
-rw-r--r-- src/core/CL/CLKernels.h 1
-rw-r--r-- src/core/CL/kernels/CLQuantizationLayerKernel.h 86
-rw-r--r-- src/core/gpu/cl/kernels/ClQuantizationKernel.cpp (renamed from src/core/CL/kernels/CLQuantizationLayerKernel.cpp) 78
-rw-r--r-- src/core/gpu/cl/kernels/ClQuantizationKernel.h 73
-rw-r--r-- src/runtime/CL/functions/CLGenerateProposalsLayer.cpp 8
-rw-r--r-- src/runtime/CL/functions/CLQuantizationLayer.cpp 38
-rw-r--r-- src/runtime/gpu/cl/operators/ClQuantization.cpp 53
-rw-r--r-- src/runtime/gpu/cl/operators/ClQuantization.h 67
12 files changed, 302 insertions, 146 deletions
diff --git a/Android.bp b/Android.bp
index a2df76e832..abaf824caa 100644
--- a/Android.bp
+++ b/Android.bp
@@ -136,7 +136,6 @@ cc_library_static {
"src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp",
"src/core/CL/kernels/CLPriorBoxLayerKernel.cpp",
"src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp",
- "src/core/CL/kernels/CLQuantizationLayerKernel.cpp",
"src/core/CL/kernels/CLROIAlignLayerKernel.cpp",
"src/core/CL/kernels/CLROIPoolingLayerKernel.cpp",
"src/core/CL/kernels/CLRangeKernel.cpp",
@@ -385,6 +384,7 @@ cc_library_static {
"src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
"src/core/gpu/cl/kernels/ClPermuteKernel.cpp",
"src/core/gpu/cl/kernels/ClPoolingKernel.cpp",
+ "src/core/gpu/cl/kernels/ClQuantizationKernel.cpp",
"src/core/gpu/cl/kernels/ClReshapeKernel.cpp",
"src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp",
"src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
@@ -681,6 +681,7 @@ cc_library_static {
"src/runtime/gpu/cl/operators/ClPRelu.cpp",
"src/runtime/gpu/cl/operators/ClPermute.cpp",
"src/runtime/gpu/cl/operators/ClPooling.cpp",
+ "src/runtime/gpu/cl/operators/ClQuantization.cpp",
"src/runtime/gpu/cl/operators/ClReshape.cpp",
"src/runtime/gpu/cl/operators/ClSoftmax.cpp",
"src/runtime/gpu/cl/operators/ClSub.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 4d6bc66487..e6b0eed6d8 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -43,7 +43,7 @@ class CLBoundingBoxTransformKernel;
class CLDequantizationLayerKernel;
class CLComputeAllAnchorsKernel;
class CLPadLayerKernel;
-class CLQuantizationLayerKernel;
+class CLQuantizationLayer;
class ICLTensor;
class ITensorInfo;
@@ -56,7 +56,7 @@ class ITensorInfo;
* -# @ref CLBoundingBoxTransform
* -# @ref CLPadLayerKernel
* -# @ref CLDequantizationLayerKernel x 2
- * -# @ref CLQuantizationLayerKernel
+ * -# @ref CLQuantizationLayer
* And the following CPP functions:
* -# @ref CPPBoxWithNonMaximaSuppressionLimit
*/
@@ -146,7 +146,7 @@ private:
std::unique_ptr<CLPadLayerKernel> _pad_kernel;
std::unique_ptr<CLDequantizationLayerKernel> _dequantize_anchors;
std::unique_ptr<CLDequantizationLayerKernel> _dequantize_deltas;
- std::unique_ptr<CLQuantizationLayerKernel> _quantize_all_proposals;
+ std::unique_ptr<CLQuantizationLayer> _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index a0a27c5cb4..c5dad53513 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,10 @@
#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H
#define ARM_COMPUTE_CLQUANTIZATIONLAYER_H
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -35,14 +37,26 @@ class ITensorInfo;
/** Basic function to simulate a quantization layer. This function calls the following CL kernels:
*
- * @note The implementation supports only 3D input tensors.
+ * -# @ref opencl::ClQuantization
*
- * -# @ref CLQuantizationLayerKernel
+ * @note The implementation supports only 3D input tensors.
*
*/
-class CLQuantizationLayer : public ICLSimpleFunction
+class CLQuantizationLayer : public IFunction
{
public:
+ /** Default Constructor */
+ CLQuantizationLayer();
+ /** Default Destructor */
+ ~CLQuantizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationLayer(const CLQuantizationLayer &) = delete;
+ /** Default move constructor */
+ CLQuantizationLayer(CLQuantizationLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationLayer &operator=(const CLQuantizationLayer &) = delete;
+ /** Default move assignment operator */
+ CLQuantizationLayer &operator=(CLQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
@@ -68,6 +82,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} //namespace arm_compute
#endif /* ARM_COMPUTE_CLQUANTIZATIONLAYER_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 1fdc6882ad..4b37b5b768 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -1312,7 +1312,7 @@ v17.09 Public major release
- CLGEMMTranspose1xW
- CLGEMMMatrixVectorMultiplyKernel
- @ref CLL2NormalizeLayerKernel / @ref CLL2NormalizeLayer
- - @ref CLQuantizationLayerKernel @ref CLMinMaxLayerKernel / @ref CLQuantizationLayer
+ - CLQuantizationLayerKernel @ref CLMinMaxLayerKernel / @ref CLQuantizationLayer
- @ref CLROIPoolingLayerKernel / @ref CLROIPoolingLayer
- @ref CLReductionOperationKernel / @ref CLReductionOperation
- CLReshapeLayerKernel / @ref CLReshapeLayer
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index 45e27f2b1b..b93f270859 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -79,7 +79,6 @@
#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "src/core/CL/kernels/CLRangeKernel.h"
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.h b/src/core/CL/kernels/CLQuantizationLayerKernel.h
deleted file mode 100644
index e9d03decb3..0000000000
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors.
- */
-class CLQuantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLQuantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLQuantizationLayerKernel() = default;
- /** Set the input, output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
index 76e703f0dd..ea56289157 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,11 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClQuantizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
@@ -33,58 +34,54 @@
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
// Output must always be initialized
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
return Status{};
}
} // namespace
-CLQuantizationLayerKernel::CLQuantizationLayerKernel()
- : _input(nullptr), _output(nullptr)
+ClQuantizationKernel::ClQuantizationKernel()
{
}
-void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+void ClQuantizationKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
-void CLQuantizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ auto padding_info = get_padding_info({ src, dst });
- auto padding_info = get_padding_info({ input, output });
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- const int vec_size_x = 16 / input->info()->element_size();
- const int input_width_x = input->info()->tensor_shape().x();
+ const int vec_size_x = 16 / src->element_size();
+ const int input_width_x = src->tensor_shape().x();
const bool multi_access_x = (input_width_x / vec_size_x > 0);
- const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
- const DataType output_data_type = output->info()->data_type();
+ const UniformQuantizationInfo qinfo = dst->quantization_info().uniform();
+ const DataType output_data_type = dst->data_type();
float scale_to_apply = qinfo.scale;
int32_t offset_to_apply = qinfo.offset;
- if(is_data_type_quantized_asymmetric(_input->info()->data_type()))
+ if(is_data_type_quantized_asymmetric(src->data_type()))
{
/*
* In case of requantization of a quantized input tensor to an output tensor with another quantization
@@ -116,7 +113,7 @@ void CLQuantizationLayerKernel::configure(const CLCompileContext &compile_contex
* z_n = - z_i * s_i / s_o + z_o
*
*/
- const UniformQuantizationInfo qinfo_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qinfo_in = src->quantization_info().uniform();
scale_to_apply /= qinfo_in.scale;
// In order to minimize flooring we convert the offset to a float,
// then compute the new offset in the float domain,
@@ -126,11 +123,11 @@ void CLQuantizationLayerKernel::configure(const CLCompileContext &compile_contex
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option_if(is_data_type_float(_input->info()->data_type()), "-DIS_FLOAT");
+ build_opts.add_option_if(is_data_type_float(src->data_type()), "-DIS_FLOAT");
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_to_apply));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_to_apply));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
- build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(src->data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output_data_type));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
std::pair<int, int> min_max_quant_values = quantization::get_min_max_values_from_quantized_data_type(output_data_type);
@@ -140,39 +137,44 @@ void CLQuantizationLayerKernel::configure(const CLCompileContext &compile_contex
_kernel = create_kernel(compile_context, "quantization_layer", build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
+ Window win = calculate_max_window(*src, Steps());
if(multi_access_x)
{
win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+Status ClQuantizationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
return Status{};
}
-void CLQuantizationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClQuantizationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+ auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), 3);
Window slice = window_collapsed.first_slice_window_3D();
do
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClQuantizationKernel.h b/src/core/gpu/cl/kernels/ClQuantizationKernel.h
new file mode 100644
index 0000000000..20822cf9c9
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H
+#define ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors.
+ */
+class ClQuantizationKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClQuantizationKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClQuantizationKernel);
+ /** Set the input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[out] dst Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClQuantizationKernel
+ *
+ * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[in] dst Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H */
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 365f95243f..fb698d5b88 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,11 +25,11 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "src/core/CL/kernels/CLPadLayerKernel.h"
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
@@ -45,7 +45,7 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
_pad_kernel(std::make_unique<CLPadLayerKernel>()),
_dequantize_anchors(std::make_unique<CLDequantizationLayerKernel>()),
_dequantize_deltas(std::make_unique<CLDequantizationLayerKernel>()),
- _quantize_all_proposals(std::make_unique<CLQuantizationLayerKernel>()),
+ _quantize_all_proposals(std::make_unique<CLQuantizationLayer>()),
_cpp_nms(memory_manager),
_is_nhwc(false),
_is_qasymm8(false),
@@ -270,7 +270,7 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
- ARM_COMPUTE_RETURN_ON_ERROR(CLQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized;
}
else
@@ -372,7 +372,7 @@ void CLGenerateProposalsLayer::run()
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(*_quantize_all_proposals, false);
+ _quantize_all_proposals->run();
}
// Non maxima suppression
diff --git a/src/runtime/CL/functions/CLQuantizationLayer.cpp b/src/runtime/CL/functions/CLQuantizationLayer.cpp
index cb8cabef87..1f6ddb6014 100644
--- a/src/runtime/CL/functions/CLQuantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLQuantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,26 @@
*/
#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClQuantization.h"
namespace arm_compute
{
+struct CLQuantizationLayer::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClQuantization> op{ nullptr };
+};
+
+CLQuantizationLayer::CLQuantizationLayer()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLQuantizationLayer::~CLQuantizationLayer() = default;
+
void CLQuantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -34,13 +50,23 @@ void CLQuantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
void CLQuantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- auto k = std::make_unique<CLQuantizationLayerKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
+ _impl->src = input;
+ _impl->dst = output;
+
+ _impl->op = std::make_unique<opencl::ClQuantization>();
+ _impl->op->configure(compile_context, input->info(), output->info());
}
Status CLQuantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return CLQuantizationLayerKernel::validate(input, output);
+ return opencl::ClQuantization::validate(input, output);
+}
+
+void CLQuantizationLayer::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
}
} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantization.cpp b/src/runtime/gpu/cl/operators/ClQuantization.cpp
new file mode 100644
index 0000000000..2e753b550e
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClQuantization.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClQuantization.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClQuantizationKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClQuantization::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
+{
+ auto k = std::make_unique<kernels::ClQuantizationKernel>();
+ k->configure(compile_context, src, dst);
+ _kernel = std::move(k);
+}
+
+Status ClQuantization::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+ return kernels::ClQuantizationKernel::validate(src, dst);
+}
+
+void ClQuantization::run(ITensorPack &tensors)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
+ CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantization.h b/src/runtime/gpu/cl/operators/ClQuantization.h
new file mode 100644
index 0000000000..d938ff95a0
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClQuantization.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_QUANTIZATION_H
+#define ARM_COMPUTE_CL_QUANTIZATION_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to quantize a tensor. This function calls the following OpenCL kernel:
+ *
+ * -# @ref kernels::ClQuantizationKernel
+ */
+class ClQuantization : public IClOperator
+{
+public:
+ /** Constructor */
+ ClQuantization() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
+ * @param[out] dst Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this function
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayer
+ *
+ * @param[in] src Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
+ * @param[in] dst Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+ // Inherited method overridden
+ void run(ITensorPack &tensors) override;
+};
+} // namespace opencl
+} //namespace arm_compute
+#endif /* ARM_COMPUTE_CL_QUANTIZATION_H */