From cbbed288a71f2f048123db3cf396361e5d66ce93 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 20 Dec 2019 13:26:08 +0000 Subject: COMPMID-2991: Add support for QASYMM8_SIGNED in CL kernels/functions - part 2 Adding support for QASYMM8_SIGNED to the following CL kernels/functions: - CLActivationLayerKernel/CLActivationLayer - CLComparisonKernel/CLComparison - CLConvertFullyConnectedWeightsKernel/CLConvertFullyConnectedWeights - CLDeconvolutionLayerUpsampleKernel/CLDeconvolutionLayerUpsample - CLDepthToSpaceLayerKernel/CLDepthToSpaceLayer - CLDequantizationLayerKernel/CLDequantizationLayer - CLGEMMMatrixVectorMultiplyKernel - CLNormalizePlanarYUVLayerKernel - CLPReluLayer - CLPixelWiseMultiplicationKernel/CLPixelWiseMultiplication - CLPoolingLayerKernel/CLPoolingLayer Change-Id: I874bbb7c2b08baa9c5ff4c9e6bc8778b42a6bec5 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2539 Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../core/CL/kernels/CLActivationLayerKernel.h | 6 +- arm_compute/core/CL/kernels/CLComparisonKernel.h | 6 +- .../kernels/CLConvertFullyConnectedWeightsKernel.h | 6 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 4 +- .../core/CL/kernels/CLDepthToSpaceLayerKernel.h | 6 +- .../core/CL/kernels/CLDequantizationLayerKernel.h | 6 +- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 14 ++-- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 6 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 6 +- arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 6 +- .../runtime/CL/functions/CLActivationLayer.h | 6 +- arm_compute/runtime/CL/functions/CLComparison.h | 6 +- .../CL/functions/CLConvertFullyConnectedWeights.h | 8 +- .../CL/functions/CLDeconvolutionLayerUpsample.h | 6 +- .../runtime/CL/functions/CLDepthToSpaceLayer.h | 20 ++--- .../runtime/CL/functions/CLDequantizationLayer.h | 6 +- .../CL/functions/CLNormalizePlanarYUVLayer.h | 22 ++--- 
arm_compute/runtime/CL/functions/CLPReluLayer.h | 6 +- .../CL/functions/CLPixelWiseMultiplication.h | 6 +- arm_compute/runtime/CL/functions/CLPoolingLayer.h | 6 +- src/core/CL/cl_kernels/comparisons.cl | 11 +-- src/core/CL/cl_kernels/convert_fc_weights.cl | 4 +- src/core/CL/cl_kernels/deconvolution_layer.cl | 4 +- src/core/CL/cl_kernels/depth_to_space.cl | 6 +- src/core/CL/cl_kernels/dequantization_layer.cl | 4 +- src/core/CL/cl_kernels/gemv.cl | 24 +++--- .../normalize_planar_yuv_layer_quantized.cl | 6 +- src/core/CL/cl_kernels/pixelwise_mul_int.cl | 8 +- src/core/CL/cl_kernels/pooling_layer.cl | 18 +--- src/core/CL/cl_kernels/pooling_layer_quantized.cl | 67 ++++++++------- src/core/CL/kernels/CLActivationLayerKernel.cpp | 95 +++++++++++++--------- src/core/CL/kernels/CLComparisonKernel.cpp | 11 +-- .../CLConvertFullyConnectedWeightsKernel.cpp | 13 ++- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 9 +- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 5 +- .../CL/kernels/CLDequantizationLayerKernel.cpp | 4 +- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 6 +- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 17 ++-- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 29 +++++-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 39 +++++---- .../functions/CLConvertFullyConnectedWeights.cpp | 9 +- src/runtime/CL/functions/CLDepthToSpaceLayer.cpp | 27 ++---- .../CL/functions/CLNormalizePlanarYUVLayer.cpp | 25 ++---- src/runtime/CL/functions/CLPoolingLayer.cpp | 26 ++++-- tests/datasets/DatatypeDataset.h | 3 +- tests/validation/CL/ActivationLayer.cpp | 14 ++-- tests/validation/CL/NormalizePlanarYUVLayer.cpp | 28 +++++-- tests/validation/CL/PReluLayer.cpp | 24 +++++- tests/validation/CL/PixelWiseMultiplication.cpp | 12 ++- tests/validation/CL/PoolingLayer.cpp | 22 +++-- .../reference/NormalizePlanarYUVLayer.cpp | 13 ++- tests/validation/reference/PoolingLayer.cpp | 11 ++- tests/validation/reference/UpsampleLayer.cpp | 33 +------- 53 files changed, 413 
insertions(+), 372 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index cb2fda2be8..5b65a54824 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,7 +51,7 @@ public: * @note If the output tensor is a nullptr, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] act_info Activation layer information. */ @@ -59,7 +59,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. * @param[in] output Destination tensor info. Data type supported: same as @p input * @param[in] act_info Activation layer information. * diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h index 21c6aeb064..a9c463901d 100644 --- a/arm_compute/core/CL/kernels/CLComparisonKernel.h +++ b/arm_compute/core/CL/kernels/CLComparisonKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,7 +50,7 @@ public: ~CLComparisonKernel() = default; /** Set the inputs and output tensors * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * @param[out] output Destination tensor. Data types supported: U8. * @param[in] operation Comparison operation to use. @@ -58,7 +58,7 @@ public: void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * @param[in] output Destination tensor. Data types supported: U8. * @param[in] operation Comparison operation to use. diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h index 6518dfc84c..b204eaa2ac 100644 --- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h +++ b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,7 +55,7 @@ public: ~CLConvertFullyConnectedWeightsKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. 
Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -63,7 +63,7 @@ public: void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h index 8382f3b4d7..a1c6bbdafe 100644 --- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -50,14 +50,14 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. 
*/ void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. * diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h index 6ae991c8e7..637e5fa960 100644 --- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,14 +49,14 @@ public: ~CLDepthToSpaceLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. */ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] output Tensor output info. 
Data types supported: same as @p input * @param[in] block_shape Block shape value. * diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h index d79cd89883..78b5c14128 100644 --- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,13 +48,13 @@ public: ~CLDequantizationLayerKernel() = default; /** Set the input, output, min and max. * - * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor. Data types supported: F16/F32. */ void configure(const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data types supported: F16/F32. * * @return a status diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h index 821c6ae5bb..8ee911dc0e 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,16 +46,16 @@ public: CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input, S32 for QASYMM8 input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input + * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. */ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel * - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input, S32 for QASYMM8 input. - * @param[in] output The output 2D tensor. Data types supported: Same as @p input + * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. + * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. 
* * @return a status */ diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h index d54aae31c5..4334882fd8 100644 --- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,7 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. @@ -60,7 +60,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel * * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor info. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. 
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index 0e5027b29a..58471ab299 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,7 +48,7 @@ public: CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; /** Initialise the kernel's input, output and border mode. * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] input2 An input tensor. Data types supported: same as @p input1. * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. * @param[in] scale Scale to apply after multiplication. @@ -60,7 +60,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] input2 An input tensor info. Data types supported: same as @p input1. * @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. * @param[in] scale Scale to apply after multiplication. 
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h index 95acdf4b6c..4b3ee24333 100644 --- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,14 +52,14 @@ public: /** Set the input and output tensors. * * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. * diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index f7781480bf..09f5d2bf58 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,7 +57,7 @@ public: * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. 
In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] act_info Activation layer parameters. */ @@ -65,7 +65,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. * @param[in] output Destination tensor info. Data type supported: same as @p input * @param[in] act_info Activation layer information. * diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h index 7f0b22341f..85dbe7129d 100644 --- a/arm_compute/runtime/CL/functions/CLComparison.h +++ b/arm_compute/runtime/CL/functions/CLComparison.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ class CLComparison : public ICLSimpleFunction public: /** Initialise the kernel's inputs and outputs. * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. 
@@ -48,7 +48,7 @@ public: void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); /** Static function to check if given info will lead to a valid configuration of @ref CLComparison * - * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input1 Source tensor. Data types supported: All. * @param[in] input2 Source tensor. Data types supported: Same as @p input1. * @param[in] output Destination tensor. Data types supported: U8. * @param[out] operation Comparison operation to be used. diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h index f0359ecc75..76a28ed6fe 100644 --- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,7 @@ class CLConvertFullyConnectedWeights : public ICLSimpleFunction public: /** Initialize the function. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. 
@@ -49,7 +49,7 @@ public: void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -90,7 +90,7 @@ public: } /** Configures the @ref CLConvertFullyConnectedWeights function * - * @param[in] input Source weights tensor info to convert. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Data type supported: All. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. */ diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index 6f015744bf..5a1009c79f 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -59,14 +59,14 @@ public: /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. + * @param[in, out] input Source tensor. 
Data type supported: All. * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] info Contains padding and policies to be used in the deconvolution. */ void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data type supported: All. * @param[in] output Destination tensor info. Data type supported: same as @p input. * @param[in] info Contains padding and policies to be used in the deconvolution. * diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h index ddee04a3dd..0c33ed34be 100644 --- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,43 +24,33 @@ #ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYER_H #define ARM_COMPUTE_CLDEPTHTOSPACELAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { class ICLTensor; /** Basic function to run @ref CLDepthToSpaceLayerKernel. */ -class CLDepthToSpaceLayer : public IFunction +class CLDepthToSpaceLayer : public ICLSimpleFunction { public: - /** Default constructor */ - CLDepthToSpaceLayer(); /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. 
Data types supported: same as @p input * @param[in] block_shape Block shape value. */ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayer. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run() override; - -private: - CLDepthToSpaceLayerKernel _depth_to_space_kernel; /**< CLDepthToSpaceLayerKernel to run */ }; } #endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h index 308349af02..48d6ba8435 100644 --- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,13 +40,13 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. - * Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. 
*/ void configure(const ICLTensor *input, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[in] output Output tensor info. Data type supported: F16/F32. * * @return a status diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h index 4fe5a111b6..5fbfdd18b7 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H #define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include namespace arm_compute { @@ -37,15 +37,13 @@ class ICLTensor; * * @note The function simulates a NormalizePlanarYUV layer. */ -class CLNormalizePlanarYUVLayer : public IFunction +class CLNormalizePlanarYUVLayer : public ICLSimpleFunction { public: - /** Default constructor */ - CLNormalizePlanarYUVLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destinationfeature tensor. Data type supported: same as @p input * @param[in] mean Mean values tensor. 
1 dimension with size equal to the number of input channels. Data types supported: Same as @p input * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. @@ -55,7 +53,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayer * * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/F16/F32. + * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor info. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. @@ -64,12 +62,6 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); - - // Inherited methods overridden: - void run() override; - -private: - CLNormalizePlanarYUVLayerKernel _norm_kernel; /**< NormalizePlanarYUV layer kernel to run */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h index 42876cd714..7f8a41238c 100644 --- a/arm_compute/runtime/CL/functions/CLPReluLayer.h +++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,14 +42,14 @@ public: * * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] alpha PRelu layer parameters. Data types supported: same of @p input. * @param[out] output Destination tensor. Data type supported: same as @p input */ void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] alpha PRelu layer parameters. Data types supported: same of @p input. * @param[in] output Destination tensor info. Data type supported: same as @p input * diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index fd64d7b939..72b1587b02 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction public: /** Initialise the kernel's inputs, output and convertion policy. * - * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. 
@@ -52,7 +52,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. * @param[in] input2 An input tensor info. Data types supported: same as @p input1. * @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. * @param[in] scale Scale to apply after multiplication. diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h index 19acb7fb40..c78b558ac8 100644 --- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,14 +43,14 @@ class CLPoolingLayer : public ICLSimpleFunction public: /** Set the input and output tensors. * - * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/F16/F32. + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
* @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. * diff --git a/src/core/CL/cl_kernels/comparisons.cl b/src/core/CL/cl_kernels/comparisons.cl index 8824b136b2..a41b7e2966 100644 --- a/src/core/CL/cl_kernels/comparisons.cl +++ b/src/core/CL/cl_kernels/comparisons.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * @attention The comparison operation should be given as a preprocessor argument using -DOP=operation. e.g. -DOP=LESS * - * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32 + * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All non-quantized data types. * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -93,12 +93,13 @@ __kernel void DEFINE_KERNEL(OP_NAME)( #if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) /** This function compares two quantized tensors. * + * @note The inputs' data type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar * @note The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10 * @note The quantization offset of the second operand must be passed at compile time using -DOFFSET_IN2, i.e. -DOFFSET_IN2=10 * @note The quantization scale of the first operand must be passed at compile time using -DSCALE_IN1, i.e. 
-DSCALE_IN1=10 * @note The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10 * - * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8 + * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All quantized data types. * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -133,8 +134,8 @@ __kernel void DEFINE_KERNEL_QUANTIZED(OP_NAME)( Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16); - int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16); + int16 in_a = CONVERT(vload16(0, (__global DATA_TYPE *)in1.ptr), int16); + int16 in_b = CONVERT(vload16(0, (__global DATA_TYPE *)in2.ptr), int16); in_a = in_a - (int16)((int)OFFSET_IN1); in_b = in_b - (int16)((int)OFFSET_IN2); diff --git a/src/core/CL/cl_kernels/convert_fc_weights.cl b/src/core/CL/cl_kernels/convert_fc_weights.cl index d47b733acd..db0873755e 100644 --- a/src/core/CL/cl_kernels/convert_fc_weights.cl +++ b/src/core/CL/cl_kernels/convert_fc_weights.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float * @attention Original input tensor width*height and depth should be given as a preprocessor argument using -DFACTOR_1=size and -DFACTOR_2=size for NCHW and vice versa for NHWC. e.g. -DFACTOR_1=256 and -DFACTOR_2=128 * - * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32 + * @param[in] src_ptr Pointer to the source image. 
Supported data types: All. * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl index ea2455c613..a9a6ac1947 100644 --- a/src/core/CL/cl_kernels/deconvolution_layer.cl +++ b/src/core/CL/cl_kernels/deconvolution_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ /** This function applies upsample on an input image. * - * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8/F16/F32 + * @param[in] src_ptr Pointer to the source image. Supported data types: All. * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/depth_to_space.cl b/src/core/CL/cl_kernels/depth_to_space.cl index 2ffd0a40e7..5c2e8a1d57 100644 --- a/src/core/CL/cl_kernels/depth_to_space.cl +++ b/src/core/CL/cl_kernels/depth_to_space.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,7 @@ * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 * @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input_ptr Pointer to the source tensor. Supported data types: All. 
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -72,7 +72,7 @@ __kernel void depth_to_space_nchw( * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 * @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input_ptr Pointer to the source tensor. Supported data types: All. * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl index 7550b4ba76..add86e3f2e 100644 --- a/src/core/CL/cl_kernels/dequantization_layer.cl +++ b/src/core/CL/cl_kernels/dequantization_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,7 +33,7 @@ * @note Quantization scale of input tensor is passed in with -DSCALE=scale. * @note Quantization offset of input tensor is passed in with -DOFFSET=offset. * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM8 + * @param[in] input_ptr Pointer to the source tensor. 
Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/gemv.cl b/src/core/CL/cl_kernels/gemv.cl index 811aa1b865..aabde4119f 100644 --- a/src/core/CL/cl_kernels/gemv.cl +++ b/src/core/CL/cl_kernels/gemv.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -110,12 +110,12 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC } } } -#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */ -#if defined(SRC_WIDTH) && defined(SRC_HEIGHT) /** This kernel applies dot product to each plane on the input tensor and the corresponding column of the reshaped weight tensor. * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8 + * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: QASYMM8/QASYMM8_SIGNED * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -123,13 +123,13 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[in] weights_ptr Pointer to the weights tensor. Same as @p src_ptr + * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: S32 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor @@ -158,14 +158,14 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src), // This kernel handle 4 rows in per thread so that it can reuse the weights for(int i = 0; i < SRC_WIDTH; i += 4) { - int4 w = convert_int4(vload4(0, (__global uchar *)(current_weights + i * weights_stride_x))) + (int4)weights_offset; + int4 w = convert_int4(vload4(0, (__global DATA_TYPE *)(current_weights + i * weights_stride_x))) + (int4)weights_offset; int4 offset = (int4)i * (int4)src_stride_x + (int4)(0, 1, 2, 3) * (int4)src_stride_y; - int4 tmp0 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s0))) + (int4)input_offset; - int4 tmp1 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s1))) + (int4)input_offset; - int4 tmp2 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s2))) + (int4)input_offset; - int4 tmp3 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s3))) + (int4)input_offset; + int4 tmp0 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s0))) + (int4)input_offset; + int4 tmp1 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s1))) + (int4)input_offset; + int4 tmp2 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s2))) + (int4)input_offset; + int4 tmp3 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s3))) + (int4)input_offset; // Accumulate acc0 += tmp0.s0 * w.s0 + tmp0.s1 * w.s1 + tmp0.s2 * w.s2 + tmp0.s3 * w.s3; @@ -197,4 +197,4 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src), } } } -#endif /* defined(SRC_WIDTH) && defined(SRC_HEIGHT) */ +#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */ diff --git 
a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl index 925975d2ba..b2ba65f812 100644 --- a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl +++ b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,7 @@ * @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8 * @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8 * - * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8 + * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) * @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) @@ -102,7 +102,7 @@ __kernel void normalize_planar_yuv_layer_q8_nchw(TENSOR3D_DECLARATION(src), * @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8 * @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8 * - * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8 + * @param[in] src_ptr Pointer to the first source tensor. 
Supported data types: QASYMM8/QASYMM8_SIGNED * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) * @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/pixelwise_mul_int.cl b/src/core/CL/cl_kernels/pixelwise_mul_int.cl index 989316d661..d277c6c56f 100644 --- a/src/core/CL/cl_kernels/pixelwise_mul_int.cl +++ b/src/core/CL/cl_kernels/pixelwise_mul_int.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -109,7 +109,7 @@ __kernel void pixelwise_mul_int( * @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 * - * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QSYMM16 + * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16 * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) @@ -117,7 +117,7 @@ __kernel void pixelwise_mul_int( * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32 + * @param[in] in2_ptr Pointer to the source image. 
Supported data types: same as @p in1_ptr * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) @@ -125,7 +125,7 @@ __kernel void pixelwise_mul_int( * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32 + * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index c8b5e07b47..207669e43e 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -391,28 +391,16 @@ __kernel void pooling_layer_optimized_3( #if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y) -// Set the initial value for the pooling operation accordingly with the data type -#if defined(POOL_AVG) || defined(POOL_L2) -#define INITIAL_VALUE 0 -#else /* defined(POOL_AVG) || defined(POOL_L2) */ -#if FP16 -#define INITIAL_VALUE -HALF_MAX -#else // FP16 -#define INITIAL_VALUE -FLT_MAX -#endif // FP16 - -#endif // POOL_AVG - /** Performs a pooling function of pool size equal to N (NCHW) * * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; - * @note -DFP16 must be passed at compile time if half float data type is used * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13; * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension + * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0 * * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) @@ -519,13 +507,13 @@ ACC_DATA_TYPE calculate_avg_scale_nhwc(const int pool_size_x, const int pool_siz /** Performs a pooling function of pool size equal to N (NHWC) * * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32 - * @note -DFP16 must be passed at compile time if half float data type is used * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. 
-DPOOL_SIZE_X=13; * @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT * @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. + * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0 * * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl index 2df22d736c..3a370eea93 100644 --- a/src/core/CL/cl_kernels/pooling_layer_quantized.cl +++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,18 +23,19 @@ */ #include "helpers.h" +#if defined(DATA_TYPE) && defined(INITIAL_VALUE) +#define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) -#define VEC_FLOAT(VEC_SIZE) \ - VEC_DATA_TYPE(float, VEC_SIZE) +#define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE) #define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE) -#define VEC_UCHAR(VEC_SIZE) VEC_DATA_TYPE(uchar, VEC_SIZE) #define CONVERT_RTE(x, type) (convert_##type##_rte((x))) #define CONVERT_DOWN(x, type) CONVERT_RTE(x, type) #define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res) \ { \ const VEC_FLOAT(VEC_SIZE) in_f32 = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \ const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset)); \ - res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_UCHAR(VEC_SIZE)); \ + res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE)); \ } #endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ @@ -74,8 +75,10 @@ int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension + * @note Input data type must be passed at compile time using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar + * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0 * - * @param[in] input_ptr Pointer to the source image. 
Supported data types: QASYMM8 + * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) @@ -100,8 +103,8 @@ __kernel void pooling_layer_MxN_quantized_nchw( Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - int8 vdata = 0; - int sdata = 0; + int8 vdata = INITIAL_VALUE; + int sdata = INITIAL_VALUE; // Load data for(int y = 0; y < POOL_SIZE_Y; y++) @@ -109,17 +112,18 @@ __kernel void pooling_layer_MxN_quantized_nchw( int x = 0; for(; x <= ((int)POOL_SIZE_X - 8); x += 8) { - uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0)); - int8 data0 = convert_int8(data); - vdata = POOL_OP(vdata, data0); + VEC_TYPE(8) + data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0)); + int8 data0 = convert_int8(data); + vdata = POOL_OP(vdata, data0); } // Leftover for(; x < (int)POOL_SIZE_X; ++x) { - uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0)); - int data0 = convert_int(data); - sdata = POOL_OP(sdata, data0); + DATA_TYPE data = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0)); + int data0 = convert_int(data); + sdata = POOL_OP(sdata, data0); } } @@ -133,22 +137,22 @@ __kernel void pooling_layer_MxN_quantized_nchw( res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))); #endif /* defined(POOL_AVG) */ - uchar result_u8 = convert_uchar(res); + DATA_TYPE result_q8 = CONVERT(res, DATA_TYPE); #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) - const float result_f32 = convert_float(result_u8); + const float result_f32 = convert_float(result_q8); const 
float input_offset = (float)OFFSET_IN1; const float input_scale = (float)SCALE_IN1; const float scale_out = (float)SCALE_OUT; const float offset_out = (float)OFFSET_OUT; const float in_f32 = (result_f32 - input_offset) * input_scale; const float out_f32 = in_f32 / scale_out + offset_out; - result_u8 = convert_uchar_sat(convert_int_rte(out_f32)); + result_q8 = CONVERT_SAT(convert_int_rte(out_f32), DATA_TYPE); #endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ - *(__global uchar *)output.ptr = result_u8; + *(__global DATA_TYPE *)output.ptr = result_q8; } int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h, @@ -158,7 +162,7 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u #if defined(DST_DEPTH) int start_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y; #else /* defined(DST_DEPTH) */ - int start_y = get_global_id(2) * stride_y - pad_y; + int start_y = get_global_id(2) * stride_y - pad_y; #endif /* defined(DST_DEPTH) */ const int end_x = min(start_x + pool_size_x, upper_bound_w); @@ -178,8 +182,9 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u * @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. + * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0 * - * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8 + * @param[in] input_ptr Pointer to the source image. 
Supported data types: QASYMM8/QASYMM8_SIGNED * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) @@ -209,17 +214,17 @@ __kernel void pooling_layer_MxN_quantized_nhwc( Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH); Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH); #else /* defined(DST_DEPTH) */ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); + Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); + Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); #endif /* defined(DST_DEPTH) */ - int8 vdata = 0; + int8 vdata = INITIAL_VALUE; const int idx_width = get_global_id(1) * STRIDE_X; #if defined(DST_DEPTH) const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y; #else /* defined(DST_DEPTH) */ - const int idx_height = get_global_id(2) * STRIDE_Y; + const int idx_height = get_global_id(2) * STRIDE_Y; #endif /* defined(DST_DEPTH) */ for(int y = 0; y < POOL_SIZE_Y; ++y) @@ -231,9 +236,11 @@ __kernel void pooling_layer_MxN_quantized_nhwc( x1 = select(x1, PAD_X - idx_width - 1, y != y1); #if defined(DST_DEPTH) - uchar8 data = vload8(0, (__global uchar *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0)); + VEC_TYPE(8) + data = vload8(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0)); #else /* defined(DST_DEPTH) */ - uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y)); + VEC_TYPE(8) + data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y)); #endif /* defined(DST_DEPTH) */ int8 data0 = convert_int8(data); @@ -246,11 +253,13 @@ __kernel void pooling_layer_MxN_quantized_nhwc( vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, 
POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)))); #endif /* defined(POOL_AVG) */ - uchar8 out_u8 = convert_uchar8(vdata); + VEC_TYPE(8) + out_q8 = CONVERT(vdata, VEC_TYPE(8)); #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) - REQUANTIZE(8, out_u8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_u8); + REQUANTIZE(8, out_q8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q8); #endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ // Store result - vstore8(out_u8, 0, (__global uchar *)output.ptr); + vstore8(out_q8, 0, (__global DATA_TYPE *)output.ptr); } +#endif /* defined(DATA_TYPE) && defined(INITIAL_VALUE) */ diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 5062fd1801..270eb78dcb 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,14 +39,14 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::F16, DataType::F32); static std::set quantized_supported_activations = { @@ -63,12 +63,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(data_type) && (quantized_supported_activations.count(f_act) == 0), "For Quantized data type only tanh, logistic, relu and lower/upper bounded relu are supported"); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128))); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128))); + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0))); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && 
(f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128))); + // Checks performed when output is configured if((output != nullptr) && (output->total_size() != 0)) { @@ -135,27 +138,11 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act const DataType dt = input->info()->data_type(); float a_const = act_info.a(); float b_const = act_info.b(); - int a_const_int = 0; - int b_const_int = 0; const ActivationLayerInfo::ActivationFunction f_act = act_info.activation(); const bool is_quantized = is_data_type_quantized(dt); const bool perform_activation_in_float = (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) || (f_act == ActivationLayerInfo::ActivationFunction::TANH); - // Create quantized version of constants a, b if needed - if(dt == DataType::QASYMM8) - { - const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); - a_const_int = quantize_qasymm8(a_const, iq_info); - b_const_int = quantize_qasymm8(b_const, iq_info); - } - else if(dt == DataType::QSYMM16) - { - const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); - a_const_int = quantize_qsymm16(a_const, iq_info); - b_const_int = quantize_qsymm16(b_const, iq_info); - } - // Set build options CLBuildOptions build_opts; build_opts.add_option_if(perform_activation_in_float, "-DFLOAT_DOMAIN"); @@ -164,28 +151,59 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + 
support::cpp11::to_string(num_elems_processed_per_iteration))); - // Set A, B constants in build options - if(is_quantized && !perform_activation_in_float) - { - build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); - build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - } - else - { - build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const))); - build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); - } + std::string kernel_name = std::string("activation_layer"); // Set quantization info build options if(is_quantized) { const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + if(!perform_activation_in_float) + { + int a_const_int = 0; + int b_const_int = 0; + + // Create quantized version of constants a, b if needed + switch(dt) + { + case DataType::QASYMM8: + { + a_const_int = quantize_qasymm8(a_const, iq_info); + b_const_int = quantize_qasymm8(b_const, iq_info); + } + break; + case DataType::QASYMM8_SIGNED: + { + a_const_int = quantize_qasymm8_signed(a_const, iq_info); + b_const_int = quantize_qasymm8_signed(b_const, iq_info); + } + break; + case DataType::QSYMM16: + { + a_const_int = quantize_qsymm16(a_const, iq_info); + b_const_int = quantize_qsymm16(b_const, iq_info); + } + break; + default: + break; + } + build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); + build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); + } + else + { + build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const))); + build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); + } + // Quantized value of 0 corresponds to the offset o1 build_opts.add_option(("-DCONST_0=" + (is_data_type_quantized_asymmetric(dt) ? 
support::cpp11::to_string(iq_info.offset) : "0"))); build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale))); build_opts.add_option_if(is_data_type_quantized_asymmetric(dt), "-DO1_VAL=" + support::cpp11::to_string(iq_info.offset)); + // Set correct kernel name + kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant"); + // Set scale and offset of the input and output if they have different quantization info if(output != nullptr) { @@ -198,14 +216,14 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act } } } - - // Create kernel - std::string kernel_name = std::string("activation_layer"); - if(is_quantized) + else { - kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant"); + // Set A, B constants in build options for float types + build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const))); + build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); } + // Create kernel _kernel = create_opencl_kernel(_ctx, kernel_name, build_opts); // Make sure _kernel is initialized before calling the parent's configure _input = input; @@ -254,3 +272,4 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) } while(collapsed.slide_window_slice_3D(slice)); } +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 5570ecfc79..afee429219 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,12 +52,7 @@ int calculate_num_elems_processed_per_iteration(const ITensorInfo &input) Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ComparisonOperation operation) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input1.data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); ARM_COMPUTE_RETURN_ERROR_ON(supported_comparison_ops.count(operation) == 0); @@ -132,7 +127,7 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info()))); build_opts.emplace("-DOP=" + operation_name); build_opts.emplace("-DOP_NAME=" + lower_string(operation_name)); - if(is_data_type_quantized_asymmetric(input1->info()->data_type())) + if(is_data_type_quantized(input1->info()->data_type())) { const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index 81856769b2..7ec6841149 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -64,7 +64,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())); build_opts.add_option("-DFACTOR_1=" + support::cpp11::to_string(factor_1)); build_opts.add_option("-DFACTOR_2=" + support::cpp11::to_string(factor_2)); @@ -79,18 +79,15 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3)); ARM_COMPUTE_RETURN_ERROR_ON(data_layout == DataLayout::UNKNOWN); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if(output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index cd9552f149..ee392032ca 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ 
-32,8 +32,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -using namespace arm_compute; - +namespace arm_compute +{ CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel() : _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN) { @@ -45,7 +45,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); @@ -82,7 +82,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe // Create kernel CLBuildOptions build_opts; - build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()))); _kernel = static_cast(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); constexpr unsigned int num_elems_processed_per_iteration = 1; @@ -156,3 +156,4 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu ARM_COMPUTE_ERROR("Unsupported data layout"); } } +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index 49a5590231..f23f7ce542 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -36,6 +36,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2); @@ -81,7 +82,7 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())); build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel))); build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 60659faaaf..f85cb7636a 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16); if(output->tensor_shape().total_size() > 0) { diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 9e5d677e89..c158937839 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -90,11 +90,11 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const _output = output; // Check if is a quantized operation - bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type()); + const bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type()); // Create kernel CLBuildOptions build_opts; - build_opts.add_option_if(!is_quantized, "-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type())); build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input0->info()->dimension(0))); build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1))); diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index b255ba346f..220c2cd576 100644 --- 
a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,15 +35,15 @@ #include "support/ToolchainSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std); @@ -97,12 +97,8 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel() void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); - // Perform validation step + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info())); _input = input; @@ -183,3 +179,4 @@ void CLNormalizePlanarYUVLayerKernel::run(const Window &window, cl::CommandQueue } while(collapsed.slide_window_slice_3D(slice)); } +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 
50cdc9c7f4..6bdb1242a6 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,10 +50,18 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, ARM_COMPUTE_UNUSED(overflow_policy); ARM_COMPUTE_UNUSED(rounding_policy); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input2); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, + 1, + DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S16, DataType::QSYMM16, DataType::F16, + DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, + 1, + DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S16, DataType::QSYMM16, DataType::F16, + DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative."); const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); @@ -63,12 +71,17 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, // Validate in case of configured output if(output->total_size() > 0) { - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, + 1, 
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S16, DataType::QSYMM16, DataType::F16, + DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8), "Output can only be U8 if both inputs are U8"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8 && (input1->data_type() != DataType::QASYMM8 || input2->data_type() != DataType::QASYMM8), "Output can only be QASYMM8 if both inputs are QASYMM8"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8_SIGNED && (input1->data_type() != DataType::QASYMM8_SIGNED || input2->data_type() != DataType::QASYMM8_SIGNED), + "Output can only be QASYMM8_SIGNED if both inputs are QASYMM8_SIGNED"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QSYMM16 && (input1->data_type() != DataType::QSYMM16 || input2->data_type() != DataType::QSYMM16), "Output can only be QSYMM16 if both inputs are QSYMM16"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output"); @@ -99,6 +112,10 @@ std::pair validate_and_configure_window(ITensorInfo *input1, ITe { set_data_type_if_unknown(*output, DataType::QASYMM8); } + else if(input1->data_type() == DataType::QASYMM8_SIGNED) + { + set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED); + } else if(input1->data_type() == DataType::QSYMM16) { set_data_type_if_unknown(*output, DataType::QSYMM16); diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index e3f1114f21..2d75e5f969 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,7 +40,8 @@ #include #include -using namespace arm_compute; +namespace arm_compute +{ using namespace arm_compute::misc::shape_calculator; namespace @@ -57,19 +58,8 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, PoolingLayerInfo p Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - DataLayout data_layout = input->data_layout(); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - switch(data_layout) - { - case DataLayout::NCHW: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - break; - case DataLayout::NHWC: - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - break; - default: - ARM_COMPUTE_ERROR("Data layout not supported"); - } + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type() == PoolingType::L2), "Unsupported combination of parameters!"); @@ -234,7 +224,25 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x)); build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y)); - build_opts.add_option_if(data_type == DataType::F16, "-DFP16"); + // Set the initial value for the pooling operation accordingly with the data type + if(pool_type == PoolingType::MAX) + { + if(is_data_type_quantized(data_type)) + { + PixelValue type_min{}; + std::tie(type_min, std::ignore) = get_min_max(data_type); + build_opts.add_option("-DINITIAL_VALUE=" + support::cpp11::to_string(type_min.get())); + } + else + { + build_opts.add_option("-DINITIAL_VALUE=" + 
float_to_string_with_full_precision(std::numeric_limits::lowest())); + } + } + else + { + // Pool AVG and Pool L2 initial value + build_opts.add_option("-DINITIAL_VALUE=0"); + } const auto use_fp_mixed_precision = (data_type == DataType::F16) && pool_info.fp_mixed_precision(); const auto use_wider_accumulator = use_fp_mixed_precision && (pool_type != PoolingType::MAX); @@ -389,3 +397,4 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR("Not implemented"); } } +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp index c226e56aff..02927e83ad 100644 --- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp +++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" -using namespace arm_compute; - +namespace arm_compute +{ void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout) { @@ -37,4 +37,5 @@ Status CLConvertFullyConnectedWeights::validate(const ITensorInfo *input, const DataLayout data_layout) { return CLConvertFullyConnectedWeightsKernel::validate(input, output, original_input_shape, data_layout); -} \ No newline at end of file +} +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp index 08aef92eae..1581dd9c19 100644 --- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp +++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,33 +21,24 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" +#include "support/ToolchainSupport.h" -using namespace arm_compute; +#include -CLDepthToSpaceLayer::CLDepthToSpaceLayer() - : _depth_to_space_kernel() +namespace arm_compute { -} - void CLDepthToSpaceLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape) { - _depth_to_space_kernel.configure(input, output, block_shape); + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, block_shape); + _kernel = std::move(k); } Status CLDepthToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { return CLDepthToSpaceLayerKernel::validate(input, output, block_shape); } - -void CLDepthToSpaceLayer::run() -{ - CLScheduler::get().enqueue(_depth_to_space_kernel, true); -} +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp index 11d70e31fb..c5de591f5c 100644 --- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp +++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,22 +24,18 @@ #include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" +#include "support/ToolchainSupport.h" + +#include namespace arm_compute { -CLNormalizePlanarYUVLayer::CLNormalizePlanarYUVLayer() - : _norm_kernel() -{ -} - void CLNormalizePlanarYUVLayer::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) { - _norm_kernel.configure(input, output, mean, std); + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, mean, std); + _kernel = std::move(k); } Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITensorInfo *output, @@ -47,9 +43,4 @@ Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITens { return CLNormalizePlanarYUVLayerKernel::validate(input, output, mean, std); } - -void CLNormalizePlanarYUVLayer::run() -{ - CLScheduler::get().enqueue(_norm_kernel, true); -} } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index 086017a7fd..f3ea926ae7 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,8 +28,8 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "support/ToolchainSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -40,12 +40,14 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin k->configure(input, output, pool_info); _kernel = std::move(k); + const DataType data_type = input->info()->data_type(); + // Configure border depending on operation required (quantize border in case of asymmetric data_type) BorderMode border_mode{}; PixelValue pixel_value(0.f); - if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) + if(is_data_type_quantized_asymmetric(data_type) && !pool_info.exclude_padding()) { - pixel_value = PixelValue(static_cast(input->info()->quantization_info().uniform().offset)); + pixel_value = PixelValue(0, data_type, input->info()->quantization_info()); } switch(input->info()->data_layout()) { @@ -54,9 +56,16 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin break; case DataLayout::NHWC: border_mode = BorderMode::CONSTANT; - if(PoolingType::MAX == pool_info.pool_type() && !is_data_type_quantized_asymmetric(input->info()->data_type())) + if(PoolingType::MAX == pool_info.pool_type()) { - pixel_value = PixelValue(std::numeric_limits::lowest()); + if(is_data_type_quantized(data_type)) + { + std::tie(pixel_value, std::ignore) = get_min_max(data_type); + } + else + { + pixel_value = PixelValue(std::numeric_limits::lowest()); + } } break; default: @@ -71,4 +80,5 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info) { return CLPoolingLayerKernel::validate(input, output, pool_info); -} 
\ No newline at end of file +} +} // namespace arm_compute \ No newline at end of file diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h index 72952e418e..cc79104ff1 100644 --- a/tests/datasets/DatatypeDataset.h +++ b/tests/datasets/DatatypeDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,7 @@ public: { DataType::QSYMM8, DataType::QASYMM8, + DataType::QASYMM8_SIGNED, DataType::QSYMM16, }) { diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp index a17ad9b269..8b12b0b28b 100644 --- a/tests/validation/CL/ActivationLayer.cpp +++ b/tests/validation/CL/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -235,15 +235,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture, fra // Validate output validate(CLAccessor(_target), _reference, tolerance(_function, _data_type)); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLActivationLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), QuantizedActivationDataset), - framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) }))) +TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset), + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 10.0f) }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance(_function, _data_type)); } -TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM16) 
FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset), framework::dataset::make("DataType", diff --git a/tests/validation/CL/NormalizePlanarYUVLayer.cpp b/tests/validation/CL/NormalizePlanarYUVLayer.cpp index 31e0625eed..54fff01915 100644 --- a/tests/validation/CL/NormalizePlanarYUVLayer.cpp +++ b/tests/validation/CL/NormalizePlanarYUVLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -123,7 +123,7 @@ FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture, framework // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0); } -TEST_SUITE_END() +TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(), @@ -133,8 +133,8 @@ FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture, framewor // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float template using CLNormalizePlanarYUVLayerQuantizedFixture = NormalizePlanarYUVLayerValidationQuantizedFixture; @@ -143,17 +143,27 @@ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(), framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, 
0); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, 0); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // NormalizePlanarYUVLayer +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp index 32fb2a113b..ce678d9aea 100644 --- a/tests/validation/CL/PReluLayer.cpp +++ b/tests/validation/CL/PReluLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,6 +54,9 @@ const auto PReluLayerU8Dataset = combine(combine(framework::dataset::make("DataT const auto PReluLayerQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto PReluLayerQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)); const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), @@ -165,7 +168,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerQuantizedFixture, framewor ) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + PReluLayerQASYMM8SIGNEDDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 127.f, 10) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127.f, 5) })) + + ) +{ + // Validate output + validate(CLAccessor(_target), _reference); } TEST_SUITE_END() TEST_SUITE_END() @@ -211,8 +228,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerFixture, framework::DatasetMo TEST_SUITE_END() TEST_SUITE(FP32) 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(), - shape) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(), shape) { // Create tensors CLTensor ref_src1 = create_tensor(shape, DataType::F32); diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp index 22ff9f2fb9..3b55e25f37 100644 --- a/tests/validation/CL/PixelWiseMultiplication.cpp +++ b/tests/validation/CL/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -139,8 +139,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(), - framework::dataset::make("DataType", DataType::QASYMM8)), +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Scale", { 1.f, 2.f })), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)), @@ -151,7 +154,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPixelWiseMultiplicationQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QSYMM16)), diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp index ff7c24f024..262cea3338 100644 --- a/tests/validation/CL/PoolingLayer.cpp +++ b/tests/validation/CL/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. 
+ * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -71,9 +71,10 @@ framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(5, 7) })), framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })), framework::dataset::make("ExcludePadding", { true })); -constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ -constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */ +constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ +constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */ +constexpr AbsoluteTolerance tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit signed asymmetric type */ const auto pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); const auto pool_fp_mixed_precision_dataset = framework::dataset::make("FpMixedPrecision", { true, false }); @@ -188,14 +189,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture, framew // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, 
combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8, - framework::dataset::make("DataType", DataType::QASYMM8))), - pool_data_layout_dataset)) +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + pool_data_layout_dataset)) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_qasymm8); + validate(CLAccessor(_target), _reference, tolerance_qasymm8_s); } -TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // PoolingLayer TEST_SUITE_END() // CL diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.cpp b/tests/validation/reference/NormalizePlanarYUVLayer.cpp index ea0e75a3c7..d2d29cc682 100644 --- a/tests/validation/reference/NormalizePlanarYUVLayer.cpp +++ b/tests/validation/reference/NormalizePlanarYUVLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -72,6 +72,17 @@ SimpleTensor normalize_planar_yuv_layer(const SimpleTensor +SimpleTensor normalize_planar_yuv_layer(const SimpleTensor &src, const SimpleTensor &mean, const SimpleTensor &std) +{ + SimpleTensor src_tmp = convert_from_asymmetric(src); + SimpleTensor mean_tmp = convert_from_asymmetric(mean); + SimpleTensor std_tmp = convert_from_asymmetric(std); + SimpleTensor dst_tmp = normalize_planar_yuv_layer(src_tmp, mean_tmp, std_tmp); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + return dst; +} + template SimpleTensor normalize_planar_yuv_layer(const SimpleTensor &src, const SimpleTensor &mean, const SimpleTensor &std); template SimpleTensor normalize_planar_yuv_layer(const SimpleTensor &src, const SimpleTensor &mean, const SimpleTensor &std); } // namespace reference diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index 010412c92b..40dd6fa505 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -171,6 +171,15 @@ SimpleTensor pooling_layer(const SimpleTensor &src, c return dst; } +template <> +SimpleTensor pooling_layer(const SimpleTensor &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo) +{ + SimpleTensor src_tmp = convert_from_asymmetric(src); + SimpleTensor dst_tmp = pooling_layer_internal(src_tmp, info, output_qinfo); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, output_qinfo); + return dst; +} + template <> SimpleTensor pooling_layer(const SimpleTensor &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo) { diff --git a/tests/validation/reference/UpsampleLayer.cpp b/tests/validation/reference/UpsampleLayer.cpp index d77f9ae348..a81a601057 100644 --- a/tests/validation/reference/UpsampleLayer.cpp +++ b/tests/validation/reference/UpsampleLayer.cpp @@ -23,6 +23,7 @@ */ #include "UpsampleLayer.h" +#include "arm_compute/core/utils/misc/Requires.h" #include "tests/validation/Helpers.h" namespace arm_compute @@ -33,10 +34,8 @@ namespace validation { namespace reference { -namespace -{ template -SimpleTensor upsample_function(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy) +SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy) { ARM_COMPUTE_ERROR_ON(policy != InterpolationPolicy::NEAREST_NEIGHBOR); ARM_COMPUTE_UNUSED(policy); @@ -76,36 +75,12 @@ SimpleTensor upsample_function(const SimpleTensor &src, const Size2D &info return out; } -} // namespace - -template -SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy) -{ - return upsample_function(src, info, policy); -} - -template <> -SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy) -{ - SimpleTensor dst(src.shape(), src.data_type(), 1, src.quantization_info()); - - if(is_data_type_quantized_asymmetric(src.data_type())) - { - 
SimpleTensor src_tmp = convert_from_asymmetric(src); - SimpleTensor dst_tmp = upsample_function(src_tmp, info, policy); - dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); - } - else - { - dst = upsample_function(src, info, policy); - } - return dst; -} - template SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy); template SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy); +template SimpleTensor upsample_layer(const SimpleTensor &src, + const Size2D &info, const InterpolationPolicy policy); template SimpleTensor upsample_layer(const SimpleTensor &src, const Size2D &info, const InterpolationPolicy policy); } // namespace reference -- cgit v1.2.1