aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2019-12-20 13:26:08 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-01-14 17:21:26 +0000
commitcbbed288a71f2f048123db3cf396361e5d66ce93 (patch)
treebd48122e283272adb668a42097ea69ca3f0473a6
parent70d33bdfd36e1b44b0573189dca67ed7c63dd59e (diff)
downloadComputeLibrary-cbbed288a71f2f048123db3cf396361e5d66ce93.tar.gz
COMPMID-2991: Add support for QASYMM8_SIGNED in CL kernels/functions - part 2
Adding support for QASYMM8_SIGNED to the following CL kernels/functions: - CLActivationLayerKernel/CLActivationLayer - CLComparisonKernel/CLComparison - CLConvertFullyConnectedWeightsKernel/CLConvertFullyConnectedWeights - CLDeconvolutionLayerUpsampleKernel/CLDeconvolutionLayerUpsample - CLDepthToSpaceLayerKernel/CLDepthToSpaceLayer - CLDequantizationLayerKernel/CLDequantizationLayer - CLGEMMMatrixVectorMultiplyKernel - CLNormalizePlanarYUVLayerKernel - CLPReluLayer - CLPixelWiseMultiplicationKernel/CLPixelWiseMultiplication - CLPoolingLayerKernel/CLPoolingLayer Change-Id: I874bbb7c2b08baa9c5ff4c9e6bc8778b42a6bec5 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/2539 Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLActivationLayerKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLComparisonKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h14
-rw-r--r--arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h6
-rw-r--r--arm_compute/core/CL/kernels/CLPoolingLayerKernel.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLComparison.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h20
-rw-r--r--arm_compute/runtime/CL/functions/CLDequantizationLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h22
-rw-r--r--arm_compute/runtime/CL/functions/CLPReluLayer.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h6
-rw-r--r--src/core/CL/cl_kernels/comparisons.cl11
-rw-r--r--src/core/CL/cl_kernels/convert_fc_weights.cl4
-rw-r--r--src/core/CL/cl_kernels/deconvolution_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/depth_to_space.cl6
-rw-r--r--src/core/CL/cl_kernels/dequantization_layer.cl4
-rw-r--r--src/core/CL/cl_kernels/gemv.cl24
-rw-r--r--src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl6
-rw-r--r--src/core/CL/cl_kernels/pixelwise_mul_int.cl8
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl18
-rw-r--r--src/core/CL/cl_kernels/pooling_layer_quantized.cl67
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp95
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.cpp11
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp13
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp17
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp29
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp39
-rw-r--r--src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp9
-rw-r--r--src/runtime/CL/functions/CLDepthToSpaceLayer.cpp27
-rw-r--r--src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp25
-rw-r--r--src/runtime/CL/functions/CLPoolingLayer.cpp26
-rw-r--r--tests/datasets/DatatypeDataset.h3
-rw-r--r--tests/validation/CL/ActivationLayer.cpp14
-rw-r--r--tests/validation/CL/NormalizePlanarYUVLayer.cpp28
-rw-r--r--tests/validation/CL/PReluLayer.cpp24
-rw-r--r--tests/validation/CL/PixelWiseMultiplication.cpp12
-rw-r--r--tests/validation/CL/PoolingLayer.cpp22
-rw-r--r--tests/validation/reference/NormalizePlanarYUVLayer.cpp13
-rw-r--r--tests/validation/reference/PoolingLayer.cpp11
-rw-r--r--tests/validation/reference/UpsampleLayer.cpp33
53 files changed, 413 insertions, 372 deletions
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index cb2fda2be8..5b65a54824 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ public:
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*/
@@ -59,7 +59,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h
index 21c6aeb064..a9c463901d 100644
--- a/arm_compute/core/CL/kernels/CLComparisonKernel.h
+++ b/arm_compute/core/CL/kernels/CLComparisonKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,7 @@ public:
~CLComparisonKernel() = default;
/** Set the inputs and output tensors
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* @param[out] output Destination tensor. Data types supported: U8.
* @param[in] operation Comparison operation to use.
@@ -58,7 +58,7 @@ public:
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
/** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* @param[in] output Destination tensor. Data types supported: U8.
* @param[in] operation Comparison operation to use.
diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
index 6518dfc84c..b204eaa2ac 100644
--- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,7 @@ public:
~CLConvertFullyConnectedWeightsKernel() = default;
/** Set the input and output tensor.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
@@ -63,7 +63,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index 8382f3b4d7..a1c6bbdafe 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -50,14 +50,14 @@ public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: All.
* @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*
diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
index 6ae991c8e7..637e5fa960 100644
--- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,14 +49,14 @@ public:
~CLDepthToSpaceLayerKernel() = default;
/** Initialise the kernel's inputs and output.
*
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
*
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
* @param[in] output Tensor output info. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index d79cd89883..78b5c14128 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,13 +48,13 @@ public:
~CLDequantizationLayerKernel() = default;
/** Set the input, output, min and max.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor. Data types supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data types supported: F16/F32.
*
* @return a status
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
index 821c6ae5bb..8ee911dc0e 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,16 +46,16 @@ public:
CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default;
/** Set the input and output of the kernel.
*
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input, S32 for QASYMM8 input.
- * @param[out] output The output 2D tensor. Data types supported: Same as @p input
+ * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input.
+ * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
*
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input, S32 for QASYMM8 input.
- * @param[in] output The output 2D tensor. Data types supported: Same as @p input
+ * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input.
+ * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
*
* @return a status
*/
diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
index d54aae31c5..4334882fd8 100644
--- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
* @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
@@ -60,7 +60,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor info. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
* @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 0e5027b29a..58471ab299 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,7 +48,7 @@ public:
CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] input2 An input tensor. Data types supported: same as @p input1.
* @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
@@ -60,7 +60,7 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1.
* @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
index 95acdf4b6c..4b3ee24333 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,14 +52,14 @@ public:
/** Set the input and output tensors.
*
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index f7781480bf..09f5d2bf58 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,7 +57,7 @@ public:
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] act_info Activation layer parameters.
*/
@@ -65,7 +65,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index 7f0b22341f..85dbe7129d 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ class CLComparison : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and outputs.
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -48,7 +48,7 @@ public:
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
/** Static function to check if given info will lead to a valid configuration of @ref CLComparison
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* @param[in] output Destination tensor. Data types supported: U8.
* @param[out] operation Comparison operation to be used.
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index f0359ecc75..76a28ed6fe 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@ class CLConvertFullyConnectedWeights : public ICLSimpleFunction
public:
/** Initialize the function.
*
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
@@ -49,7 +49,7 @@ public:
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights
*
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
@@ -90,7 +90,7 @@ public:
}
/** Configures the @ref CLConvertFullyConnectedWeights function
*
- * @param[in] input Source weights tensor info to convert. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source weights tensor info to convert. Data type supported: All.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
*/
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index 6f015744bf..5a1009c79f 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,14 +59,14 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[in, out] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution.
*/
void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor info. Data type supported: All.
* @param[in] output Destination tensor info. Data type supported: same as @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution.
*
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
index ddee04a3dd..0c33ed34be 100644
--- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,43 +24,33 @@
#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYER_H
#define ARM_COMPUTE_CLDEPTHTOSPACELAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
class ICLTensor;
/** Basic function to run @ref CLDepthToSpaceLayerKernel. */
-class CLDepthToSpaceLayer : public IFunction
+class CLDepthToSpaceLayer : public ICLSimpleFunction
{
public:
- /** Default constructor */
- CLDepthToSpaceLayer();
/** Set the input and output tensors.
*
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayer.
*
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
* @param[in] output Tensor output info. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLDepthToSpaceLayerKernel _depth_to_space_kernel; /**< CLDepthToSpaceLayerKernel to run */
};
}
#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 308349af02..48d6ba8435 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,13 +40,13 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
- * Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data type supported: F16/F32.
*
* @return a status
diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
index 4fe5a111b6..5fbfdd18b7 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,10 +24,10 @@
#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H
#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
namespace arm_compute
{
@@ -37,15 +37,13 @@ class ICLTensor;
*
* @note The function simulates a NormalizePlanarYUV layer.
*/
-class CLNormalizePlanarYUVLayer : public IFunction
+class CLNormalizePlanarYUVLayer : public ICLSimpleFunction
{
public:
- /** Default constructor */
- CLNormalizePlanarYUVLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destinationfeature tensor. Data type supported: same as @p input
* @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input
* @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
@@ -55,7 +53,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayer
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor info. Data type supported: same as @p input
* @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input
* @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
@@ -64,12 +62,6 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLNormalizePlanarYUVLayerKernel _norm_kernel; /**< NormalizePlanarYUV layer kernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 42876cd714..7f8a41238c 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,14 +42,14 @@ public:
*
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha PRelu layer parameters. Data types supported: same of @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input
*/
void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha PRelu layer parameters. Data types supported: same of @p input.
* @param[in] output Destination tensor info. Data type supported: same as @p input
*
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index fd64d7b939..72b1587b02 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and convertion policy.
*
- * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -52,7 +52,7 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
*
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1.
* @param[in] output The output tensor info, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index 19acb7fb40..c78b558ac8 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,14 +43,14 @@ class CLPoolingLayer : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
- * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/F16/F32.
+ * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*
diff --git a/src/core/CL/cl_kernels/comparisons.cl b/src/core/CL/cl_kernels/comparisons.cl
index 8824b136b2..a41b7e2966 100644
--- a/src/core/CL/cl_kernels/comparisons.cl
+++ b/src/core/CL/cl_kernels/comparisons.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @attention The comparison operation should be given as a preprocessor argument using -DOP=operation. e.g. -DOP=LESS
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All non-quantized data types.
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -93,12 +93,13 @@ __kernel void DEFINE_KERNEL(OP_NAME)(
#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2)
/** This function compares two quantized tensors.
*
+ * @note The inputs' data type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar
* @note The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10
* @note The quantization offset of the second operand must be passed at compile time using -DOFFSET_IN2, i.e. -DOFFSET_IN2=10
* @note The quantization scale of the first operand must be passed at compile time using -DSCALE_IN1, i.e. -DSCALE_IN1=10
* @note The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All quantized data types.
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -133,8 +134,8 @@ __kernel void DEFINE_KERNEL_QUANTIZED(OP_NAME)(
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
- int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+ int16 in_a = CONVERT(vload16(0, (__global DATA_TYPE *)in1.ptr), int16);
+ int16 in_b = CONVERT(vload16(0, (__global DATA_TYPE *)in2.ptr), int16);
in_a = in_a - (int16)((int)OFFSET_IN1);
in_b = in_b - (int16)((int)OFFSET_IN2);
diff --git a/src/core/CL/cl_kernels/convert_fc_weights.cl b/src/core/CL/cl_kernels/convert_fc_weights.cl
index d47b733acd..db0873755e 100644
--- a/src/core/CL/cl_kernels/convert_fc_weights.cl
+++ b/src/core/CL/cl_kernels/convert_fc_weights.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Original input tensor width*height and depth should be given as a preprocessor argument using -DFACTOR_1=size and -DFACTOR_2=size for NCHW and vice versa for NHWC. e.g. -DFACTOR_1=256 and -DFACTOR_2=128
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All.
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
index ea2455c613..a9a6ac1947 100644
--- a/src/core/CL/cl_kernels/deconvolution_layer.cl
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
/** This function applies upsample on an input image.
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8/F16/F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All.
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/depth_to_space.cl b/src/core/CL/cl_kernels/depth_to_space.cl
index 2ffd0a40e7..5c2e8a1d57 100644
--- a/src/core/CL/cl_kernels/depth_to_space.cl
+++ b/src/core/CL/cl_kernels/depth_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,7 @@
* @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -72,7 +72,7 @@ __kernel void depth_to_space_nchw(
* @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 7550b4ba76..add86e3f2e 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,7 +33,7 @@
* @note Quantization scale of input tensor is passed in with -DSCALE=scale.
* @note Quantization offset of input tensor is passed in with -DOFFSET=offset.
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM8
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/gemv.cl b/src/core/CL/cl_kernels/gemv.cl
index 811aa1b865..aabde4119f 100644
--- a/src/core/CL/cl_kernels/gemv.cl
+++ b/src/core/CL/cl_kernels/gemv.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -110,12 +110,12 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC
}
}
}
-#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
-#if defined(SRC_WIDTH) && defined(SRC_HEIGHT)
/** This kernel applies dot product to each plane on the input tensor and the corresponding column of the reshaped weight tensor.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -123,13 +123,13 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] weights_ptr Pointer to the weights tensor. Same as @p src_ptr
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
* @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: S32
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
@@ -158,14 +158,14 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src),
// This kernel handle 4 rows in per thread so that it can reuse the weights
for(int i = 0; i < SRC_WIDTH; i += 4)
{
- int4 w = convert_int4(vload4(0, (__global uchar *)(current_weights + i * weights_stride_x))) + (int4)weights_offset;
+ int4 w = convert_int4(vload4(0, (__global DATA_TYPE *)(current_weights + i * weights_stride_x))) + (int4)weights_offset;
int4 offset = (int4)i * (int4)src_stride_x + (int4)(0, 1, 2, 3) * (int4)src_stride_y;
- int4 tmp0 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s0))) + (int4)input_offset;
- int4 tmp1 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s1))) + (int4)input_offset;
- int4 tmp2 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s2))) + (int4)input_offset;
- int4 tmp3 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s3))) + (int4)input_offset;
+ int4 tmp0 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s0))) + (int4)input_offset;
+ int4 tmp1 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s1))) + (int4)input_offset;
+ int4 tmp2 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s2))) + (int4)input_offset;
+ int4 tmp3 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s3))) + (int4)input_offset;
// Accumulate
acc0 += tmp0.s0 * w.s0 + tmp0.s1 * w.s1 + tmp0.s2 * w.s2 + tmp0.s3 * w.s3;
@@ -197,4 +197,4 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src),
}
}
}
-#endif /* defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
+#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
diff --git a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
index 925975d2ba..b2ba65f812 100644
--- a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@
* @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8
* @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8
*
- * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -102,7 +102,7 @@ __kernel void normalize_planar_yuv_layer_q8_nchw(TENSOR3D_DECLARATION(src),
* @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8
* @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8
*
- * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_int.cl b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
index 989316d661..d277c6c56f 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_int.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -109,7 +109,7 @@ __kernel void pixelwise_mul_int(
* @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
- * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QSYMM16
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
* @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
@@ -117,7 +117,7 @@ __kernel void pixelwise_mul_int(
* @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
* @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
+ * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr
* @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
@@ -125,7 +125,7 @@ __kernel void pixelwise_mul_int(
* @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
* @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr
* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index c8b5e07b47..207669e43e 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -391,28 +391,16 @@ __kernel void pooling_layer_optimized_3(
#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)
-// Set the initial value for the pooling operation accordingly with the data type
-#if defined(POOL_AVG) || defined(POOL_L2)
-#define INITIAL_VALUE 0
-#else /* defined(POOL_AVG) || defined(POOL_L2) */
-#if FP16
-#define INITIAL_VALUE -HALF_MAX
-#else // FP16
-#define INITIAL_VALUE -FLT_MAX
-#endif // FP16
-
-#endif // POOL_AVG
-
/** Performs a pooling function of pool size equal to N (NCHW)
*
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
- * @note -DFP16 must be passed at compile time if half float data type is used
* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
@@ -519,13 +507,13 @@ ACC_DATA_TYPE calculate_avg_scale_nhwc(const int pool_size_x, const int pool_siz
/** Performs a pooling function of pool size equal to N (NHWC)
*
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32
- * @note -DFP16 must be passed at compile time if half float data type is used
* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
* @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT
* @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
index 2df22d736c..3a370eea93 100644
--- a/src/core/CL/cl_kernels/pooling_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,18 +23,19 @@
*/
#include "helpers.h"
+#if defined(DATA_TYPE) && defined(INITIAL_VALUE)
+#define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
-#define VEC_FLOAT(VEC_SIZE) \
- VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR(VEC_SIZE) VEC_DATA_TYPE(uchar, VEC_SIZE)
#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res) \
{ \
const VEC_FLOAT(VEC_SIZE) in_f32 = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset)); \
- res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_UCHAR(VEC_SIZE)); \
+ res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE)); \
}
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
@@ -74,8 +75,10 @@ int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int
* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
+ * @note Input data type must be passed at compile time using -DDAT_TYPE=type, e.g. -DDATA_TYPE=uchar
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -100,8 +103,8 @@ __kernel void pooling_layer_MxN_quantized_nchw(
Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
- int8 vdata = 0;
- int sdata = 0;
+ int8 vdata = INITIAL_VALUE;
+ int sdata = INITIAL_VALUE;
// Load data
for(int y = 0; y < POOL_SIZE_Y; y++)
@@ -109,17 +112,18 @@ __kernel void pooling_layer_MxN_quantized_nchw(
int x = 0;
for(; x <= ((int)POOL_SIZE_X - 8); x += 8)
{
- uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0));
- int8 data0 = convert_int8(data);
- vdata = POOL_OP(vdata, data0);
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ int8 data0 = convert_int8(data);
+ vdata = POOL_OP(vdata, data0);
}
// Leftover
for(; x < (int)POOL_SIZE_X; ++x)
{
- uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0));
- int data0 = convert_int(data);
- sdata = POOL_OP(sdata, data0);
+ DATA_TYPE data = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ int data0 = convert_int(data);
+ sdata = POOL_OP(sdata, data0);
}
}
@@ -133,22 +137,22 @@ __kernel void pooling_layer_MxN_quantized_nchw(
res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
#endif /* defined(POOL_AVG) */
- uchar result_u8 = convert_uchar(res);
+ DATA_TYPE result_q8 = CONVERT(res, DATA_TYPE);
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- const float result_f32 = convert_float(result_u8);
+ const float result_f32 = convert_float(result_q8);
const float input_offset = (float)OFFSET_IN1;
const float input_scale = (float)SCALE_IN1;
const float scale_out = (float)SCALE_OUT;
const float offset_out = (float)OFFSET_OUT;
const float in_f32 = (result_f32 - input_offset) * input_scale;
const float out_f32 = in_f32 / scale_out + offset_out;
- result_u8 = convert_uchar_sat(convert_int_rte(out_f32));
+ result_q8 = CONVERT_SAT(convert_int_rte(out_f32), DATA_TYPE);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
- *(__global uchar *)output.ptr = result_u8;
+ *(__global DATA_TYPE *)output.ptr = result_q8;
}
int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
@@ -158,7 +162,7 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u
#if defined(DST_DEPTH)
int start_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
#else /* defined(DST_DEPTH) */
- int start_y = get_global_id(2) * stride_y - pad_y;
+ int start_y = get_global_id(2) * stride_y - pad_y;
#endif /* defined(DST_DEPTH) */
const int end_x = min(start_x + pool_size_x, upper_bound_w);
@@ -178,8 +182,9 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u
* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -209,17 +214,17 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
#else /* defined(DST_DEPTH) */
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
#endif /* defined(DST_DEPTH) */
- int8 vdata = 0;
+ int8 vdata = INITIAL_VALUE;
const int idx_width = get_global_id(1) * STRIDE_X;
#if defined(DST_DEPTH)
const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y;
#else /* defined(DST_DEPTH) */
- const int idx_height = get_global_id(2) * STRIDE_Y;
+ const int idx_height = get_global_id(2) * STRIDE_Y;
#endif /* defined(DST_DEPTH) */
for(int y = 0; y < POOL_SIZE_Y; ++y)
@@ -231,9 +236,11 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
x1 = select(x1, PAD_X - idx_width - 1, y != y1);
#if defined(DST_DEPTH)
- uchar8 data = vload8(0, (__global uchar *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));
#else /* defined(DST_DEPTH) */
- uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
#endif /* defined(DST_DEPTH) */
int8 data0 = convert_int8(data);
@@ -246,11 +253,13 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
#endif /* defined(POOL_AVG) */
- uchar8 out_u8 = convert_uchar8(vdata);
+ VEC_TYPE(8)
+ out_q8 = CONVERT(vdata, VEC_TYPE(8));
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- REQUANTIZE(8, out_u8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_u8);
+ REQUANTIZE(8, out_q8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q8);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
// Store result
- vstore8(out_u8, 0, (__global uchar *)output.ptr);
+ vstore8(out_q8, 0, (__global DATA_TYPE *)output.ptr);
}
+#endif /* defined(DATA_TYPE) && defined(INITIAL_VALUE) */
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 5062fd1801..270eb78dcb 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,14 +39,14 @@
#include <cmath>
#include <set>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::F16, DataType::F32);
static std::set<ActivationLayerInfo::ActivationFunction> quantized_supported_activations =
{
@@ -63,12 +63,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(data_type) && (quantized_supported_activations.count(f_act) == 0),
"For Quantized data type only tanh, logistic, relu and lower/upper bounded relu are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128)));
+
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
@@ -135,27 +138,11 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
const DataType dt = input->info()->data_type();
float a_const = act_info.a();
float b_const = act_info.b();
- int a_const_int = 0;
- int b_const_int = 0;
const ActivationLayerInfo::ActivationFunction f_act = act_info.activation();
const bool is_quantized = is_data_type_quantized(dt);
const bool perform_activation_in_float = (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) || (f_act == ActivationLayerInfo::ActivationFunction::TANH);
- // Create quantized version of constants a, b if needed
- if(dt == DataType::QASYMM8)
- {
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- a_const_int = quantize_qasymm8(a_const, iq_info);
- b_const_int = quantize_qasymm8(b_const, iq_info);
- }
- else if(dt == DataType::QSYMM16)
- {
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- a_const_int = quantize_qsymm16(a_const, iq_info);
- b_const_int = quantize_qsymm16(b_const, iq_info);
- }
-
// Set build options
CLBuildOptions build_opts;
build_opts.add_option_if(perform_activation_in_float, "-DFLOAT_DOMAIN");
@@ -164,28 +151,59 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- // Set A, B constants in build options
- if(is_quantized && !perform_activation_in_float)
- {
- build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
- build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
- }
- else
- {
- build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
- build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
- }
+ std::string kernel_name = std::string("activation_layer");
// Set quantization info build options
if(is_quantized)
{
const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ if(!perform_activation_in_float)
+ {
+ int a_const_int = 0;
+ int b_const_int = 0;
+
+ // Create quantized version of constants a, b if needed
+ switch(dt)
+ {
+ case DataType::QASYMM8:
+ {
+ a_const_int = quantize_qasymm8(a_const, iq_info);
+ b_const_int = quantize_qasymm8(b_const, iq_info);
+ }
+ break;
+ case DataType::QASYMM8_SIGNED:
+ {
+ a_const_int = quantize_qasymm8_signed(a_const, iq_info);
+ b_const_int = quantize_qasymm8_signed(b_const, iq_info);
+ }
+ break;
+ case DataType::QSYMM16:
+ {
+ a_const_int = quantize_qsymm16(a_const, iq_info);
+ b_const_int = quantize_qsymm16(b_const, iq_info);
+ }
+ break;
+ default:
+ break;
+ }
+ build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
+ build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
+ }
+ else
+ {
+ build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
+ build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
+ }
+
// Quantized value of 0 corresponds to the offset o1
build_opts.add_option(("-DCONST_0=" + (is_data_type_quantized_asymmetric(dt) ? support::cpp11::to_string(iq_info.offset) : "0")));
build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale)));
build_opts.add_option_if(is_data_type_quantized_asymmetric(dt), "-DO1_VAL=" + support::cpp11::to_string(iq_info.offset));
+ // Set correct kernel name
+ kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant");
+
// Set scale and offset of the input and output if they have different quantization info
if(output != nullptr)
{
@@ -198,14 +216,14 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
}
}
-
- // Create kernel
- std::string kernel_name = std::string("activation_layer");
- if(is_quantized)
+ else
{
- kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant");
+ // Set A, B constants in build options for float types
+ build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
+ build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
}
+ // Create kernel
_kernel = create_opencl_kernel(_ctx, kernel_name, build_opts);
// Make sure _kernel is initialized before calling the parent's configure
_input = input;
@@ -254,3 +272,4 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 5570ecfc79..afee429219 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,12 +52,7 @@ int calculate_num_elems_processed_per_iteration(const ITensorInfo &input)
Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ComparisonOperation operation)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input1.data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
ARM_COMPUTE_RETURN_ERROR_ON(supported_comparison_ops.count(operation) == 0);
@@ -132,7 +127,7 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp
build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info())));
build_opts.emplace("-DOP=" + operation_name);
build_opts.emplace("-DOP_NAME=" + lower_string(operation_name));
- if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ if(is_data_type_quantized(input1->info()->data_type()))
{
const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index 81856769b2..7ec6841149 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,7 +64,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
// Set build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
build_opts.add_option("-DFACTOR_1=" + support::cpp11::to_string(factor_1));
build_opts.add_option("-DFACTOR_2=" + support::cpp11::to_string(factor_2));
@@ -79,18 +79,15 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
- DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3));
ARM_COMPUTE_RETURN_ERROR_ON(data_layout == DataLayout::UNKNOWN);
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index cd9552f149..ee392032ca 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -32,8 +32,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
: _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN)
{
@@ -45,7 +45,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -82,7 +82,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
constexpr unsigned int num_elems_processed_per_iteration = 1;
@@ -156,3 +156,4 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
ARM_COMPUTE_ERROR("Unsupported data layout");
}
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index 49a5590231..f23f7ce542 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2);
@@ -81,7 +82,7 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 60659faaaf..f85cb7636a 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 9e5d677e89..c158937839 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -90,11 +90,11 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
_output = output;
// Check if is a quantized operation
- bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type());
+ const bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type());
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option_if(!is_quantized, "-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input0->info()->dimension(0)));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1)));
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index b255ba346f..220c2cd576 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,15 +35,15 @@
#include "support/ToolchainSupport.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std);
@@ -97,12 +97,8 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel()
void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
-
// Perform validation step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info()));
_input = input;
@@ -183,3 +179,4 @@ void CLNormalizePlanarYUVLayerKernel::run(const Window &window, cl::CommandQueue
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 50cdc9c7f4..6bdb1242a6 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,10 +50,18 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_UNUSED(overflow_policy);
ARM_COMPUTE_UNUSED(rounding_policy);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
@@ -63,12 +71,17 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
// Validate in case of configured output
if(output->total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8 && (input1->data_type() != DataType::QASYMM8 || input2->data_type() != DataType::QASYMM8),
"Output can only be QASYMM8 if both inputs are QASYMM8");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8_SIGNED && (input1->data_type() != DataType::QASYMM8_SIGNED || input2->data_type() != DataType::QASYMM8_SIGNED),
+ "Output can only be QASYMM8_SIGNED if both inputs are QASYMM8_SIGNED");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QSYMM16 && (input1->data_type() != DataType::QSYMM16 || input2->data_type() != DataType::QSYMM16),
"Output can only be QSYMM16 if both inputs are QSYMM16");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
@@ -99,6 +112,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITe
{
set_data_type_if_unknown(*output, DataType::QASYMM8);
}
+ else if(input1->data_type() == DataType::QASYMM8_SIGNED)
+ {
+ set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED);
+ }
else if(input1->data_type() == DataType::QSYMM16)
{
set_data_type_if_unknown(*output, DataType::QSYMM16);
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index e3f1114f21..2d75e5f969 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,8 @@
#include <string>
#include <tuple>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
@@ -57,19 +58,8 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, PoolingLayerInfo p
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- DataLayout data_layout = input->data_layout();
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- switch(data_layout)
- {
- case DataLayout::NCHW:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- break;
- case DataLayout::NHWC:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- break;
- default:
- ARM_COMPUTE_ERROR("Data layout not supported");
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type() == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -234,7 +224,25 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
- build_opts.add_option_if(data_type == DataType::F16, "-DFP16");
+ // Set the initial value for the pooling operation accordingly with the data type
+ if(pool_type == PoolingType::MAX)
+ {
+ if(is_data_type_quantized(data_type))
+ {
+ PixelValue type_min{};
+ std::tie(type_min, std::ignore) = get_min_max(data_type);
+ build_opts.add_option("-DINITIAL_VALUE=" + support::cpp11::to_string(type_min.get<int32_t>()));
+ }
+ else
+ {
+ build_opts.add_option("-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits<float>::lowest()));
+ }
+ }
+ else
+ {
+ // Pool AVG and Pool L2 initial value
+ build_opts.add_option("-DINITIAL_VALUE=0");
+ }
const auto use_fp_mixed_precision = (data_type == DataType::F16) && pool_info.fp_mixed_precision();
const auto use_wider_accumulator = use_fp_mixed_precision && (pool_type != PoolingType::MAX);
@@ -389,3 +397,4 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_ERROR("Not implemented");
}
}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
index c226e56aff..02927e83ad 100644
--- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
+++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
@@ -37,4 +37,5 @@ Status CLConvertFullyConnectedWeights::validate(const ITensorInfo *input, const
DataLayout data_layout)
{
return CLConvertFullyConnectedWeightsKernel::validate(input, output, original_input_shape, data_layout);
-} \ No newline at end of file
+}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
index 08aef92eae..1581dd9c19 100644
--- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,33 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "support/ToolchainSupport.h"
-using namespace arm_compute;
+#include <utility>
-CLDepthToSpaceLayer::CLDepthToSpaceLayer()
- : _depth_to_space_kernel()
+namespace arm_compute
{
-}
-
void CLDepthToSpaceLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
- _depth_to_space_kernel.configure(input, output, block_shape);
+ auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceLayerKernel>();
+ k->configure(input, output, block_shape);
+ _kernel = std::move(k);
}
Status CLDepthToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
{
return CLDepthToSpaceLayerKernel::validate(input, output, block_shape);
}
-
-void CLDepthToSpaceLayer::run()
-{
- CLScheduler::get().enqueue(_depth_to_space_kernel, true);
-}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
index 11d70e31fb..c5de591f5c 100644
--- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,22 +24,18 @@
#include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
namespace arm_compute
{
-CLNormalizePlanarYUVLayer::CLNormalizePlanarYUVLayer()
- : _norm_kernel()
-{
-}
-
void CLNormalizePlanarYUVLayer::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
{
- _norm_kernel.configure(input, output, mean, std);
+ auto k = arm_compute::support::cpp14::make_unique<CLNormalizePlanarYUVLayerKernel>();
+ k->configure(input, output, mean, std);
+ _kernel = std::move(k);
}
Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
@@ -47,9 +43,4 @@ Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITens
{
return CLNormalizePlanarYUVLayerKernel::validate(input, output, mean, std);
}
-
-void CLNormalizePlanarYUVLayer::run()
-{
- CLScheduler::get().enqueue(_norm_kernel, true);
-}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index 086017a7fd..f3ea926ae7 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,8 +28,8 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
@@ -40,12 +40,14 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
k->configure(input, output, pool_info);
_kernel = std::move(k);
+ const DataType data_type = input->info()->data_type();
+
// Configure border depending on operation required (quantize border in case of asymmetric data_type)
BorderMode border_mode{};
PixelValue pixel_value(0.f);
- if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
+ if(is_data_type_quantized_asymmetric(data_type) && !pool_info.exclude_padding())
{
- pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
+ pixel_value = PixelValue(0, data_type, input->info()->quantization_info());
}
switch(input->info()->data_layout())
{
@@ -54,9 +56,16 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
break;
case DataLayout::NHWC:
border_mode = BorderMode::CONSTANT;
- if(PoolingType::MAX == pool_info.pool_type() && !is_data_type_quantized_asymmetric(input->info()->data_type()))
+ if(PoolingType::MAX == pool_info.pool_type())
{
- pixel_value = PixelValue(std::numeric_limits<float>::lowest());
+ if(is_data_type_quantized(data_type))
+ {
+ std::tie(pixel_value, std::ignore) = get_min_max(data_type);
+ }
+ else
+ {
+ pixel_value = PixelValue(std::numeric_limits<float>::lowest());
+ }
}
break;
default:
@@ -71,4 +80,5 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
return CLPoolingLayerKernel::validate(input, output, pool_info);
-} \ No newline at end of file
+}
+} // namespace arm_compute \ No newline at end of file
diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h
index 72952e418e..cc79104ff1 100644
--- a/tests/datasets/DatatypeDataset.h
+++ b/tests/datasets/DatatypeDataset.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,7 @@ public:
{
DataType::QSYMM8,
DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED,
DataType::QSYMM16,
})
{
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index a17ad9b269..8b12b0b28b 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -235,15 +235,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture<uint8_t>, fra
// Validate output
validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLActivationLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), QuantizedActivationDataset),
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 10.0f) })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
}
-TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE(QSYMM16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset),
framework::dataset::make("DataType",
diff --git a/tests/validation/CL/NormalizePlanarYUVLayer.cpp b/tests/validation/CL/NormalizePlanarYUVLayer.cpp
index 31e0625eed..54fff01915 100644
--- a/tests/validation/CL/NormalizePlanarYUVLayer.cpp
+++ b/tests/validation/CL/NormalizePlanarYUVLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -123,7 +123,7 @@ FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture<half>, framework
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, 0);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(),
@@ -133,8 +133,8 @@ FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerFixture<float>, framewor
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
template <typename T>
using CLNormalizePlanarYUVLayerQuantizedFixture = NormalizePlanarYUVLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLNormalizePlanarYUVLayer, T>;
@@ -143,17 +143,27 @@ TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(),
framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8, 0);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(Random, CLNormalizePlanarYUVLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomNormalizePlanarYUVLayerDataset(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8, 0);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // NormalizePlanarYUVLayer
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp
index 32fb2a113b..ce678d9aea 100644
--- a/tests/validation/CL/PReluLayer.cpp
+++ b/tests/validation/CL/PReluLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,9 @@ const auto PReluLayerU8Dataset = combine(combine(framework::dataset::make("DataT
const auto PReluLayerQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto PReluLayerQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED));
const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -165,7 +168,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerQuantizedFixture<uint8_t>, framewor
)
{
// Validate output
- validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ PReluLayerQASYMM8SIGNEDDataset),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 127.f, 10) })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127.f, 5) }))
+
+ )
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END()
TEST_SUITE_END()
@@ -211,8 +228,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerFixture<half>, framework::DatasetMo
TEST_SUITE_END()
TEST_SUITE(FP32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
- shape)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(), shape)
{
// Create tensors
CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::F32);
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index 22ff9f2fb9..3b55e25f37 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -139,8 +139,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPixelWiseMultiplicationQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("Scale", { 1.f, 2.f })),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
@@ -151,7 +154,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPixelWiseMultiplicationQuantizedFixture<uint8
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // QASYMM8_SIGNED
+
TEST_SUITE(QSYMM16)
FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
framework::dataset::make("DataType", DataType::QSYMM16)),
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index ff7c24f024..262cea3338 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -71,9 +71,10 @@ framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(5, 7) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
framework::dataset::make("ExcludePadding", { true }));
-constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit signed asymmetric type */
const auto pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
const auto pool_fp_mixed_precision_dataset = framework::dataset::make("FpMixedPrecision", { true, false });
@@ -188,14 +189,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framew
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
- framework::dataset::make("DataType", DataType::QASYMM8))),
- pool_data_layout_dataset))
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+ pool_data_layout_dataset))
{
// Validate output
- validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+ validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
}
-TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // PoolingLayer
TEST_SUITE_END() // CL
diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.cpp b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
index ea0e75a3c7..d2d29cc682 100644
--- a/tests/validation/reference/NormalizePlanarYUVLayer.cpp
+++ b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,6 +72,17 @@ SimpleTensor<uint8_t> normalize_planar_yuv_layer<uint8_t>(const SimpleTensor<uin
return dst;
}
+template <>
+SimpleTensor<int8_t> normalize_planar_yuv_layer<int8_t>(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &mean, const SimpleTensor<int8_t> &std)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> mean_tmp = convert_from_asymmetric(mean);
+ SimpleTensor<float> std_tmp = convert_from_asymmetric(std);
+ SimpleTensor<float> dst_tmp = normalize_planar_yuv_layer<float>(src_tmp, mean_tmp, std_tmp);
+ SimpleTensor<int8_t> dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info());
+ return dst;
+}
+
template SimpleTensor<half> normalize_planar_yuv_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &mean, const SimpleTensor<half> &std);
template SimpleTensor<float> normalize_planar_yuv_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &mean, const SimpleTensor<float> &std);
} // namespace reference
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 010412c92b..40dd6fa505 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -172,6 +172,15 @@ SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, c
}
template <>
+SimpleTensor<int8_t> pooling_layer<int8_t>(const SimpleTensor<int8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = pooling_layer_internal<float>(src_tmp, info, output_qinfo);
+ SimpleTensor<int8_t> dst = convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo);
+ return dst;
+}
+
+template <>
SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
{
if(src.data_type() == DataType::F16 && info.fp_mixed_precision())
diff --git a/tests/validation/reference/UpsampleLayer.cpp b/tests/validation/reference/UpsampleLayer.cpp
index d77f9ae348..a81a601057 100644
--- a/tests/validation/reference/UpsampleLayer.cpp
+++ b/tests/validation/reference/UpsampleLayer.cpp
@@ -23,6 +23,7 @@
*/
#include "UpsampleLayer.h"
+#include "arm_compute/core/utils/misc/Requires.h"
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -33,10 +34,8 @@ namespace validation
{
namespace reference
{
-namespace
-{
template <typename T>
-SimpleTensor<T> upsample_function(const SimpleTensor<T> &src, const Size2D &info, const InterpolationPolicy policy)
+SimpleTensor<T> upsample_layer(const SimpleTensor<T> &src, const Size2D &info, const InterpolationPolicy policy)
{
ARM_COMPUTE_ERROR_ON(policy != InterpolationPolicy::NEAREST_NEIGHBOR);
ARM_COMPUTE_UNUSED(policy);
@@ -76,36 +75,12 @@ SimpleTensor<T> upsample_function(const SimpleTensor<T> &src, const Size2D &info
return out;
}
-} // namespace
-
-template <typename T>
-SimpleTensor<T> upsample_layer(const SimpleTensor<T> &src, const Size2D &info, const InterpolationPolicy policy)
-{
- return upsample_function<T>(src, info, policy);
-}
-
-template <>
-SimpleTensor<uint8_t> upsample_layer(const SimpleTensor<uint8_t> &src, const Size2D &info, const InterpolationPolicy policy)
-{
- SimpleTensor<uint8_t> dst(src.shape(), src.data_type(), 1, src.quantization_info());
-
- if(is_data_type_quantized_asymmetric(src.data_type()))
- {
- SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
- SimpleTensor<float> dst_tmp = upsample_function<float>(src_tmp, info, policy);
- dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info());
- }
- else
- {
- dst = upsample_function<uint8_t>(src, info, policy);
- }
- return dst;
-}
-
template SimpleTensor<float> upsample_layer(const SimpleTensor<float> &src,
const Size2D &info, const InterpolationPolicy policy);
template SimpleTensor<half> upsample_layer(const SimpleTensor<half> &src,
const Size2D &info, const InterpolationPolicy policy);
+template SimpleTensor<uint8_t> upsample_layer(const SimpleTensor<uint8_t> &src,
+ const Size2D &info, const InterpolationPolicy policy);
template SimpleTensor<int8_t> upsample_layer(const SimpleTensor<int8_t> &src,
const Size2D &info, const InterpolationPolicy policy);
} // namespace reference