author    Michele Di Giorgio <michele.digiorgio@arm.com>  2019-12-20 13:26:08 +0000
committer Michele Di Giorgio <michele.digiorgio@arm.com>  2020-01-14 17:21:26 +0000
commit    cbbed288a71f2f048123db3cf396361e5d66ce93 (patch)
tree      bd48122e283272adb668a42097ea69ca3f0473a6 /src
parent    70d33bdfd36e1b44b0573189dca67ed7c63dd59e (diff)
download  ComputeLibrary-cbbed288a71f2f048123db3cf396361e5d66ce93.tar.gz
COMPMID-2991: Add support for QASYMM8_SIGNED in CL kernels/functions - part 2
Adding support for QASYMM8_SIGNED to the following CL kernels/functions:

- CLActivationLayerKernel/CLActivationLayer
- CLComparisonKernel/CLComparison
- CLConvertFullyConnectedWeightsKernel/CLConvertFullyConnectedWeights
- CLDeconvolutionLayerUpsampleKernel/CLDeconvolutionLayerUpsample
- CLDepthToSpaceLayerKernel/CLDepthToSpaceLayer
- CLDequantizationLayerKernel/CLDequantizationLayer
- CLGEMMMatrixVectorMultiplyKernel
- CLNormalizePlanarYUVLayerKernel
- CLPReluLayer
- CLPixelWiseMultiplicationKernel/CLPixelWiseMultiplication
- CLPoolingLayerKernel/CLPoolingLayer

Change-Id: I874bbb7c2b08baa9c5ff4c9e6bc8778b42a6bec5
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2539
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
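The core of the change is supporting a second 8-bit affine quantization type: QASYMM8 stores values as uint8 while QASYMM8_SIGNED stores them as int8, both with real_value = scale * (quantized - offset). A minimal C++ sketch of that mapping (an illustration of the scheme, not the library's quantize_qasymm8()/quantize_qasymm8_signed() implementations):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Affine 8-bit quantization: real = scale * (q - offset). QASYMM8 uses
// uint8_t storage, QASYMM8_SIGNED uses int8_t; only the storage type and
// the typical offset range differ.
template <typename Q>
Q quantize(float value, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::lround(value / scale)) + offset;
    return static_cast<Q>(std::clamp(q,
                                     static_cast<int32_t>(std::numeric_limits<Q>::min()),
                                     static_cast<int32_t>(std::numeric_limits<Q>::max())));
}

int main()
{
    // The same real value 0.5f under the two types, using the fixed LOGISTIC
    // output quantization checked in the activation kernel below:
    // (1/256, 0) for QASYMM8 and (1/256, -128) for QASYMM8_SIGNED.
    std::cout << int(quantize<uint8_t>(0.5f, 1.f / 256.f, 0)) << '\n';   // 128
    std::cout << int(quantize<int8_t>(0.5f, 1.f / 256.f, -128)) << '\n'; // 0
}
```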
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/cl_kernels/comparisons.cl | 11
-rw-r--r-- src/core/CL/cl_kernels/convert_fc_weights.cl | 4
-rw-r--r-- src/core/CL/cl_kernels/deconvolution_layer.cl | 4
-rw-r--r-- src/core/CL/cl_kernels/depth_to_space.cl | 6
-rw-r--r-- src/core/CL/cl_kernels/dequantization_layer.cl | 4
-rw-r--r-- src/core/CL/cl_kernels/gemv.cl | 24
-rw-r--r-- src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl | 6
-rw-r--r-- src/core/CL/cl_kernels/pixelwise_mul_int.cl | 8
-rw-r--r-- src/core/CL/cl_kernels/pooling_layer.cl | 18
-rw-r--r-- src/core/CL/cl_kernels/pooling_layer_quantized.cl | 67
-rw-r--r-- src/core/CL/kernels/CLActivationLayerKernel.cpp | 95
-rw-r--r-- src/core/CL/kernels/CLComparisonKernel.cpp | 11
-rw-r--r-- src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp | 13
-rw-r--r-- src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 9
-rw-r--r-- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 5
-rw-r--r-- src/core/CL/kernels/CLDequantizationLayerKernel.cpp | 4
-rw-r--r-- src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 6
-rw-r--r-- src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 17
-rw-r--r-- src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 29
-rw-r--r-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 39
-rw-r--r-- src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp | 9
-rw-r--r-- src/runtime/CL/functions/CLDepthToSpaceLayer.cpp | 27
-rw-r--r-- src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp | 25
-rw-r--r-- src/runtime/CL/functions/CLPoolingLayer.cpp | 26
24 files changed, 247 insertions, 220 deletions
diff --git a/src/core/CL/cl_kernels/comparisons.cl b/src/core/CL/cl_kernels/comparisons.cl
index 8824b136b2..a41b7e2966 100644
--- a/src/core/CL/cl_kernels/comparisons.cl
+++ b/src/core/CL/cl_kernels/comparisons.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @attention The comparison operation should be given as a preprocessor argument using -DOP=operation. e.g. -DOP=LESS
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All non-quantized data types.
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -93,12 +93,13 @@ __kernel void DEFINE_KERNEL(OP_NAME)(
#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2)
/** This function compares two quantized tensors.
*
+ * @note The inputs' data type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar
* @note The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10
* @note The quantization offset of the second operand must be passed at compile time using -DOFFSET_IN2, i.e. -DOFFSET_IN2=10
* @note The quantization scale of the first operand must be passed at compile time using -DSCALE_IN1, i.e. -DSCALE_IN1=10
* @note The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: All quantized data types.
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -133,8 +134,8 @@ __kernel void DEFINE_KERNEL_QUANTIZED(OP_NAME)(
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
- int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+ int16 in_a = CONVERT(vload16(0, (__global DATA_TYPE *)in1.ptr), int16);
+ int16 in_b = CONVERT(vload16(0, (__global DATA_TYPE *)in2.ptr), int16);
in_a = in_a - (int16)((int)OFFSET_IN1);
in_b = in_b - (int16)((int)OFFSET_IN2);
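The quantized comparison kernel now loads 16 elements as DATA_TYPE (uchar or char), widens them to int, and removes each operand's offset; the SCALE_IN1/SCALE_IN2 defines then rescale each side (in the part of the kernel not shown in this hunk) before OP is applied. A scalar C++ sketch of the same idea, as a hypothetical helper assuming per-tensor offsets and scales:

```cpp
#include <cstdint>
#include <iostream>

// Sketch of a quantized comparison: remove each operand's offset, rescale to
// the real domain, then compare. Q is uint8_t for QASYMM8 and int8_t for
// QASYMM8_SIGNED, mirroring the kernel's DATA_TYPE.
template <typename Q>
bool less_quantized(Q a, Q b, int32_t off_a, float scale_a, int32_t off_b, float scale_b)
{
    const float real_a = (static_cast<int32_t>(a) - off_a) * scale_a;
    const float real_b = (static_cast<int32_t>(b) - off_b) * scale_b;
    return real_a < real_b; // OP=LESS; the other comparison ops substitute here
}

int main()
{
    // (-28 - (-128)) * 0.1 = 10.0 versus (50 - (-128)) * 0.1 = 17.8
    std::cout << std::boolalpha
              << less_quantized<int8_t>(-28, 50, -128, 0.1f, -128, 0.1f) << '\n'; // true
}
```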
diff --git a/src/core/CL/cl_kernels/convert_fc_weights.cl b/src/core/CL/cl_kernels/convert_fc_weights.cl
index d47b733acd..db0873755e 100644
--- a/src/core/CL/cl_kernels/convert_fc_weights.cl
+++ b/src/core/CL/cl_kernels/convert_fc_weights.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
* @attention Original input tensor width*height and depth should be given as a preprocessor argument using -DFACTOR_1=size and -DFACTOR_2=size for NCHW and vice versa for NHWC. e.g. -DFACTOR_1=256 and -DFACTOR_2=128
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All.
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
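For reference, the FACTOR_1/FACTOR_2 pair describes the two extents being swapped when fully connected weights trained against one data layout are reused with the other: conceptually, each weight row is permuted from (C, H*W) order to (H*W, C) order. A hypothetical C++ sketch of that permutation (illustrative only, not the kernel's indexing code):

```cpp
#include <cstddef>
#include <vector>

// Permute one fully connected weight row from NCHW-flattened order
// (channel-major) to NHWC-flattened order (spatial-major). The kernel's
// FACTOR_1/FACTOR_2 defines encode these two extents.
std::vector<float> permute_row(const std::vector<float> &row, size_t channels, size_t spatial)
{
    std::vector<float> out(row.size());
    for(size_t c = 0; c < channels; ++c)
    {
        for(size_t s = 0; s < spatial; ++s)
        {
            out[s * channels + c] = row[c * spatial + s]; // NCHW index -> NHWC index
        }
    }
    return out;
}
```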
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
index ea2455c613..a9a6ac1947 100644
--- a/src/core/CL/cl_kernels/deconvolution_layer.cl
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
/** This function applies upsample on an input image.
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8/F16/F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All.
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/depth_to_space.cl b/src/core/CL/cl_kernels/depth_to_space.cl
index 2ffd0a40e7..5c2e8a1d57 100644
--- a/src/core/CL/cl_kernels/depth_to_space.cl
+++ b/src/core/CL/cl_kernels/depth_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,7 @@
* @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -72,7 +72,7 @@ __kernel void depth_to_space_nchw(
* @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 7550b4ba76..add86e3f2e 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,7 +33,7 @@
* @note Quantization scale of input tensor is passed in with -DSCALE=scale.
* @note Quantization offset of input tensor is passed in with -DOFFSET=offset.
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM8
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/gemv.cl b/src/core/CL/cl_kernels/gemv.cl
index 811aa1b865..aabde4119f 100644
--- a/src/core/CL/cl_kernels/gemv.cl
+++ b/src/core/CL/cl_kernels/gemv.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -110,12 +110,12 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC
}
}
}
-#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
-#if defined(SRC_WIDTH) && defined(SRC_HEIGHT)
/** This kernel applies dot product to each plane on the input tensor and the corresponding column of the reshaped weight tensor.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -123,13 +123,13 @@ __kernel void gemm_mv(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(weights), VEC
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] weights_ptr Pointer to the weights tensor. Same as @p src_ptr
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
* @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: S32
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
@@ -158,14 +158,14 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src),
// This kernel handles 4 rows per thread so that it can reuse the weights
for(int i = 0; i < SRC_WIDTH; i += 4)
{
- int4 w = convert_int4(vload4(0, (__global uchar *)(current_weights + i * weights_stride_x))) + (int4)weights_offset;
+ int4 w = convert_int4(vload4(0, (__global DATA_TYPE *)(current_weights + i * weights_stride_x))) + (int4)weights_offset;
int4 offset = (int4)i * (int4)src_stride_x + (int4)(0, 1, 2, 3) * (int4)src_stride_y;
- int4 tmp0 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s0))) + (int4)input_offset;
- int4 tmp1 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s1))) + (int4)input_offset;
- int4 tmp2 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s2))) + (int4)input_offset;
- int4 tmp3 = convert_int4(vload4(0, (__global uchar *)(input_ptr + offset.s3))) + (int4)input_offset;
+ int4 tmp0 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s0))) + (int4)input_offset;
+ int4 tmp1 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s1))) + (int4)input_offset;
+ int4 tmp2 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s2))) + (int4)input_offset;
+ int4 tmp3 = convert_int4(vload4(0, (__global DATA_TYPE *)(input_ptr + offset.s3))) + (int4)input_offset;
// Accumulate
acc0 += tmp0.s0 * w.s0 + tmp0.s1 * w.s1 + tmp0.s2 * w.s2 + tmp0.s3 * w.s3;
@@ -197,4 +197,4 @@ __kernel void gemm_mv_quantized(TENSOR3D_DECLARATION(src),
}
}
}
-#endif /* defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
+#endif /* defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) */
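gemm_mv_quantized thus accumulates in int32 regardless of whether DATA_TYPE is uchar (QASYMM8) or char (QASYMM8_SIGNED): each 8-bit element gets its offset added back before the multiply-accumulate, and the S32 destination holds the raw accumulator for a later requantization stage. A scalar C++ sketch of one output element (an illustration, not the kernel's vectorized code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Quantized dot product: widen each 8-bit value to int32, add the tensor's
// offset back, then multiply-accumulate. The result stays in S32.
int32_t dot_quantized(const std::vector<int8_t> &src, const std::vector<int8_t> &weights,
                      int32_t input_offset, int32_t weights_offset)
{
    int32_t acc = 0;
    for(size_t i = 0; i < src.size(); ++i)
    {
        acc += (static_cast<int32_t>(src[i]) + input_offset)
               * (static_cast<int32_t>(weights[i]) + weights_offset);
    }
    return acc;
}

int main()
{
    std::cout << dot_quantized({1, 2, 3, 4}, {5, 6, 7, 8}, 10, -2) << '\n'; // 230
}
```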
diff --git a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
index 925975d2ba..b2ba65f812 100644
--- a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@
* @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8
* @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8
*
- * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -102,7 +102,7 @@ __kernel void normalize_planar_yuv_layer_q8_nchw(TENSOR3D_DECLARATION(src),
* @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8
* @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8
*
- * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_int.cl b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
index 989316d661..d277c6c56f 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_int.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_int.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -109,7 +109,7 @@ __kernel void pixelwise_mul_int(
* @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
- * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QSYMM16
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
* @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
@@ -117,7 +117,7 @@ __kernel void pixelwise_mul_int(
* @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
* @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
+ * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr
* @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
@@ -125,7 +125,7 @@ __kernel void pixelwise_mul_int(
* @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
* @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr
* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
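With the quantized path now covering char as well as uchar outputs, the kernel dequantizes both operands, multiplies them together with the user-provided scale, and requantizes with the output parameters. A scalar C++ sketch of that float math, as a hypothetical helper:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Quantized pixelwise multiply: dequantize a and b, multiply with the user
// scale, requantize with the output (offset, scale), saturating to Q's range.
// Q is uint8_t (QASYMM8) or int8_t (QASYMM8_SIGNED).
template <typename Q>
Q mul_quantized(Q a, Q b, float scale,
                int32_t o1, float s1, int32_t o2, float s2, int32_t oo, float so)
{
    const float real  = (static_cast<int32_t>(a) - o1) * s1
                        * (static_cast<int32_t>(b) - o2) * s2 * scale;
    const int32_t q   = static_cast<int32_t>(std::lround(real / so)) + oo;
    return static_cast<Q>(std::clamp(q,
                                     static_cast<int32_t>(std::numeric_limits<Q>::min()),
                                     static_cast<int32_t>(std::numeric_limits<Q>::max())));
}
```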
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index c8b5e07b47..207669e43e 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -391,28 +391,16 @@ __kernel void pooling_layer_optimized_3(
#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)
-// Set the initial value for the pooling operation accordingly with the data type
-#if defined(POOL_AVG) || defined(POOL_L2)
-#define INITIAL_VALUE 0
-#else /* defined(POOL_AVG) || defined(POOL_L2) */
-#if FP16
-#define INITIAL_VALUE -HALF_MAX
-#else // FP16
-#define INITIAL_VALUE -FLT_MAX
-#endif // FP16
-
-#endif // POOL_AVG
-
/** Performs a pooling function of pool size equal to N (NCHW)
*
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
- * @note -DFP16 must be passed at compile time if half float data type is used
* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
@@ -519,13 +507,13 @@ ACC_DATA_TYPE calculate_avg_scale_nhwc(const int pool_size_x, const int pool_siz
/** Performs a pooling function of pool size equal to N (NHWC)
*
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32
- * @note -DFP16 must be passed at compile time if half float data type is used
* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
* @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT
* @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
index 2df22d736c..3a370eea93 100644
--- a/src/core/CL/cl_kernels/pooling_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,18 +23,19 @@
*/
#include "helpers.h"
+#if defined(DATA_TYPE) && defined(INITIAL_VALUE)
+#define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
-#define VEC_FLOAT(VEC_SIZE) \
- VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR(VEC_SIZE) VEC_DATA_TYPE(uchar, VEC_SIZE)
#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res) \
{ \
const VEC_FLOAT(VEC_SIZE) in_f32 = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset)); \
- res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_UCHAR(VEC_SIZE)); \
+ res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE)); \
}
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
@@ -74,8 +75,10 @@ int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int
* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)
* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
+ * @note Input data type must be passed at compile time using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -100,8 +103,8 @@ __kernel void pooling_layer_MxN_quantized_nchw(
Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
- int8 vdata = 0;
- int sdata = 0;
+ int8 vdata = INITIAL_VALUE;
+ int sdata = INITIAL_VALUE;
// Load data
for(int y = 0; y < POOL_SIZE_Y; y++)
@@ -109,17 +112,18 @@ __kernel void pooling_layer_MxN_quantized_nchw(
int x = 0;
for(; x <= ((int)POOL_SIZE_X - 8); x += 8)
{
- uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0));
- int8 data0 = convert_int8(data);
- vdata = POOL_OP(vdata, data0);
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ int8 data0 = convert_int8(data);
+ vdata = POOL_OP(vdata, data0);
}
// Leftover
for(; x < (int)POOL_SIZE_X; ++x)
{
- uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0));
- int data0 = convert_int(data);
- sdata = POOL_OP(sdata, data0);
+ DATA_TYPE data = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ int data0 = convert_int(data);
+ sdata = POOL_OP(sdata, data0);
}
}
@@ -133,22 +137,22 @@ __kernel void pooling_layer_MxN_quantized_nchw(
res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
#endif /* defined(POOL_AVG) */
- uchar result_u8 = convert_uchar(res);
+ DATA_TYPE result_q8 = CONVERT(res, DATA_TYPE);
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- const float result_f32 = convert_float(result_u8);
+ const float result_f32 = convert_float(result_q8);
const float input_offset = (float)OFFSET_IN1;
const float input_scale = (float)SCALE_IN1;
const float scale_out = (float)SCALE_OUT;
const float offset_out = (float)OFFSET_OUT;
const float in_f32 = (result_f32 - input_offset) * input_scale;
const float out_f32 = in_f32 / scale_out + offset_out;
- result_u8 = convert_uchar_sat(convert_int_rte(out_f32));
+ result_q8 = CONVERT_SAT(convert_int_rte(out_f32), DATA_TYPE);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
- *(__global uchar *)output.ptr = result_u8;
+ *(__global DATA_TYPE *)output.ptr = result_q8;
}
int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
@@ -158,7 +162,7 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u
#if defined(DST_DEPTH)
int start_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
#else /* defined(DST_DEPTH) */
- int start_y = get_global_id(2) * stride_y - pad_y;
+ int start_y = get_global_id(2) * stride_y - pad_y;
#endif /* defined(DST_DEPTH) */
const int end_x = min(start_x + pool_size_x, upper_bound_w);
@@ -178,8 +182,9 @@ int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int u
* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
* @note In case of average pooling the following information must be passed at compile time:
* -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -209,17 +214,17 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
#else /* defined(DST_DEPTH) */
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
#endif /* defined(DST_DEPTH) */
- int8 vdata = 0;
+ int8 vdata = INITIAL_VALUE;
const int idx_width = get_global_id(1) * STRIDE_X;
#if defined(DST_DEPTH)
const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y;
#else /* defined(DST_DEPTH) */
- const int idx_height = get_global_id(2) * STRIDE_Y;
+ const int idx_height = get_global_id(2) * STRIDE_Y;
#endif /* defined(DST_DEPTH) */
for(int y = 0; y < POOL_SIZE_Y; ++y)
@@ -231,9 +236,11 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
x1 = select(x1, PAD_X - idx_width - 1, y != y1);
#if defined(DST_DEPTH)
- uchar8 data = vload8(0, (__global uchar *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));
#else /* defined(DST_DEPTH) */
- uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
+ VEC_TYPE(8)
+ data = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
#endif /* defined(DST_DEPTH) */
int8 data0 = convert_int8(data);
@@ -246,11 +253,13 @@ __kernel void pooling_layer_MxN_quantized_nhwc(
vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
#endif /* defined(POOL_AVG) */
- uchar8 out_u8 = convert_uchar8(vdata);
+ VEC_TYPE(8)
+ out_q8 = CONVERT(vdata, VEC_TYPE(8));
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- REQUANTIZE(8, out_u8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_u8);
+ REQUANTIZE(8, out_q8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q8);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
// Store result
- vstore8(out_u8, 0, (__global uchar *)output.ptr);
+ vstore8(out_q8, 0, (__global DATA_TYPE *)output.ptr);
}
+#endif /* defined(DATA_TYPE) && defined(INITIAL_VALUE) */
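The REQUANTIZE macro is what makes the store type-agnostic: it dequantizes with the input (scale, offset) pair, requantizes with the output pair, and saturates into VEC_TYPE, which is uchar or char depending on -DDATA_TYPE. A scalar C++ sketch of the same math (illustrative; the kernel uses round-to-nearest-even, approximated here with lround):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Requantize: input (scale, offset) -> real value -> output (scale, offset),
// saturating to the 8-bit range of Q (uint8_t or int8_t).
template <typename Q>
Q requantize(Q in, int32_t in_offset, float in_scale, int32_t out_offset, float out_scale)
{
    const float real = (static_cast<int32_t>(in) - in_offset) * in_scale;
    const int32_t q  = static_cast<int32_t>(std::lround(real / out_scale)) + out_offset;
    return static_cast<Q>(std::clamp(q,
                                     static_cast<int32_t>(std::numeric_limits<Q>::min()),
                                     static_cast<int32_t>(std::numeric_limits<Q>::max())));
}
```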
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 5062fd1801..270eb78dcb 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,14 +39,14 @@
#include <cmath>
#include <set>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::F16, DataType::F32);
static std::set<ActivationLayerInfo::ActivationFunction> quantized_supported_activations =
{
@@ -63,12 +63,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(data_type) && (quantized_supported_activations.count(f_act) == 0),
"For Quantized data type only tanh, logistic, relu and lower/upper bounded relu are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8 && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128)));
+
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
{
@@ -135,27 +138,11 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
const DataType dt = input->info()->data_type();
float a_const = act_info.a();
float b_const = act_info.b();
- int a_const_int = 0;
- int b_const_int = 0;
const ActivationLayerInfo::ActivationFunction f_act = act_info.activation();
const bool is_quantized = is_data_type_quantized(dt);
const bool perform_activation_in_float = (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) || (f_act == ActivationLayerInfo::ActivationFunction::TANH);
- // Create quantized version of constants a, b if needed
- if(dt == DataType::QASYMM8)
- {
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- a_const_int = quantize_qasymm8(a_const, iq_info);
- b_const_int = quantize_qasymm8(b_const, iq_info);
- }
- else if(dt == DataType::QSYMM16)
- {
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- a_const_int = quantize_qsymm16(a_const, iq_info);
- b_const_int = quantize_qsymm16(b_const, iq_info);
- }
-
// Set build options
CLBuildOptions build_opts;
build_opts.add_option_if(perform_activation_in_float, "-DFLOAT_DOMAIN");
@@ -164,28 +151,59 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- // Set A, B constants in build options
- if(is_quantized && !perform_activation_in_float)
- {
- build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
- build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
- }
- else
- {
- build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
- build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
- }
+ std::string kernel_name = std::string("activation_layer");
// Set quantization info build options
if(is_quantized)
{
const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ if(!perform_activation_in_float)
+ {
+ int a_const_int = 0;
+ int b_const_int = 0;
+
+ // Create quantized version of constants a, b if needed
+ switch(dt)
+ {
+ case DataType::QASYMM8:
+ {
+ a_const_int = quantize_qasymm8(a_const, iq_info);
+ b_const_int = quantize_qasymm8(b_const, iq_info);
+ }
+ break;
+ case DataType::QASYMM8_SIGNED:
+ {
+ a_const_int = quantize_qasymm8_signed(a_const, iq_info);
+ b_const_int = quantize_qasymm8_signed(b_const, iq_info);
+ }
+ break;
+ case DataType::QSYMM16:
+ {
+ a_const_int = quantize_qsymm16(a_const, iq_info);
+ b_const_int = quantize_qsymm16(b_const, iq_info);
+ }
+ break;
+ default:
+ break;
+ }
+ build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
+ build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
+ }
+ else
+ {
+ build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
+ build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
+ }
+
// Quantized value of 0 corresponds to the offset o1
build_opts.add_option(("-DCONST_0=" + (is_data_type_quantized_asymmetric(dt) ? support::cpp11::to_string(iq_info.offset) : "0")));
build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale)));
build_opts.add_option_if(is_data_type_quantized_asymmetric(dt), "-DO1_VAL=" + support::cpp11::to_string(iq_info.offset));
+ // Set correct kernel name
+ kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant");
+
// Set scale and offset of the input and output if they have different quantization info
if(output != nullptr)
{
@@ -198,14 +216,14 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
}
}
-
- // Create kernel
- std::string kernel_name = std::string("activation_layer");
- if(is_quantized)
+ else
{
- kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant");
+ // Set A, B constants in build options for float types
+ build_opts.add_option(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
+ build_opts.add_option(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
}
+ // Create kernel
_kernel = create_opencl_kernel(_ctx, kernel_name, build_opts);
// Make sure _kernel is initialized before calling the parent's configure
_input = input;
@@ -254,3 +272,4 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 5570ecfc79..afee429219 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,12 +52,7 @@ int calculate_num_elems_processed_per_iteration(const ITensorInfo &input)
Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ComparisonOperation operation)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input1.data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
ARM_COMPUTE_RETURN_ERROR_ON(supported_comparison_ops.count(operation) == 0);
@@ -132,7 +127,7 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp
build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info())));
build_opts.emplace("-DOP=" + operation_name);
build_opts.emplace("-DOP_NAME=" + lower_string(operation_name));
- if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ if(is_data_type_quantized(input1->info()->data_type()))
{
const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index 81856769b2..7ec6841149 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,7 +64,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
// Set build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
build_opts.add_option("-DFACTOR_1=" + support::cpp11::to_string(factor_1));
build_opts.add_option("-DFACTOR_2=" + support::cpp11::to_string(factor_2));
@@ -79,18 +79,15 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
- DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3));
ARM_COMPUTE_RETURN_ERROR_ON(data_layout == DataLayout::UNKNOWN);
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
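get_cl_unsigned_type_from_element_size() is the key to the relaxed validation here: a kernel that only moves elements around needs no arithmetic type, so choosing the OpenCL type by element width alone lets one build serve every data type of that size. A sketch of the mapping this helper plausibly performs (the real one lives in the library's CL helpers):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>

// Pick an unsigned OpenCL C type purely by element width; signedness is
// irrelevant for kernels that only copy or rearrange bytes, so QASYMM8 and
// QASYMM8_SIGNED (and U8/S8, etc.) share one compiled kernel.
std::string cl_unsigned_type_from_element_size(size_t element_size)
{
    switch(element_size)
    {
        case 1: return "uchar";
        case 2: return "ushort";
        case 4: return "uint";
        case 8: return "ulong";
        default: throw std::runtime_error("Unsupported element size");
    }
}
```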
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index cd9552f149..ee392032ca 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -32,8 +32,8 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
: _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN)
{
@@ -45,7 +45,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -82,7 +82,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
constexpr unsigned int num_elems_processed_per_iteration = 1;
@@ -156,3 +156,4 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
ARM_COMPUTE_ERROR("Unsupported data layout");
}
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index 49a5590231..f23f7ce542 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2);
@@ -81,7 +82,7 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 60659faaaf..f85cb7636a 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
if(output->tensor_shape().total_size() > 0)
{
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 9e5d677e89..c158937839 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -90,11 +90,11 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
_output = output;
// Check if is a quantized operation
- bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type());
+ const bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type());
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option_if(!is_quantized, "-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input0->info()->dimension(0)));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1)));
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index b255ba346f..220c2cd576 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,15 +35,15 @@
#include "support/ToolchainSupport.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std);
@@ -97,12 +97,8 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel()
void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
-
// Perform validation step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info()));
_input = input;
@@ -183,3 +179,4 @@ void CLNormalizePlanarYUVLayerKernel::run(const Window &window, cl::CommandQueue
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 50cdc9c7f4..6bdb1242a6 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,10 +50,18 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_UNUSED(overflow_policy);
ARM_COMPUTE_UNUSED(rounding_policy);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
@@ -63,12 +71,17 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
// Validate in case of configured output
if(output->total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
+ 1,
+ DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S16, DataType::QSYMM16, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8 && (input1->data_type() != DataType::QASYMM8 || input2->data_type() != DataType::QASYMM8),
"Output can only be QASYMM8 if both inputs are QASYMM8");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QASYMM8_SIGNED && (input1->data_type() != DataType::QASYMM8_SIGNED || input2->data_type() != DataType::QASYMM8_SIGNED),
+ "Output can only be QASYMM8_SIGNED if both inputs are QASYMM8_SIGNED");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::QSYMM16 && (input1->data_type() != DataType::QSYMM16 || input2->data_type() != DataType::QSYMM16),
"Output can only be QSYMM16 if both inputs are QSYMM16");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
@@ -99,6 +112,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITe
{
set_data_type_if_unknown(*output, DataType::QASYMM8);
}
+ else if(input1->data_type() == DataType::QASYMM8_SIGNED)
+ {
+ set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED);
+ }
else if(input1->data_type() == DataType::QSYMM16)
{
set_data_type_if_unknown(*output, DataType::QSYMM16);
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index e3f1114f21..2d75e5f969 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,8 @@
#include <string>
#include <tuple>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
@@ -57,19 +58,8 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, PoolingLayerInfo p
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- DataLayout data_layout = input->data_layout();
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- switch(data_layout)
- {
- case DataLayout::NCHW:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- break;
- case DataLayout::NHWC:
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- break;
- default:
- ARM_COMPUTE_ERROR("Data layout not supported");
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type() == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -234,7 +224,25 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
- build_opts.add_option_if(data_type == DataType::F16, "-DFP16");
+ // Set the initial value for the pooling operation accordingly with the data type
+ if(pool_type == PoolingType::MAX)
+ {
+ if(is_data_type_quantized(data_type))
+ {
+ PixelValue type_min{};
+ std::tie(type_min, std::ignore) = get_min_max(data_type);
+ build_opts.add_option("-DINITIAL_VALUE=" + support::cpp11::to_string(type_min.get<int32_t>()));
+ }
+ else
+ {
+ build_opts.add_option("-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits<float>::lowest()));
+ }
+ }
+ else
+ {
+ // Pool AVG and Pool L2 initial value
+ build_opts.add_option("-DINITIAL_VALUE=0");
+ }
const auto use_fp_mixed_precision = (data_type == DataType::F16) && pool_info.fp_mixed_precision();
const auto use_wider_accumulator = use_fp_mixed_precision && (pool_type != PoolingType::MAX);
@@ -389,3 +397,4 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_ERROR("Not implemented");
}
}
+} // namespace arm_compute
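The block above replaces the old FP16-only define with an explicit
-DINITIAL_VALUE build option for every data type. A condensed sketch of the
same selection as a hypothetical free function (get_min_max() and
float_to_string_with_full_precision() are the real helpers used above;
initial_value_option() itself is not library API):

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "support/ToolchainSupport.h"

#include <limits>
#include <string>
#include <tuple>

using namespace arm_compute;

std::string initial_value_option(PoolingType pool_type, DataType data_type)
{
    if(pool_type != PoolingType::MAX)
    {
        // AVG and L2 pooling accumulate sums, so they start from zero
        return "-DINITIAL_VALUE=0";
    }
    if(is_data_type_quantized(data_type))
    {
        // Smallest representable value: -128 for QASYMM8_SIGNED, 0 for QASYMM8
        PixelValue type_min{};
        std::tie(type_min, std::ignore) = get_min_max(data_type);
        return "-DINITIAL_VALUE=" + support::cpp11::to_string(type_min.get<int32_t>());
    }
    // Floating-point MAX pooling starts from the lowest representable float
    return "-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
}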
diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
index c226e56aff..02927e83ad 100644
--- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
+++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,8 @@
*/
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
@@ -37,4 +37,5 @@ Status CLConvertFullyConnectedWeights::validate(const ITensorInfo *input, const
DataLayout data_layout)
{
return CLConvertFullyConnectedWeightsKernel::validate(input, output, original_input_shape, data_layout);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
index 08aef92eae..1581dd9c19 100644
--- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,33 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "support/ToolchainSupport.h"
-using namespace arm_compute;
+#include <utility>
-CLDepthToSpaceLayer::CLDepthToSpaceLayer()
- : _depth_to_space_kernel()
+namespace arm_compute
{
-}
-
void CLDepthToSpaceLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
- _depth_to_space_kernel.configure(input, output, block_shape);
+ auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceLayerKernel>();
+ k->configure(input, output, block_shape);
+ _kernel = std::move(k);
}
Status CLDepthToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
{
return CLDepthToSpaceLayerKernel::validate(input, output, block_shape);
}
-
-void CLDepthToSpaceLayer::run()
-{
- CLScheduler::get().enqueue(_depth_to_space_kernel, true);
-}
+} // namespace arm_compute
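The rewrite above removes CLDepthToSpaceLayer's member kernel, constructor
and run() override: the function now inherits them from ICLSimpleFunction,
so derived classes only implement configure()/validate(). A simplified
sketch of that pattern (the real ICLSimpleFunction also manages a border
handler; this shows only the core idea):

#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

#include <memory>

// Simplified single-kernel function: configure() implementations create a
// kernel with make_unique and move it into _kernel; run() just enqueues it.
class SimpleFunctionSketch
{
public:
    void run()
    {
        arm_compute::CLScheduler::get().enqueue(*_kernel);
    }

protected:
    std::unique_ptr<arm_compute::ICLKernel> _kernel{};
};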
diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
index 11d70e31fb..c5de591f5c 100644
--- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,22 +24,18 @@
#include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
namespace arm_compute
{
-CLNormalizePlanarYUVLayer::CLNormalizePlanarYUVLayer()
- : _norm_kernel()
-{
-}
-
void CLNormalizePlanarYUVLayer::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
{
- _norm_kernel.configure(input, output, mean, std);
+ auto k = arm_compute::support::cpp14::make_unique<CLNormalizePlanarYUVLayerKernel>();
+ k->configure(input, output, mean, std);
+ _kernel = std::move(k);
}
Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
@@ -47,9 +43,4 @@ Status CLNormalizePlanarYUVLayer::validate(const ITensorInfo *input, const ITens
{
return CLNormalizePlanarYUVLayerKernel::validate(input, output, mean, std);
}
-
-void CLNormalizePlanarYUVLayer::run()
-{
- CLScheduler::get().enqueue(_norm_kernel, true);
-}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index 086017a7fd..f3ea926ae7 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,8 +28,8 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
@@ -40,12 +40,14 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
k->configure(input, output, pool_info);
_kernel = std::move(k);
+ const DataType data_type = input->info()->data_type();
+
// Configure border depending on operation required (quantize border in case of asymmetric data_type)
BorderMode border_mode{};
PixelValue pixel_value(0.f);
- if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
+ if(is_data_type_quantized_asymmetric(data_type) && !pool_info.exclude_padding())
{
- pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
+ pixel_value = PixelValue(0, data_type, input->info()->quantization_info());
}
switch(input->info()->data_layout())
{
@@ -54,9 +56,16 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
break;
case DataLayout::NHWC:
border_mode = BorderMode::CONSTANT;
- if(PoolingType::MAX == pool_info.pool_type() && !is_data_type_quantized_asymmetric(input->info()->data_type()))
+ if(PoolingType::MAX == pool_info.pool_type())
{
- pixel_value = PixelValue(std::numeric_limits<float>::lowest());
+ if(is_data_type_quantized(data_type))
+ {
+ std::tie(pixel_value, std::ignore) = get_min_max(data_type);
+ }
+ else
+ {
+ pixel_value = PixelValue(std::numeric_limits<float>::lowest());
+ }
}
break;
default:
@@ -71,4 +80,5 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
return CLPoolingLayerKernel::validate(input, output, pool_info);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
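In the NHWC MAX case above, the constant border value must never win the
max comparison, so quantized inputs now pad with their type minimum (e.g.
-128 for QASYMM8_SIGNED) instead of being excluded from the special case. A
standalone sketch of the selection (border_value_for_max_pooling is a
hypothetical name, not library API):

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"

#include <limits>
#include <tuple>

// Pick a border value that can never be selected by MAX pooling: the type
// minimum for quantized data, the lowest float otherwise.
arm_compute::PixelValue border_value_for_max_pooling(arm_compute::DataType data_type)
{
    using namespace arm_compute;
    PixelValue pixel_value(0.f);
    if(is_data_type_quantized(data_type))
    {
        std::tie(pixel_value, std::ignore) = get_min_max(data_type);
    }
    else
    {
        pixel_value = PixelValue(std::numeric_limits<float>::lowest());
    }
    return pixel_value;
}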