aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2018-05-08 17:23:52 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:51:17 +0000
commitd24af8a3e8f7cbd38fd3142056241c0c9f63e46a (patch)
treea6d90561bbc1b976708d120a576bf222059dd36b
parent932b561159cd6a8c9230bbd0343790c85755846e (diff)
downloadComputeLibrary-d24af8a3e8f7cbd38fd3142056241c0c9f63e46a.tar.gz
COMPMID-1125: Add support for FP16 in CLDepthwiseConvolution
Change-Id: I4838f5a8e4c33ed646cd05e0bb682fca635a29a3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130469 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLFillBorderKernel.h4
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution.cl2
-rw-r--r--src/core/CL/cl_kernels/fill_border.cl6
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp2
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayer.cpp57
7 files changed, 47 insertions, 28 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
index b8343da50a..3f3e36100a 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h
@@ -51,7 +51,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
* @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input,
* while every dimension above 3 represents a batch. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h
index c8d1e2f1ad..1c1eaca474 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h
@@ -51,7 +51,7 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM].
- * Data type supported: QASYMM8/F32.
+ * Data type supported: QASYMM8/F16/F32.
* @param[out] output The output tensor. Data type supported: same as @p input.
* @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input.
*/
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
index dc57978ae1..18031c7e7e 100644
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,7 +51,7 @@ public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F32.
+ * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F16/F32.
* @param[in] border_size Size of the border to fill in elements.
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index 21c28539ef..5f4247e5d3 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -500,7 +500,7 @@ __kernel void depthwise_weights_reshape(
#if defined(HAS_BIAS)
if(get_global_id(1) == 0)
{
- *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x));
+ *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global DATA_TYPE *)(biases.ptr + get_global_id(2) * biases_stride_x));
}
#endif // defined(HAS_BIAS)
}
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index fbd4f6ae0f..33a9495d66 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@
* @attention The border size for top, bottom, left, right needs to be passed at the compile time.
* e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2
*
- * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32
+ * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32
* @param[in] buf_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
@@ -110,7 +110,7 @@ __kernel void fill_image_borders_replicate(
* @attention The border size for top, bottom, left, right needs to be passed at the compile time.
* e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2
*
- * @param[out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32
+ * @param[out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32
* @param[in] buf_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index ea2f93b85d..88bb0c417d 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -80,7 +80,7 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2));
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index ad7a5d819b..54b7925a09 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -45,6 +45,7 @@ namespace
RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+constexpr float tolerance_num = 0.05f; /**< Tolerance number */
const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
} // namespace
@@ -55,29 +56,11 @@ TEST_SUITE(DepthwiseConvolutionLayer)
template <typename T>
using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)))
-{
- validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", DataLayout::NCHW)))
-{
- validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-
template <typename T>
using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>;
TEST_SUITE(Float)
-TEST_SUITE(F16)
+TEST_SUITE(FP16)
TEST_SUITE(W3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL,
combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
@@ -98,6 +81,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, fr
validate(CLAccessor(_target), _reference, tolerance_f16);
}
TEST_SUITE_END()
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
TEST_SUITE_END()
TEST_SUITE(FP32)
@@ -121,6 +122,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, f
validate(CLAccessor(_target), _reference, tolerance_f32);
}
TEST_SUITE_END()
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
+{
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
TEST_SUITE_END()
TEST_SUITE_END()