diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2018-05-08 17:23:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:51:17 +0000 |
commit | d24af8a3e8f7cbd38fd3142056241c0c9f63e46a (patch) | |
tree | a6d90561bbc1b976708d120a576bf222059dd36b | |
parent | 932b561159cd6a8c9230bbd0343790c85755846e (diff) | |
download | ComputeLibrary-d24af8a3e8f7cbd38fd3142056241c0c9f63e46a.tar.gz |
COMPMID-1125: Add support for FP16 in CLDepthwiseConvolution
Change-Id: I4838f5a8e4c33ed646cd05e0bb682fca635a29a3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130469
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
7 files changed, 47 insertions, 28 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h index b8343da50a..3f3e36100a 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -51,7 +51,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, * while every dimension above 3 represents a batch. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h index c8d1e2f1ad..1c1eaca474 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h @@ -51,7 +51,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. - * Data type supported: QASYMM8/F32. + * Data type supported: QASYMM8/F16/F32. * @param[out] output The output tensor. Data type supported: same as @p input. * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index dc57978ae1..18031c7e7e 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,7 +51,7 @@ public: /** Initialise the kernel's input, output and border mode. * - * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F32. + * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F16/F32. * @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 21c28539ef..5f4247e5d3 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -500,7 +500,7 @@ __kernel void depthwise_weights_reshape( #if defined(HAS_BIAS) if(get_global_id(1) == 0) { - *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x)); + *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global DATA_TYPE *)(biases.ptr + get_global_id(2) * biases_stride_x)); } #endif // defined(HAS_BIAS) } diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl index fbd4f6ae0f..33a9495d66 100644 --- a/src/core/CL/cl_kernels/fill_border.cl +++ b/src/core/CL/cl_kernels/fill_border.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ * @attention The border size for top, bottom, left, right needs to be passed at the compile time. * e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2 * - * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32 + * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32 * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) @@ -110,7 +110,7 @@ __kernel void fill_image_borders_replicate( * @attention The border size for top, bottom, left, right needs to be passed at the compile time. * e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2 * - * @param[out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32 + * @param[out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32 * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index ea2f93b85d..88bb0c417d 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -80,7 +80,7 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer() void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2)); diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index ad7a5d819b..54b7925a09 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -45,6 +45,7 @@ namespace RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ +constexpr float tolerance_num = 0.05f; /**< Tolerance number */ const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); } // namespace @@ -55,29 +56,11 @@ TEST_SUITE(DepthwiseConvolutionLayer) template <typename T> using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>; -TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - template <typename T> using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>; TEST_SUITE(Float) -TEST_SUITE(F16) +TEST_SUITE(FP16) TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), @@ -98,6 +81,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, fr validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(FP32) @@ -121,6 +122,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, f validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE_END() + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE_END() |