From d24af8a3e8f7cbd38fd3142056241c0c9f63e46a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 8 May 2018 17:23:52 +0100 Subject: COMPMID-1125: Add support for FP16 in CLDepthwiseConvolution Change-Id: I4838f5a8e4c33ed646cd05e0bb682fca635a29a3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130469 Tested-by: Jenkins Reviewed-by: Michalis Spyrou --- .../core/CL/kernels/CLDepthwiseIm2ColKernel.h | 2 +- .../CL/kernels/CLDepthwiseWeightsReshapeKernel.h | 2 +- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 4 +- src/core/CL/cl_kernels/depthwise_convolution.cl | 2 +- src/core/CL/cl_kernels/fill_border.cl | 6 +-- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 2 +- tests/validation/CL/DepthwiseConvolutionLayer.cpp | 57 ++++++++++++++-------- 7 files changed, 47 insertions(+), 28 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h index b8343da50a..3f3e36100a 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -51,7 +51,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, * while every dimension above 3 represents a batch. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h index c8d1e2f1ad..1c1eaca474 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h @@ -51,7 +51,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. - * Data type supported: QASYMM8/F32. + * Data type supported: QASYMM8/F16/F32. * @param[out] output The output tensor. Data type supported: same as @p input. * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index dc57978ae1..18031c7e7e 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,7 +51,7 @@ public: /** Initialise the kernel's input, output and border mode. * - * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F32. + * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F16/F32. * @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 21c28539ef..5f4247e5d3 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -500,7 +500,7 @@ __kernel void depthwise_weights_reshape( #if defined(HAS_BIAS) if(get_global_id(1) == 0) { - *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x)); + *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global DATA_TYPE *)(biases.ptr + get_global_id(2) * biases_stride_x)); } #endif // defined(HAS_BIAS) } diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl index fbd4f6ae0f..33a9495d66 100644 --- a/src/core/CL/cl_kernels/fill_border.cl +++ b/src/core/CL/cl_kernels/fill_border.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ * @attention The border size for top, bottom, left, right needs to be passed at the compile time. * e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2 * - * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32 + * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32 * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) @@ -110,7 +110,7 @@ __kernel void fill_image_borders_replicate( * @attention The border size for top, bottom, left, right needs to be passed at the compile time. * e.g. --DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2 * - * @param[out] buf_ptr Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F32 + * @param[out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32 * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index ea2f93b85d..88bb0c417d 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -80,7 +80,7 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer() void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2)); diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index ad7a5d819b..54b7925a09 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -45,6 +45,7 @@ namespace RelativeTolerance tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ +constexpr float tolerance_num = 0.05f; /**< Tolerance number */ const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); } // namespace @@ -55,29 +56,11 @@ TEST_SUITE(DepthwiseConvolutionLayer) template using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; -TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - template using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; TEST_SUITE(Float) -TEST_SUITE(F16) +TEST_SUITE(FP16) TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), @@ -98,6 +81,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, fr validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(FP32) @@ -121,6 +122,24 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, f validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE_END() + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE_END() -- cgit v1.2.1