From 793f87d10ec0b7cc98e84f8567f33151e14ac07e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 18 May 2018 20:08:58 +0100 Subject: COMPMID-1176: Add FP16 support in CLDeconvolutionLayer. Change-Id: Ic82ca002220fa31d8618a55084ff1dfc2585bea7 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131944 Tested-by: Jenkins Reviewed-by: Vidhya Sudhan Loganathan Reviewed-by: Anthony Barbier --- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 8 ++--- .../runtime/CL/functions/CLDeconvolutionLayer.h | 4 +-- src/core/CL/cl_kernels/deconvolution_layer.cl | 8 ++--- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 6 ++-- src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 2 +- tests/validation/CL/DeconvolutionLayer.cpp | 36 +++++++++++++++++++--- .../fixtures/DeconvolutionLayerFixture.h | 18 +++++------ tests/validation/reference/DeconvolutionLayer.cpp | 2 ++ 8 files changed, 57 insertions(+), 27 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h index f31560cb86..d63f5d4907 100644 --- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -50,16 +50,16 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: F16/F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero. * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] input Source tensor info. Data types supported: F16/F32. + * @param[in] output Destination tensor info. Data types supported: F16/F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero. * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. * diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index a66fc258bb..f29039a97c 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -64,7 +64,7 @@ public: CLDeconvolutionLayer(std::shared_ptr memory_manager = nullptr); /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. @@ -77,7 +77,7 @@ public: unsigned int inner_border_right, unsigned int inner_border_top); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl index 2514ddc8cc..794f4aa950 100644 --- a/src/core/CL/cl_kernels/deconvolution_layer.cl +++ b/src/core/CL/cl_kernels/deconvolution_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,13 +25,13 @@ /** This function applies upsample on an input image. * - * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_ptr Pointer to the source image. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] dst_ptr Pointer to the destination image. Supported data types: F32 + * @param[out] dst_ptr Pointer to the destination image. Supported data types: F16/F32 * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) @@ -46,5 +46,5 @@ __kernel void deconvolution_upsample( Image dst = CONVERT_TO_IMAGE_STRUCT(dst); // Store result - *((__global float *)dst.ptr) = *((__global float *)src.ptr); + *((__global DATA_TYPE *)dst.ptr) = *((__global DATA_TYPE *)src.ptr); } diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 650c5b89dc..e7cdf8c607 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -43,7 +43,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0); @@ -74,7 +74,9 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), inner_border, info)); // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("deconvolution_upsample")); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); constexpr unsigned int num_elems_processed_per_iteration = 1; diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index cb8dc02386..4d971f3c72 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -46,7 +46,7 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf unsigned int inner_border_right, unsigned int inner_border_top) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1); ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index 58a20268ef..758cf3698a 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -43,7 +43,9 @@ namespace validation { namespace { -constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ +constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ +RelativeTolerance tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's for DataType::F16 */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 }); @@ -169,10 +171,9 @@ template using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; TEST_SUITE(Float) - TEST_SUITE(FP32) -TEST_SUITE(W4x4) +TEST_SUITE(W4x4) FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -181,7 +182,6 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4, framework::Da TEST_SUITE_END() TEST_SUITE(W3x3) - FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -197,6 +197,34 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1, framework::Da } TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE(FP16) + +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() + TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index f2455f31ac..137068a4b9 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,16 +58,14 @@ protected: template void fill(U &&tensor, int i) { - switch(tensor.data_type()) + if(is_data_type_float(tensor.data_type())) { - case DataType::F32: - { - std::uniform_real_distribution<> distribution(-1.0f, 1.0f); - library->fill(tensor, distribution, i); - break; - } - default: - library->fill_tensor_uniform(tensor, i); + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + else + { + library->fill_tensor_uniform(tensor, i); } } diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index 617f6908e4..35437084b8 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -91,6 +91,8 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, const std::pair &a); +template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, + const PadStrideInfo &info, const std::pair &a); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1