aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-05-18 20:08:58 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:52:54 +0000
commit793f87d10ec0b7cc98e84f8567f33151e14ac07e (patch)
tree904154195052d9f5e77f02369c6c6fdfc310d8ea
parent86f709686161b0ebe41cdbfb0a446e659503dcce (diff)
downloadComputeLibrary-793f87d10ec0b7cc98e84f8567f33151e14ac07e.tar.gz
COMPMID-1176: Add FP16 support in CLDeconvolutionLayer.
Change-Id: Ic82ca002220fa31d8618a55084ff1dfc2585bea7 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131944 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h4
-rw-r--r--src/core/CL/cl_kernels/deconvolution_layer.cl8
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp6
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp2
-rw-r--r--tests/validation/CL/DeconvolutionLayer.cpp36
-rw-r--r--tests/validation/fixtures/DeconvolutionLayerFixture.h18
-rw-r--r--tests/validation/reference/DeconvolutionLayer.cpp2
8 files changed, 57 insertions, 27 deletions
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index f31560cb86..d63f5d4907 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -50,16 +50,16 @@ public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data types supported: F16/F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
*
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: F16/F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index a66fc258bb..f29039a97c 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -64,7 +64,7 @@ public:
CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F16/F32.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
@@ -77,7 +77,7 @@ public:
unsigned int inner_border_right, unsigned int inner_border_top);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F16/F32.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
index 2514ddc8cc..794f4aa950 100644
--- a/src/core/CL/cl_kernels/deconvolution_layer.cl
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,13 +25,13 @@
/** This function applies upsample on an input image.
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: F32
+ * @param[out] dst_ptr Pointer to the destination image. Supported data types: F16/F32
* @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
@@ -46,5 +46,5 @@ __kernel void deconvolution_upsample(
Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
// Store result
- *((__global float *)dst.ptr) = *((__global float *)src.ptr);
+ *((__global DATA_TYPE *)dst.ptr) = *((__global DATA_TYPE *)src.ptr);
}
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 650c5b89dc..e7cdf8c607 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -43,7 +43,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0);
@@ -74,7 +74,9 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), inner_border, info));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample"));
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
constexpr unsigned int num_elems_processed_per_iteration = 1;
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index cb8dc02386..4d971f3c72 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -46,7 +46,7 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
unsigned int inner_border_right, unsigned int inner_border_top)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 58a20268ef..758cf3698a 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -43,7 +43,9 @@ namespace validation
{
namespace
{
-constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's for DataType::F16 */
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
* framework::dataset::make("PadY", 0, 3) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
@@ -169,10 +171,9 @@ template <typename T>
using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
TEST_SUITE(Float)
-
TEST_SUITE(FP32)
-TEST_SUITE(W4x4)
+TEST_SUITE(W4x4)
FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32)))
{
// Validate output
@@ -181,7 +182,6 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::Da
TEST_SUITE_END()
TEST_SUITE(W3x3)
-
FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
{
// Validate output
@@ -198,6 +198,34 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::Da
TEST_SUITE_END()
TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
TEST_SUITE_END()
TEST_SUITE_END()
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index f2455f31ac..137068a4b9 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,16 +58,14 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- switch(tensor.data_type())
+ if(is_data_type_float(tensor.data_type()))
{
- case DataType::F32:
- {
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
- library->fill(tensor, distribution, i);
- break;
- }
- default:
- library->fill_tensor_uniform(tensor, i);
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(tensor, distribution, i);
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, i);
}
}
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 617f6908e4..35437084b8 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -91,6 +91,8 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
template SimpleTensor<float> deconvolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
+template SimpleTensor<half> deconvolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
+ const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
} // namespace reference
} // namespace validation
} // namespace test