From 793f87d10ec0b7cc98e84f8567f33151e14ac07e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 18 May 2018 20:08:58 +0100 Subject: COMPMID-1176: Add FP16 support in CLDeconvolutionLayer. Change-Id: Ic82ca002220fa31d8618a55084ff1dfc2585bea7 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131944 Tested-by: Jenkins Reviewed-by: Vidhya Sudhan Loganathan Reviewed-by: Anthony Barbier --- src/core/CL/cl_kernels/deconvolution_layer.cl | 8 ++++---- src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'src/core/CL') diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl index 2514ddc8cc..794f4aa950 100644 --- a/src/core/CL/cl_kernels/deconvolution_layer.cl +++ b/src/core/CL/cl_kernels/deconvolution_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,13 +25,13 @@ /** This function applies upsample on an input image. * - * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_ptr Pointer to the source image. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] dst_ptr Pointer to the destination image. Supported data types: F32 + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: F16/F32 * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) @@ -46,5 +46,5 @@ __kernel void deconvolution_upsample( Image dst = CONVERT_TO_IMAGE_STRUCT(dst); // Store result - *((__global float *)dst.ptr) = *((__global float *)src.ptr); + *((__global DATA_TYPE *)dst.ptr) = *((__global DATA_TYPE *)src.ptr); } diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 650c5b89dc..e7cdf8c607 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -43,7 +43,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0); @@ -74,7 +74,9 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), inner_border, info)); // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample")); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); constexpr unsigned int num_elems_processed_per_iteration = 1; -- cgit 
v1.2.1