From c10bc0b5db5169a6ccea02a1aaefe34f082709e5 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 9 Jan 2019 11:55:00 +0000 Subject: COMPMID-1710: Collapse window in CLDepthConvertKernel Change-Id: I16589a2b3beb18e20b56059fdabccc61e26e3944 Reviewed-on: https://review.mlplatform.org/481 Reviewed-by: Isabella Gottardi Tested-by: Arm Jenkins --- src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index b0c21624ed..e188ee92a8 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,8 +83,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out const size_t input_size = data_size_from_type(input->info()->data_type()); const size_t output_size = data_size_from_type(output->info()->data_type()); + // Get number of elements to process per iterations + const unsigned int num_elems_processed_per_iteration = 16; + // Set build options CLBuildOptions build_opts; + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined @@ -96,12 +100,16 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Set shift arg - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters + unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters _kernel.setArg(idx++, shift); // Configure kernel - constexpr unsigned int num_elems_processed_per_iteration = 16; - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); + ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration); + + // Collapse window + const Window &full_window = window(); + Window collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ); + ICLKernel::configure_internal(collapsed_window); } Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift) -- cgit v1.2.1