diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-01-09 11:55:00 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-01-09 17:24:57 +0000 |
commit | c10bc0b5db5169a6ccea02a1aaefe34f082709e5 (patch) | |
tree | 2be8e3c929dc91de3de2f898a6e4b33d2bd51259 /src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | |
parent | 588ebc5ccab2e47c42c3e9303306e3744834f52f (diff) | |
download | ComputeLibrary-c10bc0b5db5169a6ccea02a1aaefe34f082709e5.tar.gz |
COMPMID-1710: Collapse window in CLDepthConvertKernel
Change-Id: I16589a2b3beb18e20b56059fdabccc61e26e3944
Reviewed-on: https://review.mlplatform.org/481
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthConvertLayerKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 16 |
1 files changed, 12 insertions, 4 deletions
```diff
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index b0c21624ed..e188ee92a8 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,8 +83,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     const size_t input_size = data_size_from_type(input->info()->data_type());
     const size_t output_size = data_size_from_type(output->info()->data_type());
 
+    // Get number of elements to process per iterations
+    const unsigned int num_elems_processed_per_iteration = 16;
+
     // Set build options
     CLBuildOptions build_opts;
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
     build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
     // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
@@ -96,12 +100,16 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
     // Set shift arg
-    unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
+    unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
     _kernel.setArg(idx++, shift);
 
     // Configure kernel
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-    ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
+    ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration);
+
+    // Collapse window
+    const Window &full_window = window();
+    Window collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ);
+    ICLKernel::configure_internal(collapsed_window);
 }
 
 Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
```