From c10bc0b5db5169a6ccea02a1aaefe34f082709e5 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Wed, 9 Jan 2019 11:55:00 +0000
Subject: COMPMID-1710: Collapse window in CLDepthConvertKernel

Change-Id: I16589a2b3beb18e20b56059fdabccc61e26e3944
Reviewed-on: https://review.mlplatform.org/481
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/core/CL/kernels')

diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index b0c21624ed..e188ee92a8 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,8 +83,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     const size_t input_size  = data_size_from_type(input->info()->data_type());
     const size_t output_size = data_size_from_type(output->info()->data_type());
 
+    // Get number of elements to process per iteration
+    const unsigned int num_elems_processed_per_iteration = 16;
+
     // Set build options
     CLBuildOptions build_opts;
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
     build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
     // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
@@ -96,12 +100,16 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     _kernel                       = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
     // Set shift arg
-    unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
+    unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
     _kernel.setArg(idx++, shift);
 
     // Configure kernel
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-    ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
+    ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration);
+
+    // Collapse window
+    const Window &full_window      = window();
+    Window        collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ);
+    ICLKernel::configure_internal(collapsed_window);
 }
 
 Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
-- 
cgit v1.2.1