aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-09 11:55:00 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-09 17:24:57 +0000
commit c10bc0b5db5169a6ccea02a1aaefe34f082709e5 (patch)
tree 2be8e3c929dc91de3de2f898a6e4b33d2bd51259 /src/core/CL/kernels
parent 588ebc5ccab2e47c42c3e9303306e3744834f52f (diff)
download ComputeLibrary-c10bc0b5db5169a6ccea02a1aaefe34f082709e5.tar.gz
COMPMID-1710: Collapse window in CLDepthConvertKernel
Change-Id: I16589a2b3beb18e20b56059fdabccc61e26e3944 Reviewed-on: https://review.mlplatform.org/481 Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r--src/core/CL/kernels/CLDepthConvertLayerKernel.cpp16
1 files changed, 12 insertions, 4 deletions
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index b0c21624ed..e188ee92a8 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,8 +83,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
const size_t input_size = data_size_from_type(input->info()->data_type());
const size_t output_size = data_size_from_type(output->info()->data_type());
+ // Get number of elements to process per iteration
+ const unsigned int num_elems_processed_per_iteration = 16;
+
// Set build options
CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
// Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
@@ -96,12 +100,16 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Set shift arg
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
_kernel.setArg(idx++, shift);
// Configure kernel
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
+ ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration);
+
+ // Collapse window
+ const Window &full_window = window();
+ Window collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ);
+ ICLKernel::configure_internal(collapsed_window);
}
Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)