From 0021d750d66d199c411df00cdd8308c325f1fef3 Mon Sep 17 00:00:00 2001
From: Diego Lopez Recas
Date: Mon, 18 Dec 2017 14:42:56 +0000
Subject: IVGCVSW-863 Broadcast support in CL/NEON Arithmetic Add

Also, added instrumentation to support generic tensor broadcasting for NEON and CL backends.

Change-Id: I1bc5747a286e1a4b464c209067581e103d473b9a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114201
Reviewed-by: Anthony Barbier
Tested-by: Jenkins
---
 src/core/CL/kernels/CLPermuteKernel.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'src/core/CL/kernels/CLPermuteKernel.cpp')

diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index 132de60b68..1f36445732 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -106,10 +106,10 @@ void CLPermuteKernel::run(const Window &window, cl::CommandQueue &queue)
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
 
-    Window slice_in = window.first_slice_window_4D();
-    Window slice_out(slice_in);
+    Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
 
     // Setup output slice
+    Window slice_out(slice_in);
     slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
     slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
     slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
@@ -117,12 +117,10 @@ void CLPermuteKernel::run(const Window &window, cl::CommandQueue &queue)
 
     do
     {
-        auto collapsed_slice_in  = slice_in.collapse(ICLKernel::window(), 2);
-        auto collapsed_slice_out = slice_out.collapse(ICLKernel::window(), 2);
-        unsigned int idx         = 0;
-        add_4D_tensor_argument(idx, _input, collapsed_slice_in);
-        add_4D_tensor_argument(idx, _output, collapsed_slice_out);
-        enqueue(queue, *this, collapsed_slice_in);
+        unsigned int idx = 0;
+        add_4D_tensor_argument(idx, _input, slice_in);
+        add_4D_tensor_argument(idx, _output, slice_out);
+        enqueue(queue, *this, slice_in);
     }
     while(window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
 }
-- 
cgit v1.2.1