From ae2af74ae4368004221a41e6891e0173453996ac Mon Sep 17 00:00:00 2001 From: Gian Marco Date: Thu, 15 Feb 2018 12:35:44 +0000 Subject: COMPMID-935 - Implementing Convolution with Winograd on OpenCL (Part 1) This patch enables GEMM to execute multiple batches in parallel https://confluence.arm.com/display/MLENG/Winograd%3A+batched+GEMM Change-Id: I66222db041dd35e82af11fbb262fd1ebd3ca4b2f Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120866 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 24d218760e..5489fde818 100644 --- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -86,8 +86,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->tensor_shape())); } + // Collapse along the Z direction + Window collapsed = win.collapse(win, Window::DimZ); + Status err = (window_changed) ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); + return std::make_pair(err, collapsed); } } // namespace @@ -151,15 +154,15 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue out_window.set(Window::DimX, window.y()); out_window.set(Window::DimY, window.x()); - Window in_slice = window.first_slice_window_2D(); - Window out_slice = out_window.first_slice_window_2D(); + Window in_slice = window.first_slice_window_3D(); + Window out_slice = out_window.first_slice_window_3D(); do { unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, in_slice); - add_2D_tensor_argument(idx, _output, out_slice); + add_3D_tensor_argument(idx, _input, in_slice); + add_3D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice, _lws_hint); } - while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); + while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice)); } -- cgit v1.2.1