about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
diff options
context:
space:
mode:
author Gian Marco <gianmarco.iodice@arm.com> 2018-02-15 12:35:44 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:48:33 +0000
commit ae2af74ae4368004221a41e6891e0173453996ac (patch)
tree a9d16fd683ee45e1caf071c0175c9d61cb99fdc3 /src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
parent d56e770e7c394d13706a21ee350e7dafe4278987 (diff)
download ComputeLibrary-ae2af74ae4368004221a41e6891e0173453996ac.tar.gz
COMPMID-935 - Implementing Convolution with Winograd on OpenCL (Part 1)
This patch enables GEMM to execute multiple batches in parallel.
https://confluence.arm.com/display/MLENG/Winograd%3A+batched+GEMM

Change-Id: I66222db041dd35e82af11fbb262fd1ebd3ca4b2f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120866
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp 15
1 files changed, 9 insertions, 6 deletions
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index 24d218760e..5489fde818 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -86,8 +86,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->tensor_shape()));
}
+ // Collapse along the Z direction
+ Window collapsed = win.collapse(win, Window::DimZ);
+
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
+ return std::make_pair(err, collapsed);
}
} // namespace
@@ -151,15 +154,15 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue
out_window.set(Window::DimX, window.y());
out_window.set(Window::DimY, window.x());
- Window in_slice = window.first_slice_window_2D();
- Window out_slice = out_window.first_slice_window_2D();
+ Window in_slice = window.first_slice_window_3D();
+ Window out_slice = out_window.first_slice_window_3D();
do
{
unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, in_slice);
- add_2D_tensor_argument(idx, _output, out_slice);
+ add_3D_tensor_argument(idx, _input, in_slice);
+ add_3D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, _lws_hint);
}
- while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice));
+ while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice));
}