aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2018-04-11 15:59:10 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:37 +0000
commite52a3000d2c13bc1b66ca66b3d12b6b836982394 (patch)
tree70e8ef5ba216762604f84228805aac9bd65747b6 /src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
parentdd03870b63784abe499761da2b26b209b33f2db2 (diff)
downloadComputeLibrary-e52a3000d2c13bc1b66ca66b3d12b6b836982394.tar.gz
COMPMID-1026 - Add support for 4x4 output tile in CLWinogradConvolutionLayer
The performance achieved can be found at the following confluence page: https://confluence.arm.com/display/MLENG/GEMM-based+convolution+vs+Winograd-based+convolution+on+OpenCL Change-Id: I4b690cfdd4eb4ff0cd17b14fdd49ccaa1d1dc85c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127729 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 8ee1a82209..c5d2528aa2 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -58,6 +58,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size != Size2D(3U, 3U) && kernel_size != Size2D(5U, 5U), "Only 3x3 and 5x5 kernels are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size == Size2D(3U, 3U) && output_tile_size == Size2D(2U, 2U) && input->dimension(2) != 16, "Wrong number of batches");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size == Size2D(3U, 3U) && output_tile_size == Size2D(4U, 4U) && input->dimension(2) != 36, "Wrong number of batches");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(kernel_size == Size2D(5U, 5U) && output_tile_size == Size2D(4U, 4U) && input->dimension(2) != 64, "Wrong number of batches");
// Compute number of elements to process in the X and Y direction
@@ -67,7 +68,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
const int num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != static_cast<unsigned int>((num_tiles_x * num_tiles_y)));
- ARM_COMPUTE_UNUSED(output_tile_size);
if(bias != nullptr)
{
@@ -207,4 +207,4 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
enqueue(queue, *this, slice, _lws_hint);
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out));
-} \ No newline at end of file
+}