From 2d9de0a3fa6ad858e70040124f362799a962bb6a Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Thu, 15 Mar 2018 17:58:20 +0000 Subject: COMPMID-1009 Support 4x4 output tile for Winograd Filter Transform on OpenCL. Change-Id: I68c6453e0f192de659582404f109a89616b9fbb9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124811 Tested-by: Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Gian Marco Iodice --- src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp') diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index 5081cbac4e..a861e0072e 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -64,7 +64,7 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we _input_transform.configure(input, &_input0, conv_info, Size2D(kernel_w, kernel_h)); // Configure filter transform - _filter_transform.configure(weights, &_input1); + _filter_transform.configure(weights, &_input1, Size2D(2U, 2U)); // Configure batched matrix multiply _batched_mm.configure(&_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/)); @@ -103,9 +103,9 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradInputTransform::validate(input, &input0, conv_info, Size2D(kernel_w, kernel_h))); // Validate filter transform - const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights); + const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, Size2D(2U, 2U)); const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape); - ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1)); + ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, Size2D(2U, 2U))); // Configure batched matrix multiply TensorShape batched_mm_output_shape = input0.tensor_shape(); -- cgit v1.2.1