From 71ecf396bb08e302dc06b2c7ed44001894d3a2ea Mon Sep 17 00:00:00 2001
From: Vidhya Sudhan Loganathan
Date: Fri, 31 Aug 2018 16:10:16 +0100
Subject: COMPMID-1266 : support for FP16 in CLWinogradConvolutionLayer

Added support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels and 3x3 kernels(COMPMID-937)

Change-Id: I0f394cbdc978dd04176416e9f612aca3986b09e6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145537
Tested-by: Jenkins
Reviewed-by: Giorgio Arena
---
 tests/validation/reference/Winograd.cpp | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'tests/validation/reference/Winograd.cpp')

diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
index 132d252383..3c2c11d632 100644
--- a/tests/validation/reference/Winograd.cpp
+++ b/tests/validation/reference/Winograd.cpp
@@ -232,7 +232,7 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor
     initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);
 
     // Transpose matrix
-    transpose_matrix(matrix, matrix_transposed);
+    transpose_matrix<T>(matrix, matrix_transposed);
 
     const int in_w = in.shape().x();
     const int in_h = in.shape().y();
@@ -293,14 +293,14 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor
                     int yi = y * step_y - conv_info.pad_top();
 
                     // Get the tile from the input tensor
-                    get_tile(in, src_tile, Coordinates(xi, yi, z, b));
+                    get_tile<T>(in, src_tile, Coordinates(xi, yi, z, b));
 
                     // Fill partially with zeros in case of 1D convolution
-                    zeros(src_tile, anchor_zeros, shape_zeros);
+                    zeros<T>(src_tile, anchor_zeros, shape_zeros);
 
                     // Compute the transformation
-                    matrix_multiply(matrix, src_tile, tmp_tile);
-                    matrix_multiply(tmp_tile, matrix_transposed, dst_tile);
+                    matrix_multiply<T>(matrix, src_tile, tmp_tile);
+                    matrix_multiply<T>(tmp_tile, matrix_transposed, dst_tile);
 
                     // Store the output tile across the channels
                     for(int i = 0; i < out_d; ++i)
@@ -358,7 +358,7 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso
     initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);
 
     // Transpose the transformation matrix
-    transpose_matrix(trans_matrix, trans_matrix_transposed);
+    transpose_matrix<T>(trans_matrix, trans_matrix_transposed);
 
     const int num_channels = in.shape()[2];
     const int num_filters = in.shape()[3];
@@ -374,13 +374,13 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso
             for(int z = 0; z < num_channels; ++z)
             {
                 // Load the tile from the input tensor
-                get_tile(in, input_tile, Coordinates(0, 0, z, w, n));
+                get_tile<T>(in, input_tile, Coordinates(0, 0, z, w, n));
 
                 // First transformation
-                matrix_multiply(trans_matrix, input_tile, tmp_tile);
+                matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);
 
                 // Second transformation
-                matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile);
+                matrix_multiply<T>(tmp_tile, trans_matrix_transposed, transf_tile);
 
                 // Store the output tile across the channels
                 const int output_offset = w + z * num_filters;
@@ -451,7 +451,7 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl
     initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);
 
     // Transpose the transformation matrix
-    transpose_matrix(trans_matrix, trans_matrix_transposed);
+    transpose_matrix<T>(trans_matrix, trans_matrix_transposed);
 
     const int w_in = in.shape()[0];
     const int h_in = in.shape()[1];
@@ -487,7 +487,7 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl
     const int step_y_transf_tile = kernel_size.width == 1 ? 1 : output_tile.shape()[0];
 
     // Initialize with zeros the input tile
-    zeros(input_tile, Coordinates(0, 0), input_tile.shape());
+    zeros<T>(input_tile, Coordinates(0, 0), input_tile.shape());
 
     for(int n = 0; n < num_batches; ++n)
     {
@@ -502,10 +502,10 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl
                 }
 
                 // First transformation
-                matrix_multiply(trans_matrix, input_tile, tmp_tile);
+                matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);
 
                 // Second transformation
-                matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile);
+                matrix_multiply<T>(tmp_tile, trans_matrix_transposed, output_tile);
 
                 // Store the output tile
                 const int xo = (y % num_tiles_x) * out_tile_w;
@@ -538,6 +538,10 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl
 template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
 template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
 template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const SimpleTensor<float> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+template SimpleTensor<half> winograd_filter_transform(const SimpleTensor<half> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+template SimpleTensor<half> winograd_input_transform(const SimpleTensor<half> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+template SimpleTensor<half> winograd_output_transform(const SimpleTensor<half> &in, const SimpleTensor<half> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);
+
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1