From a25d16c86f0d870408bc8b941aa755093417b0f0 Mon Sep 17 00:00:00 2001
From: Vidhya Sudhan Loganathan
Date: Fri, 16 Nov 2018 11:33:12 +0000
Subject: COMPMID-1266 : Add support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels

Introduced F32 accumulation for the F16 Winograd GEMM and output transform.
WinogradConvolution will be available for F16 only if the fast math flag is enabled.

Change-Id: I215593c205236a0f9669218437bb40b184ec6a4f
---
 tests/validation/CL/Winograd.cpp                   | 21 ++++++-----
 tests/validation/NEON/ConvolutionLayer.cpp         |  2 +-
 .../fixtures/WinogradConvolutionLayerFixture.h     | 41 ++++++++++++++--------
 3 files changed, 39 insertions(+), 25 deletions(-)

(limited to 'tests/validation')

diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 930f7aa8ce..f7f06b7f79 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -58,6 +58,9 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.001f);
 const AbsoluteTolerance<half> tolerance_f16(half(0.5f));
 constexpr AbsoluteTolerance<float> tolerance_convolution_layer_f32(0.1f);
 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
+RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
+constexpr float tolerance_num = 0.05f;                            /**< Allowed ratio of out-of-tolerance elements */
+constexpr float abs_tolerance_convolution_layer_f16 = 2.5f;       /**< Absolute tolerance for FP16 convolution layer validation */
 
 // Input transform
 const auto SmallWinogradInputTransformDatasetNCHW =
@@ -834,10 +837,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, fram
 TEST_SUITE_END() // Conv1x5
 TEST_SUITE_END() // FP32
 
-#ifdef WINOGRAD_F16_SUPPORT //to be reintroduced after COMPMID-1266 is resolved
+
 TEST_SUITE(FP16)
 
-using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half>;
+using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half, float>;
 TEST_SUITE(Conv3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
@@ -856,7 +859,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv3x3
@@ -878,7 +881,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv3x1
@@ -900,7 +903,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv1x3
@@ -924,7 +927,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv5x5
@@ -948,7 +951,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv5x1
@@ -972,12 +975,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16);
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv1x5
 TEST_SUITE_END() // FP16
 
-#endif /*#ifdef WINOGRAD_F16_SUPPORT*/
+
 TEST_SUITE_END() // ConvolutionLayer
 TEST_SUITE_END() // Winograd
 TEST_SUITE_END() // CL
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index d216d9db86..d8710ee91b 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -120,7 +120,7 @@ template <typename T>
 using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
 
 template <typename T>
-using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, false>;
+using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
 
 TEST_SUITE(FP32)
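
Note on the new FP16 tolerances: each FP16 validate() call above now takes a
relative tolerance, an allowed mismatch ratio, and an absolute tolerance
instead of a single absolute tolerance. The stand-alone sketch below
illustrates the acceptance rule this implies, assuming (as the validation
framework documents) that an element passes when it is within either the
absolute or the relative bound, and that tolerance_num caps the fraction of
out-of-tolerance elements. It is an illustration only, not the framework's
implementation.

#include <cmath>
#include <cstddef>
#include <vector>

// Sketch of:
//   validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
bool validate_sketch(const std::vector<float> &target, const std::vector<float> &reference,
                     float rel_tol, float tolerance_num, float abs_tol)
{
    std::size_t mismatches = 0;
    for(std::size_t i = 0; i < reference.size(); ++i)
    {
        const float err = std::fabs(target[i] - reference[i]);
        // An element fails only if it misses both the absolute and the relative bound
        if(err > abs_tol && err > rel_tol * std::fabs(reference[i]))
        {
            ++mismatches;
        }
    }
    // tolerance_num (0.05f above) is the permitted ratio of failing elements
    return reference.empty() || static_cast<float>(mismatches) / static_cast<float>(reference.size()) <= tolerance_num;
}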
diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
index 15ce201222..9c9e634205 100644
--- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h
@@ -39,6 +39,7 @@
 #include "tests/validation/reference/Permute.h"
 #include "tests/validation/reference/Utils.h"
 #include "tests/validation/reference/Winograd.h"
+#include "utils/Utils.h"
 
 #include <random>
@@ -156,7 +157,7 @@ protected:
     SimpleTensor<T> _reference{};
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename T1 = T>
 class WinogradConvolutionLayerFastMathValidationFixture : public framework::Fixture
 {
 public:
@@ -177,6 +178,11 @@ protected:
         switch(tensor.data_type())
         {
             case DataType::F16:
+            {
+                arm_compute::utils::uniform_real_distribution_fp16 distribution((half)min, (half)max);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::F32:
             {
                 std::uniform_real_distribution<> distribution(min, max);
@@ -245,21 +251,25 @@ protected:
                                       DataType data_type, ActivationLayerInfo act_info)
     {
         // Create reference
-        SimpleTensor<T> src{ input_shape, data_type, 1 };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1 };
-        SimpleTensor<T> bias{ bias_shape, data_type, 1 };
+        SimpleTensor<T> src_t{ input_shape, data_type, 1 };
+        SimpleTensor<T> weights_t{ weights_shape, data_type, 1 };
+        SimpleTensor<T> bias_t{ bias_shape, data_type, 1 };
 
         // Fill reference
-        fill(src, 0, -1.f, 1.f);
-        fill(weights, 1, -1.f, 1.f);
+        fill(src_t, 0, -1.f, 1.f);
+        SimpleTensor<T1> src_t1(copy_tensor<T, T1>(src_t));
+
+        fill(weights_t, 1, -1.f, 1.f);
+        SimpleTensor<T1> weights_t1(copy_tensor<T, T1>(weights_t));
         if(use_bias)
         {
-            fill(bias, 2, -1.f, 1.f);
+            fill(bias_t, 2, -1.f, 1.f);
         }
         else
        {
-            fill(bias, 2, 0.f, 0.f);
+            fill(bias_t, 2, 0.f, 0.f);
         }
+        SimpleTensor<T1> bias_t1(copy_tensor<T, T1>(bias_t));
 
         // Set output tile
         Size2D output_tile(4U, 4U);
@@ -286,7 +296,7 @@ protected:
                                                     Size2D(weights_shape[0], weights_shape[1]),
                                                     Size2D(input_shape[0], input_shape[1]),
                                                     info,
-                                                    src.data_layout());
+                                                    src_t1.data_layout());
 
         // Compute tensor shapes for input, filter and output transforms
         TensorShape input_transform_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info);
@@ -296,15 +306,16 @@ protected:
         TensorShape output_transform_shape = compute_winograd_output_transform_shape(TensorInfo(batched_gemm_shape, 1, data_type), winograd_info);
 
         // Dummy matrix C to perform matrix multiplication
-        SimpleTensor<T> dummy_c{ batched_gemm_shape, data_type, 1 };
+        SimpleTensor<T1> dummy_c{ batched_gemm_shape, data_type, 1 };
 
         // Compute Winograd-based convolution
-        SimpleTensor<T> input_transform_out  = reference::winograd_input_transform(src, input_transform_shape, winograd_info);
-        SimpleTensor<T> filter_transform_out = reference::winograd_filter_transform(weights, filter_transform_shape, winograd_info);
-        SimpleTensor<T> batched_gemm         = reference::gemm(input_transform_out, filter_transform_out, dummy_c, 1.0f, 0.0f);
-        SimpleTensor<T> conv_out             = reference::winograd_output_transform(batched_gemm, bias, output_transform_shape, winograd_info);
+        SimpleTensor<T1> input_transform_out = reference::winograd_input_transform(src_t1, input_transform_shape, winograd_info);
 
-        return (act_info.enabled()) ? reference::activation_layer(conv_out, act_info) : conv_out;
+        SimpleTensor<T1> filter_transform_out = reference::winograd_filter_transform(weights_t1, filter_transform_shape, winograd_info);
+        SimpleTensor<T1> batched_gemm         = reference::gemm(input_transform_out, filter_transform_out, dummy_c, 1.0f, 0.0f);
+        SimpleTensor<T1> conv_out             = reference::winograd_output_transform(batched_gemm, bias_t1, output_transform_shape, winograd_info);
+        SimpleTensor<T> conv_out_t(std::move(copy_tensor<T1, T>(conv_out)));
+        return (act_info.enabled()) ? reference::activation_layer(conv_out_t, act_info) : conv_out_t;
     }
 
     TensorType _target{};
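
The fixture change above implements the F32 accumulation described in the
commit message: the F16 tensors are filled as half, upcast to float through
the copy_tensor<T, T1> helper (defined in utils/Utils.h, outside this diff),
the whole reference pipeline (input/filter transforms, GEMM, output transform)
runs in float, and only the final result is cast back to half. A minimal
stand-alone sketch of why the wider accumulator matters, using standard C++
containers as stand-ins for the library's SimpleTensor and half types:

#include <cstddef>
#include <vector>

// T is the storage type (half in the fixture), T1 the accumulation type (float).
template <typename T, typename T1>
std::vector<T> gemm_wide_accumulate(const std::vector<T> &a, const std::vector<T> &b,
                                    std::size_t m, std::size_t k, std::size_t n)
{
    std::vector<T> c(m * n);
    for(std::size_t i = 0; i < m; ++i)
    {
        for(std::size_t j = 0; j < n; ++j)
        {
            T1 acc{}; // accumulate in the wider type to limit rounding error and overflow
            for(std::size_t p = 0; p < k; ++p)
            {
                acc += static_cast<T1>(a[i * k + p]) * static_cast<T1>(b[p * n + j]);
            }
            c[i * n + j] = static_cast<T>(acc); // one rounding step back to the storage type
        }
    }
    return c;
}

FP16 carries roughly 10 bits of mantissa and saturates at 65504, and the
Winograd transforms for 5x5 kernels amplify intermediate values, so
accumulating in half loses precision quickly. That is also why F16 Winograd
is offered only behind the fast-math flag and is validated with the looser
mixed tolerances introduced in tests/validation/CL/Winograd.cpp.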