diff options
Diffstat (limited to 'tests/validation')
-rw-r--r-- | tests/validation/CL/Winograd.cpp | 179
-rw-r--r-- | tests/validation/NEON/ConvolutionLayer.cpp | 6
-rw-r--r-- | tests/validation/fixtures/WinogradLayerFixture.h | 120
-rw-r--r-- | tests/validation/reference/ConvolutionLayer.cpp | 2
-rw-r--r-- | tests/validation/reference/Winograd.cpp | 218
-rw-r--r-- | tests/validation/reference/Winograd.h | 3
6 files changed, 452 insertions, 76 deletions
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 0b21ed2577..aa668fa575 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -22,17 +22,22 @@ * SOFTWARE. */ #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" +#include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/ShapeDatasets.h" +#include "tests/datasets/SmallConvolutionLayerDataset.h" #include "tests/datasets/WinogradFilterTransformDataset.h" #include "tests/datasets/WinogradInputTransformDataset.h" +#include "tests/datasets/WinogradOutputTransformDataset.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -47,7 +52,7 @@ namespace validation { namespace { -constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); } // namespace using namespace arm_compute::misc::shape_calculator; @@ -65,9 +70,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Kernel size not supported TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Strides not supported - TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32), // valid - TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32), // 
valid - TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32) // valid + TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32) // Padding needed }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F16), @@ -96,7 +101,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( Size2D(3U, 3U), Size2D(3U, 3U) })), - framework::dataset::make("Expected", { false, false, false, false, true, true, true })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false })), input_info, output_info, conv_info, kernel_dims, expected) { ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, kernel_dims)) == expected, framework::LogLevel::ERRORS); @@ -203,8 +208,172 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::Da // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } + TEST_SUITE_END() // FilterTransform +TEST_SUITE(OutputTransform) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo",{ + TensorInfo(TensorShape(24U, 49U, 16U, 5U), 1, DataType::F16), // F16 not supported + TensorInfo(TensorShape(128U, 3136U, 16U, 5U), 1, DataType::QASYMM8), // QASYMM8 not supported + TensorInfo(TensorShape(256U, 784U, 16U, 5U), 1, DataType::F32), // Kernel size not supported + TensorInfo(TensorShape(512U, 169U, 16U, 5U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(13U, 6U, 16U, 4U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(7U, 16U, 16U, 7U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(1U, 442U, 16U, 37U), 1, DataType::F32) // Wrong number 
of tiles + }), + framework::dataset::make("BiasInfo", { + TensorInfo(TensorShape(24U), 1, DataType::F16), + TensorInfo(TensorShape(128U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(256U), 1, DataType::F32), + TensorInfo(TensorShape(512U), 1, DataType::F32), + TensorInfo(TensorShape(13U), 1, DataType::F32), + TensorInfo(TensorShape(7U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { + TensorInfo(TensorShape(14U, 14U, 24U, 5U), 1, DataType::F16), + TensorInfo(TensorShape(112U, 112U, 128U, 5U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(55U, 55U, 256U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(26U, 26U, 512U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 4U, 13U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 8U, 7U, 7U), 1, DataType::F32), + TensorInfo(TensorShape(51U, 33U, 1U, 37U), 1, DataType::F32) + })), + framework::dataset::make("KernelDims", { + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(5U, 5U), + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(3U, 3U) + })), + framework::dataset::make("OutputDims", { + Size2D(14U, 14U), + Size2D(112U, 112U), + Size2D(55U, 55U), + Size2D(26U, 26U), + Size2D(5U, 4U), + Size2D(8U, 8U), + Size2D(51U, 33U) + })), + framework::dataset::make("NumTiles", { + Size2D(7U, 7U), + Size2D(56U, 56U), + Size2D(28U, 28U), + Size2D(13U, 13U), + Size2D(3U, 2U), + Size2D(4U, 4U), + Size2D(26U, 16U) + })), + framework::dataset::make("Expected", { false, false, false, true, false, true, false })), + input_info, bias_info, output_info, kernel_dims, output_dims, num_tiles, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), kernel_dims, output_dims, num_tiles)) == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +using 
CLWinogradOutputTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradOutputTransformKernel, 0>; +using CLWinogradOutputTransformFixture = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, float>; + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallWinogradOutputTransformDataset(), datasets::LargeWinogradOutputTransformDataset()), + framework::dataset::make("DataType", { DataType::F32 })), + shape_a, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type) +{ + TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), output_convolved_dims, data_layout); + + // Create tensors + CLTensor a = create_tensor<CLTensor>(shape_a, data_type); + CLTensor b = create_tensor<CLTensor>(shape_b, data_type); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLWinogradOutputTransform winograd_output_transform; + winograd_output_transform.configure(&a, nullptr, &b, kernel_dims, output_convolved_dims, num_tiles); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, combine(datasets::SmallWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // OutputTransform + +TEST_SUITE(ConvolutionLayer) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, 
zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // FP16 not supported + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch + TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported + TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported + }), + framework::dataset::make("WeightsInfo", { + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) + })), + framework::dataset::make("BiasesInfo", { + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(21U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { + TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) + })), + framework::dataset::make("ConvInfo", { + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 2, 0, 0), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 0) + })), + framework::dataset::make("Expected", { false, false, false, false, false })), + input_info, weights_info, bias_info, output_info, conv_info, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), 
&weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +using CLWinogradConvolutionLayerFixture = WinogradConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>; +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // ConvolutionLayer + TEST_SUITE_END() // Winograd TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 59db279ac7..34306b381c 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -109,10 +109,12 @@ TEST_SUITE_END() TEST_SUITE(WinogradLayer) template <typename T> -using NEWinogradLayerFixture = WinogradLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>; +using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallWinogradLayerDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, 
combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + datasets::SmallWinogradConvolutionLayer5x5Dataset()), + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h index bfe1efce3b..9811c28008 100644 --- a/tests/validation/fixtures/WinogradLayerFixture.h +++ b/tests/validation/fixtures/WinogradLayerFixture.h @@ -48,14 +48,14 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class WinogradLayerValidationFixture : public framework::Fixture +class WinogradConvolutionLayerValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) { - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type); } protected: @@ -79,13 +79,14 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + 
DataType data_type) { // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, DataType::F32, 1); - TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32, 1); - TensorType bias = create_tensor<TensorType>(bias_shape, DataType::F32, 1); - TensorType dst = create_tensor<TensorType>(output_shape, DataType::F32, 1); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1); + TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1); + TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); // Create and configure function FunctionType conv; @@ -111,20 +112,20 @@ protected: fill(AccessorType(src), 0, -1.f, 1.f); fill(AccessorType(weights), 1, -1.f, 1.f); fill(AccessorType(bias), 2, -1.f, 1.f); - fill(AccessorType(dst), 3, -1.f, 1.f); - // Compute NEWinogradLayer function + // Compute Winograd Convolution function conv.run(); return dst; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type) { // Create reference - SimpleTensor<T> src{ input_shape, DataType::F32, 1 }; - SimpleTensor<T> weights{ weights_shape, DataType::F32, 1 }; - SimpleTensor<T> bias{ bias_shape, DataType::F32, 1 }; + SimpleTensor<T> src{ input_shape, data_type, 1 }; + SimpleTensor<T> weights{ weights_shape, data_type, 1 }; + SimpleTensor<T> bias{ bias_shape, data_type, 1 }; // Fill reference fill(src, 0, -1.f, 1.f); @@ -136,8 +137,6 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; - int _fractional_bits{}; - DataType _data_type{}; }; template <typename TensorType, typename 
AccessorType, typename FunctionType, typename T> @@ -178,7 +177,6 @@ protected: { ARM_COMPUTE_UNUSED(is_nchw_format); - // Create tensors TensorType src = create_tensor<TensorType>(input_shape, data_type); TensorType dst = create_tensor<TensorType>(output_shape, data_type); @@ -261,8 +259,8 @@ protected: ARM_COMPUTE_UNUSED(is_nchw_format); // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type); - TensorType dst = create_tensor<TensorType>(output_shape, data_type); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); // Create and configure function FunctionType filter_transform; @@ -288,7 +286,7 @@ protected: SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, bool is_nchw_format, DataType data_type) { - ARM_COMPUTE_ERROR_ON(!is_nchw_format); + ARM_COMPUTE_UNUSED(is_nchw_format); // Create reference SimpleTensor<T> src{ input_shape, data_type, 1 }; @@ -302,6 +300,86 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; }; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class WinogradOutputTransformValidationFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(TensorShape input_shape, Size2D kernel_dims, Size2D output_convolved_dims, Size2D num_tiles, DataLayout data_layout, DataType data_type) + { + TensorShape output_shape = compute_winograd_output_transform_shape(TensorInfo(input_shape, 1, data_type), output_convolved_dims, data_layout); + + _target = compute_target(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float min, float max) + { + 
switch(tensor.data_type()) + { + case DataType::F32: + { + std::uniform_real_distribution<> distribution(min, max); + library->fill(tensor, distribution, i); + break; + } + default: + { + ARM_COMPUTE_ERROR("Not supported"); + library->fill_tensor_uniform(tensor, i); + break; + } + } + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, DataLayout data_layout, + DataType data_type) + { + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + + // Create and configure function + FunctionType output_transform; + output_transform.configure(&src, nullptr, &dst, kernel_dims, output_convolved_dims, num_tiles); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0, -1.f, 1.f); + + output_transform.run(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, + DataLayout data_layout, + DataType data_type) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type, 1, 0, QuantizationInfo(), data_layout }; + + // Fill reference + fill(src, 0, -1.f, 1.f); + + return reference::winograd_output_transform<T>(src, output_shape, kernel_dims, num_tiles); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; } 
// namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp index 24bbf32a30..f3db274935 100644 --- a/tests/validation/reference/ConvolutionLayer.cpp +++ b/tests/validation/reference/ConvolutionLayer.cpp @@ -118,4 +118,4 @@ template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &sr } // namespace reference } // namespace validation } // namespace test -} // namespace arm_compute +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp index 3ed55fb9fc..c760663b22 100644 --- a/tests/validation/reference/Winograd.cpp +++ b/tests/validation/reference/Winograd.cpp @@ -39,6 +39,87 @@ namespace reference namespace { template <typename T> +void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out) +{ + // Simple tensor for the 3x3 input tile + SimpleTensor<T> input_tile{ TensorShape(3u, 3u), in.data_type(), 1 }; + + // Simple tensor for the transformation matrix + SimpleTensor<T> trans_matrix{ TensorShape(3u, 4u), in.data_type(), 1 }; + + // Simple tensor for the transformation matrix transpose + SimpleTensor<T> trans_matrix_transposed{ TensorShape(4u, 3u), in.data_type(), 1 }; + + // Simple tensor for the 4x3 temporary tile + SimpleTensor<T> tmp_tile{ TensorShape(3u, 4u), in.data_type(), 1 }; + + // Simple tensor for the 4x4 output tile + SimpleTensor<T> output_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; + + // Initialize transformation matrix + // 1 | 0 | 0 + // 0.5 | 0.5 | 0.5 + // 0.5 |-0.5 | 0.5 + // 0 | 0 | 1 + trans_matrix[0 + 0 * 3] = 1.0f; + trans_matrix[1 + 0 * 3] = 0.0f; + trans_matrix[2 + 0 * 3] = 0.0f; + trans_matrix[0 + 1 * 3] = 0.5f; + trans_matrix[1 + 1 * 3] = 0.5f; + trans_matrix[2 + 1 * 3] = 0.5f; + trans_matrix[0 + 2 * 3] = 0.5f; + trans_matrix[1 + 2 * 3] = -0.5f; + trans_matrix[2 + 2 * 3] = 0.5f; + trans_matrix[0 + 3 * 3] = 0.0f; + trans_matrix[1 + 3 * 3] = 0.0f; + trans_matrix[2 + 3 * 3] = 1.0f; + + // Transpose the transformation matrix + transpose_matrix(trans_matrix, trans_matrix_transposed); + + const int num_channels = in.shape()[2]; + const int num_filters = in.shape()[3]; + const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters); + + for(int n = 0; n < num_batches; ++n) + { + for(int w = 0; w < num_filters; ++w) + { + for(int z = 0; z < num_channels; ++z) + { + // Load the 3x3 tile from the input 
tensor + get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + + // First transformation + matrix_multiply(trans_matrix, input_tile, tmp_tile); + + // Second transformation + matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile); + + // Store the 4x4 output tile across the 16 channels + const int output_offset = w + z * num_filters; + out[output_offset + 0 * num_filters * num_channels] = output_tile[0 + 0 * 4]; + out[output_offset + 1 * num_filters * num_channels] = output_tile[1 + 0 * 4]; + out[output_offset + 2 * num_filters * num_channels] = output_tile[2 + 0 * 4]; + out[output_offset + 3 * num_filters * num_channels] = output_tile[3 + 0 * 4]; + out[output_offset + 4 * num_filters * num_channels] = output_tile[0 + 1 * 4]; + out[output_offset + 5 * num_filters * num_channels] = output_tile[1 + 1 * 4]; + out[output_offset + 6 * num_filters * num_channels] = output_tile[2 + 1 * 4]; + out[output_offset + 7 * num_filters * num_channels] = output_tile[3 + 1 * 4]; + out[output_offset + 8 * num_filters * num_channels] = output_tile[0 + 2 * 4]; + out[output_offset + 9 * num_filters * num_channels] = output_tile[1 + 2 * 4]; + out[output_offset + 10 * num_filters * num_channels] = output_tile[2 + 2 * 4]; + out[output_offset + 11 * num_filters * num_channels] = output_tile[3 + 2 * 4]; + out[output_offset + 12 * num_filters * num_channels] = output_tile[0 + 3 * 4]; + out[output_offset + 13 * num_filters * num_channels] = output_tile[1 + 3 * 4]; + out[output_offset + 14 * num_filters * num_channels] = output_tile[2 + 3 * 4]; + out[output_offset + 15 * num_filters * num_channels] = output_tile[3 + 3 * 4]; + } + } + } +} + +template <typename T> void winograd_input_transform3x3(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const PadStrideInfo &conv_info) { TensorShape shape4x4(4u, 4u); @@ -112,56 +193,70 @@ void winograd_input_transform3x3(const SimpleTensor<T> &src, SimpleTensor<T> &ds } template <typename T> -void winograd_filter_transform3x3(const 
SimpleTensor<T> &in, SimpleTensor<T> &out) +void winograd_output_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out, int num_tiles_x) { + ARM_COMPUTE_ERROR_ON(in.shape()[2] != 16); + ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]); + // Simple tensor for the 3x3 input tile - SimpleTensor<T> input_tile{ TensorShape(3u, 3u), in.data_type(), 1 }; + SimpleTensor<T> input_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; // Simple tensor for the transformation matrix - SimpleTensor<T> trans_matrix{ TensorShape(3u, 4u), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix{ TensorShape(4u, 2u), in.data_type(), 1 }; // Simple tensor for the transformation matrix transpose - SimpleTensor<T> trans_matrix_transposed{ TensorShape(4u, 3u), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix_transposed{ TensorShape(2u, 4u), in.data_type(), 1 }; // Simple tensor for the 4x3 temporary tile - SimpleTensor<T> tmp_tile{ TensorShape(3u, 4u), in.data_type(), 1 }; + SimpleTensor<T> tmp_tile{ TensorShape(4u, 2u), in.data_type(), 1 }; // Simple tensor for the 4x4 output tile - SimpleTensor<T> output_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; + SimpleTensor<T> output_tile{ TensorShape(2u, 2u), in.data_type(), 1 }; // Initialize transformation matrix - // 1 | 0 | 0 - // 0.5 | 0.5 | 0.5 - // 0.5 |-0.5 | 0.5 - // 0 | 0 | 1 - trans_matrix[0 + 0 * 3] = 1.0f; - trans_matrix[1 + 0 * 3] = 0.0f; - trans_matrix[2 + 0 * 3] = 0.0f; - trans_matrix[0 + 1 * 3] = 0.5f; - trans_matrix[1 + 1 * 3] = 0.5f; - trans_matrix[2 + 1 * 3] = 0.5f; - trans_matrix[0 + 2 * 3] = 0.5f; - trans_matrix[1 + 2 * 3] = -0.5f; - trans_matrix[2 + 2 * 3] = 0.5f; - trans_matrix[0 + 3 * 3] = 0.0f; - trans_matrix[1 + 3 * 3] = 0.0f; - trans_matrix[2 + 3 * 3] = 1.0f; + // 1 | 1 | 1 | 0 + // 0 | 1 | -1 | -1 + trans_matrix[0 + 0 * 4] = 1.0f; + trans_matrix[1 + 0 * 4] = 1.0f; + trans_matrix[2 + 0 * 4] = 1.0f; + trans_matrix[3 + 0 * 4] = 0.0f; + trans_matrix[0 + 1 * 4] = 0.0f; + trans_matrix[1 + 1 * 4] = 1.0f; + 
trans_matrix[2 + 1 * 4] = -1.0f; + trans_matrix[3 + 1 * 4] = -1.0f; // Transpose the transformation matrix transpose_matrix(trans_matrix, trans_matrix_transposed); - const int num_channels = in.shape()[2]; - const int num_filters = in.shape()[3]; - const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters); + const int w_in = in.shape()[0]; + const int h_in = in.shape()[1]; + const int c_in = in.shape()[2]; + const int w_out = out.shape()[0]; + const int h_out = out.shape()[1]; + const int c_out = out.shape()[2]; + const int num_batches = in.shape().total_size() / (w_in * h_in * c_in); + + // Input strides + const int stridey_in = w_in; + const int stridez_in = stridey_in * h_in; + const int stridew_in = stridez_in * c_in; + + // Output strides + const int stridey_out = w_out; + const int stridez_out = stridey_out * h_out; + const int stridew_out = stridez_out * c_out; for(int n = 0; n < num_batches; ++n) { - for(int w = 0; w < num_filters; ++w) + for(int y = 0; y < h_in; ++y) { - for(int z = 0; z < num_channels; ++z) + for(int x = 0; x < w_in; ++x) { - // Load the 3x3 tile from the input tensor - get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + // Load the 4x4 tile across the 16 channels of the input tensor + for(int z = 0; z < c_in; ++z) + { + input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)]; + } // First transformation matrix_multiply(trans_matrix, input_tile, tmp_tile); @@ -169,24 +264,29 @@ void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &ou // Second transformation matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile); - // Store the 4x4 output tile across the 16 channels - const int output_offset = w + z * num_filters; - out[output_offset + 0 * num_filters * num_channels] = output_tile[0 + 0 * 4]; - out[output_offset + 1 * num_filters * num_channels] = output_tile[1 + 0 * 4]; - out[output_offset + 2 * num_filters * num_channels] = output_tile[2 + 0 * 4]; - 
out[output_offset + 3 * num_filters * num_channels] = output_tile[3 + 0 * 4]; - out[output_offset + 4 * num_filters * num_channels] = output_tile[0 + 1 * 4]; - out[output_offset + 5 * num_filters * num_channels] = output_tile[1 + 1 * 4]; - out[output_offset + 6 * num_filters * num_channels] = output_tile[2 + 1 * 4]; - out[output_offset + 7 * num_filters * num_channels] = output_tile[3 + 1 * 4]; - out[output_offset + 8 * num_filters * num_channels] = output_tile[0 + 2 * 4]; - out[output_offset + 9 * num_filters * num_channels] = output_tile[1 + 2 * 4]; - out[output_offset + 10 * num_filters * num_channels] = output_tile[2 + 2 * 4]; - out[output_offset + 11 * num_filters * num_channels] = output_tile[3 + 2 * 4]; - out[output_offset + 12 * num_filters * num_channels] = output_tile[0 + 3 * 4]; - out[output_offset + 13 * num_filters * num_channels] = output_tile[1 + 3 * 4]; - out[output_offset + 14 * num_filters * num_channels] = output_tile[2 + 3 * 4]; - out[output_offset + 15 * num_filters * num_channels] = output_tile[3 + 3 * 4]; + // Store the 2x2 output tile + const int xo = (y % num_tiles_x) * 2; + const int yo = (y / num_tiles_x) * 2; + const int zo = x; + + const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out); + out[output_offset + 0 * stridey_out + 0] = output_tile[0 + 0 * 2]; + + // Check out-of-bound writes + if(xo + 1 < w_out) + { + out[output_offset + 0 * stridey_out + 1] = output_tile[1 + 0 * 2]; + } + + if(yo + 1 < h_out) + { + out[output_offset + 1 * stridey_out + 0] = output_tile[0 + 1 * 2]; + } + + if((yo + 1 < h_out) && (xo + 1 < w_out)) + { + out[output_offset + 1 * stridey_out + 1] = output_tile[1 + 1 * 2]; + } } } } @@ -234,8 +334,32 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso return out; } +template <typename T> +SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles) +{ + 
ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format"); + ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height); + ARM_COMPUTE_ERROR_ON(in.shape()[1] != num_tiles.area()); + + // Create reference + SimpleTensor<T> out{ output_shape, in.data_type(), 1 }; + + switch(kernel_dims.width) + { + case 3: + winograd_output_transform3x3(in, out, num_tiles.width); + break; + default: + ARM_COMPUTE_ERROR("Only supported 3x3 kernel"); + break; + } + + return out; +} + template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims); template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape); +template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Winograd.h b/tests/validation/reference/Winograd.h index ba8e5c1cb6..fa1a7f3f61 100644 --- a/tests/validation/reference/Winograd.h +++ b/tests/validation/reference/Winograd.h @@ -41,6 +41,9 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &src, const Tenso template <typename T> SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape); + +template <typename T> +SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles); } // namespace reference } // namespace validation } // namespace test |