diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/datasets/LargeConvolutionLayerDataset.h | 24 | ||||
-rw-r--r-- | tests/datasets/SmallConvolutionLayerDataset.h | 12 | ||||
-rw-r--r-- | tests/datasets/WinogradOutputTransformDataset.h | 153 | ||||
-rw-r--r-- | tests/validation/CL/Winograd.cpp | 179 | ||||
-rw-r--r-- | tests/validation/NEON/ConvolutionLayer.cpp | 6 | ||||
-rw-r--r-- | tests/validation/fixtures/WinogradLayerFixture.h | 120 | ||||
-rw-r--r-- | tests/validation/reference/ConvolutionLayer.cpp | 2 | ||||
-rw-r--r-- | tests/validation/reference/Winograd.cpp | 218 | ||||
-rw-r--r-- | tests/validation/reference/Winograd.h | 3 |
9 files changed, 637 insertions, 80 deletions
diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 086b2e3def..ec8e09fa81 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,28 @@ namespace test { namespace datasets { +class LargeWinogradConvolutionLayer3x3Dataset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x3Dataset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(12U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 3, 2 and 4 + add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 1)); + } +}; + class LargeConvolutionLayerDataset final : public ConvolutionLayerDataset { public: diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index adb61de8e2..696c396eef 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -37,10 +37,10 @@ namespace test { namespace datasets { -class SmallWinogradLayerDataset final : public ConvolutionLayerDataset +class SmallWinogradConvolutionLayer3x3Dataset final : public ConvolutionLayerDataset { public: - SmallWinogradLayerDataset() + SmallWinogradConvolutionLayer3x3Dataset() { // Kernel size 3 // Batch size 1 @@ -48,8 +48,14 @@ public: // Batch size 4 add_config(TensorShape(23U, 27U, 5U, 4U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(21U, 25U, 21U, 4U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(8U, 8U, 2U), TensorShape(3U, 3U, 2U, 1U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 1, 1)); + } +}; - // Kernel size 5 +class SmallWinogradConvolutionLayer5x5Dataset final : public ConvolutionLayerDataset +{ +public: + SmallWinogradConvolutionLayer5x5Dataset() + { add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U, 1U), TensorShape(1U), TensorShape(4U, 4U, 1U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 2, 2)); } diff --git a/tests/datasets/WinogradOutputTransformDataset.h b/tests/datasets/WinogradOutputTransformDataset.h new file mode 100644 index 0000000000..c42d6c8ebd --- /dev/null +++ b/tests/datasets/WinogradOutputTransformDataset.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_WINOGRAD_OUTPUT_TRANSFORM_DATASET +#define ARM_COMPUTE_TEST_WINOGRAD_OUTPUT_TRANSFORM_DATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class WinogradOutputTransformDataset +{ +public: + using type = std::tuple<TensorShape, Size2D, Size2D, Size2D, DataLayout>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator a_it, + std::vector<Size2D>::const_iterator b_it, + std::vector<Size2D>::const_iterator c_it, + std::vector<Size2D>::const_iterator d_it, + std::vector<DataLayout>::const_iterator data_layout_it) + : _a_it{ std::move(a_it) }, + _b_it{ std::move(b_it) }, + _c_it{ std::move(c_it) }, + _d_it{ std::move(d_it) }, + _data_layout_it{ std::move(data_layout_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "Input=" << *_a_it << ":"; + description << "KernelDims=" << *_b_it << ":"; + description << "OutputDims=" << *_c_it << ":"; + description << "NumTiles=" << *_d_it << ":"; + description << "DataLayout=" << *_data_layout_it; + return description.str(); + } + + WinogradOutputTransformDataset::type operator*() const + { + return std::make_tuple(*_a_it, *_b_it, *_c_it, *_d_it, *_data_layout_it); + } + + iterator &operator++() + { + ++_a_it; + ++_b_it; + ++_c_it; + ++_d_it; + ++_data_layout_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _a_it; + std::vector<Size2D>::const_iterator _b_it; + std::vector<Size2D>::const_iterator _c_it; + std::vector<Size2D>::const_iterator _d_it; + std::vector<DataLayout>::const_iterator _data_layout_it; + }; + + iterator begin() const + { + return iterator(_a_shapes.begin(), _b_dims.begin(), _c_dims.begin(), _d_dims.begin(), _data_layout.begin()); + } + + int size() const + { + return std::min(_a_shapes.size(), std::min(_b_dims.size(), std::min(_c_dims.size(), std::min(_d_dims.size(), _data_layout.size())))); + } + + void add_config(TensorShape a, Size2D b, Size2D c, Size2D d, DataLayout data_layout) + { + _a_shapes.emplace_back(std::move(a)); + _b_dims.emplace_back(std::move(b)); + _c_dims.emplace_back(std::move(c)); + _d_dims.emplace_back(std::move(d)); + _data_layout.emplace_back(std::move(data_layout)); + } + +protected: + WinogradOutputTransformDataset() = default; + WinogradOutputTransformDataset(WinogradOutputTransformDataset &&) = default; + +private: + std::vector<TensorShape> _a_shapes{}; + std::vector<Size2D> _b_dims{}; + std::vector<Size2D> _c_dims{}; + std::vector<Size2D> _d_dims{}; + std::vector<DataLayout> _data_layout{}; +}; + +class SmallWinogradOutputTransformDataset final : public WinogradOutputTransformDataset +{ +public: + SmallWinogradOutputTransformDataset() + { + add_config(TensorShape(24U, 49U, 16U), Size2D(3, 3), Size2D(14U, 14U), Size2D(7U, 7U), DataLayout::NCHW); + add_config(TensorShape(13U, 6U, 16U), Size2D(3, 3), Size2D(5U, 4U), Size2D(3U, 2U), DataLayout::NCHW); + add_config(TensorShape(7U, 20U, 16U), Size2D(3, 3), Size2D(8U, 9U), Size2D(4U, 5U), DataLayout::NCHW); + add_config(TensorShape(24U, 49U, 16U, 3U), Size2D(3, 3), Size2D(14U, 14U), Size2D(7U, 7U), DataLayout::NCHW); + add_config(TensorShape(13U, 6U, 16U, 2U), Size2D(3, 3), Size2D(5U, 4U), Size2D(3U, 2U), DataLayout::NCHW); + add_config(TensorShape(7U, 20U, 16U, 5U), Size2D(3, 3), Size2D(8U, 9U), Size2D(4U, 5U), DataLayout::NCHW); + } +}; + +class LargeWinogradOutputTransformDataset final : public WinogradOutputTransformDataset +{ +public: + LargeWinogradOutputTransformDataset() + { + add_config(TensorShape(128U, 3136U, 16U), Size2D(3, 3), Size2D(112U, 112U), Size2D(56U, 56U), DataLayout::NCHW); + add_config(TensorShape(256U, 784U, 16U), Size2D(3, 3), Size2D(55U, 55U), Size2D(28U, 28U), DataLayout::NCHW); + add_config(TensorShape(512U, 169U, 16U), Size2D(3, 3), Size2D(26U, 26U), Size2D(13U, 13U), DataLayout::NCHW); + add_config(TensorShape(128U, 3136U, 16U, 3U), Size2D(3, 3), Size2D(112U, 112U), Size2D(56U, 56U), DataLayout::NCHW); + add_config(TensorShape(256U, 784U, 16U, 2U), Size2D(3, 3), Size2D(55U, 55U), Size2D(28U, 28U), DataLayout::NCHW); + add_config(TensorShape(512U, 169U, 16U, 5U), Size2D(3, 3), Size2D(26U, 26U), Size2D(13U, 13U), DataLayout::NCHW); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_WINOGRAD_OUTPUT_TRANSFORM_DATASET */ diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 0b21ed2577..aa668fa575 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -22,17 +22,22 @@ * SOFTWARE. */ #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" +#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" +#include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/ShapeDatasets.h" +#include "tests/datasets/SmallConvolutionLayerDataset.h" #include "tests/datasets/WinogradFilterTransformDataset.h" #include "tests/datasets/WinogradInputTransformDataset.h" +#include "tests/datasets/WinogradOutputTransformDataset.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -47,7 +52,7 @@ namespace validation { namespace { -constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); } // namespace using namespace arm_compute::misc::shape_calculator; @@ -65,9 +70,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Kernel size not supported TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Strides not supported - TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32), // valid - TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32), // valid - TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32) // valid + TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32) // Padding needed }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F16), @@ -96,7 +101,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( Size2D(3U, 3U), Size2D(3U, 3U) })), - framework::dataset::make("Expected", { false, false, false, false, true, true, true })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false })), input_info, output_info, conv_info, kernel_dims, expected) { ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, kernel_dims)) == expected, framework::LogLevel::ERRORS); @@ -203,8 +208,172 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::Da // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } + TEST_SUITE_END() // FilterTransform +TEST_SUITE(OutputTransform) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo",{ + TensorInfo(TensorShape(24U, 49U, 16U, 5U), 1, DataType::F16), // F16 not supported + TensorInfo(TensorShape(128U, 3136U, 16U, 5U), 1, DataType::QASYMM8), // QASYMM8 not supported + TensorInfo(TensorShape(256U, 784U, 16U, 5U), 1, DataType::F32), // Kernel size not supported + TensorInfo(TensorShape(512U, 169U, 16U, 5U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(13U, 6U, 16U, 4U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(7U, 16U, 16U, 7U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(1U, 442U, 16U, 37U), 1, DataType::F32) // Wrong number of tiles + }), + framework::dataset::make("BiasInfo", { + TensorInfo(TensorShape(24U), 1, DataType::F16), + TensorInfo(TensorShape(128U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(256U), 1, DataType::F32), + TensorInfo(TensorShape(512U), 1, DataType::F32), + TensorInfo(TensorShape(13U), 1, DataType::F32), + TensorInfo(TensorShape(7U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { + TensorInfo(TensorShape(14U, 14U, 24U, 5U), 1, DataType::F16), + TensorInfo(TensorShape(112U, 112U, 128U, 5U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(55U, 55U, 256U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(26U, 26U, 512U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 4U, 13U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 8U, 7U, 7U), 1, DataType::F32), + TensorInfo(TensorShape(51U, 33U, 1U, 37U), 1, DataType::F32) + })), + framework::dataset::make("KernelDims", { + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(5U, 5U), + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(3U, 3U), + Size2D(3U, 3U) + })), + framework::dataset::make("OutputDims", { + Size2D(14U, 14U), + Size2D(112U, 112U), + Size2D(55U, 55U), + Size2D(26U, 26U), + Size2D(5U, 4U), + Size2D(8U, 8U), + Size2D(51U, 33U) + })), + framework::dataset::make("NumTiles", { + Size2D(7U, 7U), + Size2D(56U, 56U), + Size2D(28U, 28U), + Size2D(13U, 13U), + Size2D(3U, 2U), + Size2D(4U, 4U), + Size2D(26U, 16U) + })), + framework::dataset::make("Expected", { false, false, false, true, false, true, false })), + input_info, bias_info, output_info, kernel_dims, output_dims, num_tiles, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), kernel_dims, output_dims, num_tiles)) == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +using CLWinogradOutputTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradOutputTransformKernel, 0>; +using CLWinogradOutputTransformFixture = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, float>; + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallWinogradOutputTransformDataset(), datasets::LargeWinogradOutputTransformDataset()), + framework::dataset::make("DataType", { DataType::F32 })), + shape_a, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type) +{ + TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), output_convolved_dims, data_layout); + + // Create tensors + CLTensor a = create_tensor<CLTensor>(shape_a, data_type); + CLTensor b = create_tensor<CLTensor>(shape_b, data_type); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLWinogradOutputTransform winograd_output_transform; + winograd_output_transform.configure(&a, nullptr, &b, kernel_dims, output_convolved_dims, num_tiles); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, combine(datasets::SmallWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // OutputTransform + +TEST_SUITE(ConvolutionLayer) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // FP16 not supported + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch + TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported + TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported + }), + framework::dataset::make("WeightsInfo", { + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) + })), + framework::dataset::make("BiasesInfo", { + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(21U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { + TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) + })), + framework::dataset::make("ConvInfo", { + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 2, 0, 0), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 0) + })), + framework::dataset::make("Expected", { false, false, false, false, false })), + input_info, weights_info, bias_info, output_info, conv_info, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +using CLWinogradConvolutionLayerFixture = WinogradConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>; +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // ConvolutionLayer + TEST_SUITE_END() // Winograd TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 59db279ac7..34306b381c 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -109,10 +109,12 @@ TEST_SUITE_END() TEST_SUITE(WinogradLayer) template <typename T> -using NEWinogradLayerFixture = WinogradLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>; +using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallWinogradLayerDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + datasets::SmallWinogradConvolutionLayer5x5Dataset()), + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h index bfe1efce3b..9811c28008 100644 --- a/tests/validation/fixtures/WinogradLayerFixture.h +++ b/tests/validation/fixtures/WinogradLayerFixture.h @@ -48,14 +48,14 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class WinogradLayerValidationFixture : public framework::Fixture +class WinogradConvolutionLayerValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) { - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type); } protected: @@ -79,13 +79,14 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type) { // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, DataType::F32, 1); - TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32, 1); - TensorType bias = create_tensor<TensorType>(bias_shape, DataType::F32, 1); - TensorType dst = create_tensor<TensorType>(output_shape, DataType::F32, 1); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1); + TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1); + TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); // Create and configure function FunctionType conv; @@ -111,20 +112,20 @@ protected: fill(AccessorType(src), 0, -1.f, 1.f); fill(AccessorType(weights), 1, -1.f, 1.f); fill(AccessorType(bias), 2, -1.f, 1.f); - fill(AccessorType(dst), 3, -1.f, 1.f); - // Compute NEWinogradLayer function + // Compute Winograd Convolution function conv.run(); return dst; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type) { // Create reference - SimpleTensor<T> src{ input_shape, DataType::F32, 1 }; - SimpleTensor<T> weights{ weights_shape, DataType::F32, 1 }; - SimpleTensor<T> bias{ bias_shape, DataType::F32, 1 }; + SimpleTensor<T> src{ input_shape, data_type, 1 }; + SimpleTensor<T> weights{ weights_shape, data_type, 1 }; + SimpleTensor<T> bias{ bias_shape, data_type, 1 }; // Fill reference fill(src, 0, -1.f, 1.f); @@ -136,8 +137,6 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; - int _fractional_bits{}; - DataType _data_type{}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> @@ -178,7 +177,6 @@ protected: { ARM_COMPUTE_UNUSED(is_nchw_format); - // Create tensors TensorType src = create_tensor<TensorType>(input_shape, data_type); TensorType dst = create_tensor<TensorType>(output_shape, data_type); @@ -261,8 +259,8 @@ protected: ARM_COMPUTE_UNUSED(is_nchw_format); // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type); - TensorType dst = create_tensor<TensorType>(output_shape, data_type); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); // Create and configure function FunctionType filter_transform; @@ -288,7 +286,7 @@ protected: SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, bool is_nchw_format, DataType data_type) { - ARM_COMPUTE_ERROR_ON(!is_nchw_format); + ARM_COMPUTE_UNUSED(is_nchw_format); // Create reference SimpleTensor<T> src{ input_shape, data_type, 1 }; @@ -302,6 +300,86 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; }; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class WinogradOutputTransformValidationFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(TensorShape input_shape, Size2D kernel_dims, Size2D output_convolved_dims, Size2D num_tiles, DataLayout data_layout, DataType data_type) + { + TensorShape output_shape = compute_winograd_output_transform_shape(TensorInfo(input_shape, 1, data_type), output_convolved_dims, data_layout); + + _target = compute_target(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, kernel_dims, output_convolved_dims, num_tiles, data_layout, data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float min, float max) + { + switch(tensor.data_type()) + { + case DataType::F32: + { + std::uniform_real_distribution<> distribution(min, max); + library->fill(tensor, distribution, i); + break; + } + default: + { + ARM_COMPUTE_ERROR("Not supported"); + library->fill_tensor_uniform(tensor, i); + break; + } + } + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, DataLayout data_layout, + DataType data_type) + { + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + + // Create and configure function + FunctionType output_transform; + output_transform.configure(&src, nullptr, &dst, kernel_dims, output_convolved_dims, num_tiles); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0, -1.f, 1.f); + + output_transform.run(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &output_convolved_dims, Size2D &num_tiles, + DataLayout data_layout, + DataType data_type) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type, 1, 0, QuantizationInfo(), data_layout }; + + // Fill reference + fill(src, 0, -1.f, 1.f); + + return reference::winograd_output_transform<T>(src, output_shape, kernel_dims, num_tiles); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp index 24bbf32a30..f3db274935 100644 --- a/tests/validation/reference/ConvolutionLayer.cpp +++ b/tests/validation/reference/ConvolutionLayer.cpp @@ -118,4 +118,4 @@ template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &sr } // namespace reference } // namespace validation } // namespace test -} // namespace arm_compute +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp index 3ed55fb9fc..c760663b22 100644 --- a/tests/validation/reference/Winograd.cpp +++ b/tests/validation/reference/Winograd.cpp @@ -39,6 +39,87 @@ namespace reference namespace { template <typename T> +void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out) +{ + // Simple tensor for the 3x3 input tile + SimpleTensor<T> input_tile{ TensorShape(3u, 3u), in.data_type(), 1 }; + + // Simple tensor for the transformation matrix + SimpleTensor<T> trans_matrix{ TensorShape(3u, 4u), in.data_type(), 1 }; + + // Simple tensor for the transformation matrix transpose + SimpleTensor<T> trans_matrix_transposed{ TensorShape(4u, 3u), in.data_type(), 1 }; + + // Simple tensor for the 4x3 temporary tile + SimpleTensor<T> tmp_tile{ TensorShape(3u, 4u), in.data_type(), 1 }; + + // Simple tensor for the 4x4 output tile + SimpleTensor<T> output_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; + + // Initialize transformation matrix + // 1 | 0 | 0 + // 0.5 | 0.5 | 0.5 + // 0.5 |-0.5 | 0.5 + // 0 | 0 | 1 + trans_matrix[0 + 0 * 3] = 1.0f; + trans_matrix[1 + 0 * 3] = 0.0f; + trans_matrix[2 + 0 * 3] = 0.0f; + trans_matrix[0 + 1 * 3] = 0.5f; + trans_matrix[1 + 1 * 3] = 0.5f; + trans_matrix[2 + 1 * 3] = 0.5f; + trans_matrix[0 + 2 * 3] = 0.5f; + trans_matrix[1 + 2 * 3] = -0.5f; + trans_matrix[2 + 2 * 3] = 0.5f; + trans_matrix[0 + 3 * 3] = 0.0f; + trans_matrix[1 + 3 * 3] = 0.0f; + trans_matrix[2 + 3 * 3] = 1.0f; + + // Transpose the transformation matrix + transpose_matrix(trans_matrix, trans_matrix_transposed); + + const int num_channels = in.shape()[2]; + const int num_filters = in.shape()[3]; + const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters); + + for(int n = 0; n < num_batches; ++n) + { + for(int w = 0; w < num_filters; ++w) + { + for(int z = 0; z < num_channels; ++z) + { + // Load the 3x3 tile from the input tensor + get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + + // First transformation + matrix_multiply(trans_matrix, input_tile, tmp_tile); + + // Second transformation + matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile); + + // Store the 4x4 output tile across the 16 channels + const int output_offset = w + z * num_filters; + out[output_offset + 0 * num_filters * num_channels] = output_tile[0 + 0 * 4]; + out[output_offset + 1 * num_filters * num_channels] = output_tile[1 + 0 * 4]; + out[output_offset + 2 * num_filters * num_channels] = output_tile[2 + 0 * 4]; + out[output_offset + 3 * num_filters * num_channels] = output_tile[3 + 0 * 4]; + out[output_offset + 4 * num_filters * num_channels] = output_tile[0 + 1 * 4]; + out[output_offset + 5 * num_filters * num_channels] = output_tile[1 + 1 * 4]; + out[output_offset + 6 * num_filters * num_channels] = output_tile[2 + 1 * 4]; + out[output_offset + 7 * num_filters * num_channels] = output_tile[3 + 1 * 4]; + out[output_offset + 8 * num_filters * num_channels] = output_tile[0 + 2 * 4]; + out[output_offset + 9 * num_filters * num_channels] = output_tile[1 + 2 * 4]; + out[output_offset + 10 * num_filters * num_channels] = output_tile[2 + 2 * 4]; + out[output_offset + 11 * num_filters * num_channels] = output_tile[3 + 2 * 4]; + out[output_offset + 12 * num_filters * num_channels] = output_tile[0 + 3 * 4]; + out[output_offset + 13 * num_filters * num_channels] = output_tile[1 + 3 * 4]; + out[output_offset + 14 * num_filters * num_channels] = output_tile[2 + 3 * 4]; + out[output_offset + 15 * num_filters * num_channels] = output_tile[3 + 3 * 4]; + } + } + } +} + +template <typename T> void winograd_input_transform3x3(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const PadStrideInfo &conv_info) { TensorShape shape4x4(4u, 4u); @@ -112,56 +193,70 @@ void winograd_input_transform3x3(const SimpleTensor<T> &src, SimpleTensor<T> &ds } template <typename T> -void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out) +void winograd_output_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out, int num_tiles_x) { + ARM_COMPUTE_ERROR_ON(in.shape()[2] != 16); + ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]); + // Simple tensor for the 3x3 input tile - SimpleTensor<T> input_tile{ TensorShape(3u, 3u), in.data_type(), 1 }; + SimpleTensor<T> input_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; // Simple tensor for the transformation matrix - SimpleTensor<T> trans_matrix{ TensorShape(3u, 4u), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix{ TensorShape(4u, 2u), in.data_type(), 1 }; // Simple tensor for the transformation matrix transpose - SimpleTensor<T> trans_matrix_transposed{ TensorShape(4u, 3u), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix_transposed{ TensorShape(2u, 4u), in.data_type(), 1 }; // Simple tensor for the 4x3 temporary tile - SimpleTensor<T> tmp_tile{ TensorShape(3u, 4u), in.data_type(), 1 }; + SimpleTensor<T> tmp_tile{ TensorShape(4u, 2u), in.data_type(), 1 }; // Simple tensor for the 4x4 output tile - SimpleTensor<T> output_tile{ TensorShape(4u, 4u), in.data_type(), 1 }; + SimpleTensor<T> output_tile{ TensorShape(2u, 2u), in.data_type(), 1 }; // Initialize transformation matrix - // 1 | 0 | 0 - // 0.5 | 0.5 | 0.5 - // 0.5 |-0.5 | 0.5 - // 0 | 0 | 1 - trans_matrix[0 + 0 * 3] = 1.0f; - trans_matrix[1 + 0 * 3] = 0.0f; - trans_matrix[2 + 0 * 3] = 0.0f; - trans_matrix[0 + 1 * 3] = 0.5f; - trans_matrix[1 + 1 * 3] = 0.5f; - trans_matrix[2 + 1 * 3] = 0.5f; - trans_matrix[0 + 2 * 3] = 0.5f; - trans_matrix[1 + 2 * 3] = -0.5f; - trans_matrix[2 + 2 * 3] = 0.5f; - trans_matrix[0 + 3 * 3] = 0.0f; - trans_matrix[1 + 3 * 3] = 0.0f; - trans_matrix[2 + 3 * 3] = 1.0f; + // 1 | 1 | 1 | 1 + // 0 | 1 | -1 | -1 + trans_matrix[0 + 0 * 4] = 1.0f; + trans_matrix[1 + 0 * 4] = 1.0f; + trans_matrix[2 + 0 * 4] = 1.0f; + trans_matrix[3 + 0 * 4] = 0.0f; + trans_matrix[0 + 1 * 4] = 0.0f; + trans_matrix[1 + 1 * 4] = 1.0f; + trans_matrix[2 + 1 * 4] = -1.0f; + trans_matrix[3 + 1 * 4] = -1.0f; // Transpose the transformation matrix transpose_matrix(trans_matrix, trans_matrix_transposed); - const int num_channels = in.shape()[2]; - const int num_filters = in.shape()[3]; - const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters); + const int w_in = in.shape()[0]; + const int h_in = in.shape()[1]; + const int c_in = in.shape()[2]; + const int w_out = out.shape()[0]; + const int h_out = out.shape()[1]; + const int c_out = out.shape()[2]; + const int num_batches = in.shape().total_size() / (w_in * h_in * c_in); + + // Input strides + const int stridey_in = w_in; + const int stridez_in = stridey_in * h_in; + const int stridew_in = stridez_in * c_in; + + // Output strides + const int stridey_out = w_out; + const int stridez_out = stridey_out * h_out; + const int stridew_out = stridez_out * c_out; for(int n = 0; n < num_batches; ++n) { - for(int w = 0; w < num_filters; ++w) + for(int y = 0; y < h_in; ++y) { - for(int z = 0; z < num_channels; ++z) + for(int x = 0; x < w_in; ++x) { - // Load the 3x3 tile from the input tensor - get_tile(in, input_tile, Coordinates(0, 0, z, w, n)); + // Load the 4x4 tile across the 16 channels of the input tensor + for(int z = 0; z < c_in; ++z) + { + input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)]; + } // First transformation matrix_multiply(trans_matrix, input_tile, tmp_tile); @@ -169,24 +264,29 @@ void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &ou // Second transformation matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile); - // Store the 4x4 output tile across the 16 channels - const int output_offset = w + z * num_filters; - out[output_offset + 0 * num_filters * num_channels] = output_tile[0 + 0 * 4]; - out[output_offset + 1 * num_filters * num_channels] = output_tile[1 + 0 * 4]; - out[output_offset + 2 * num_filters * num_channels] = output_tile[2 + 0 * 4]; - out[output_offset + 3 * num_filters * num_channels] = output_tile[3 + 0 * 4]; - out[output_offset + 4 * num_filters * num_channels] = output_tile[0 + 1 * 4]; - out[output_offset + 5 * num_filters * num_channels] = output_tile[1 + 1 * 4]; - out[output_offset + 6 * num_filters * num_channels] = output_tile[2 + 1 * 4]; - out[output_offset + 7 * num_filters * num_channels] = output_tile[3 + 1 * 4]; - out[output_offset + 8 * num_filters * num_channels] = output_tile[0 + 2 * 4]; - out[output_offset + 9 * num_filters * num_channels] = output_tile[1 + 2 * 4]; - out[output_offset + 10 * num_filters * num_channels] = output_tile[2 + 2 * 4]; - out[output_offset + 11 * num_filters * num_channels] = output_tile[3 + 2 * 4]; - out[output_offset + 12 * num_filters * num_channels] = output_tile[0 + 3 * 4]; - out[output_offset + 13 * num_filters * num_channels] = output_tile[1 + 3 * 4]; - out[output_offset + 14 * num_filters * num_channels] = output_tile[2 + 3 * 4]; - out[output_offset + 15 * num_filters * num_channels] = output_tile[3 + 3 * 4]; + // Store the 2x2 output tile + const int xo = (y % num_tiles_x) * 2; + const int yo = (y / num_tiles_x) * 2; + const int zo = x; + + const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out); + out[output_offset + 0 * stridey_out + 0] = output_tile[0 + 0 * 2]; + + // Check out-of-bound writes + if(xo + 1 < w_out) + { + out[output_offset + 0 * stridey_out + 1] = output_tile[1 + 0 * 2]; + } + + if(yo + 1 < h_out) + { + out[output_offset + 1 * stridey_out + 0] = output_tile[0 + 1 * 2]; + } + + if((yo + 1 < h_out) && (xo + 1 < w_out)) + { + out[output_offset + 1 * stridey_out + 1] = output_tile[1 + 1 * 2]; + } } } } @@ -234,8 +334,32 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso return out; } +template <typename T> +SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles) +{ + ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format"); + ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height); + ARM_COMPUTE_ERROR_ON(in.shape()[1] != num_tiles.area()); + + // Create reference + SimpleTensor<T> out{ output_shape, in.data_type(), 1 }; + + switch(kernel_dims.width) + { + case 3: + winograd_output_transform3x3(in, out, num_tiles.width); + break; + default: + ARM_COMPUTE_ERROR("Only supported 3x3 kernel"); + break; + } + + return out; +} + template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims); template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape); +template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Winograd.h b/tests/validation/reference/Winograd.h index ba8e5c1cb6..fa1a7f3f61 100644 --- a/tests/validation/reference/Winograd.h +++ b/tests/validation/reference/Winograd.h @@ -41,6 +41,9 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &src, const Tenso template <typename T> SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape); + +template <typename T> +SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles); } // namespace reference } // namespace validation } // namespace test |