From 7dc0234331f2150a6b4ac5c2b49de419870f7cf5 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Mon, 21 Nov 2022 21:46:50 +0000 Subject: Implement FP32/16 Depthwise Conv2d operator in dynamic fusion This patch adds Depthwise Conv2d operator into dynamic fusion interface and adds the associated tests. Resolves: COMPMID-5517 Change-Id: I385c94dff7fd40c72b8337ef797e508df4499a82 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8678 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Gian Marco Iodice Benchmark: Arm Jenkins --- .../dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp | 477 +++++++++++++++++++++ .../dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h | 222 ++++++++++ 2 files changed, 699 insertions(+) create mode 100644 tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp create mode 100644 tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h (limited to 'tests/validation') diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp new file mode 100644 index 0000000000..f08cc60ea2 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" +#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1U, 4U }); +const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 }); + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(DEPTHWISE_CONV2D) + +RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float tolerance_num = 0.02f; /**< Tolerance number */ + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type input/weights + TensorInfo(TensorShape(3U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching input feature maps + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching depth multiplier + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases size + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases dimensions + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid output size + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // patch size bigger than input width + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // dilation < 1 + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), // weight dimension > 3 + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U, 5U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NCHW), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(24U, 32U, 11U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 32U, 11U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 33U, 14U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 17U, 5U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 15U, 4U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("Padding", { Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(2, 1, 2, 1), + Padding2D(2, 1, 2, 1), + Padding2D(2, 1, 2, 1), + })), + framework::dataset::make("Stride", { Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(2, 3), + Size2D(2, 3), + })), + framework::dataset::make("DepthMultiplier", { 1, + 1, + 3, + 1, + 1, + 1, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + })), + framework::dataset::make("Dilation", { Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(20U, 1U), + Size2D(0U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(2U, 3U), + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, false, + false, false, false, false, false, false, false, false, false, false, + false, false, true, false, true, true, true })), + input_info, weights_info, biases_info, output_info, padding, stride, depth_multiplier, dilation, expected) +{ + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); + const TensorInfo sketch_weights_info = sketch.create_tensor_info(weights_info); + const TensorInfo sketch_biases_info = sketch.create_tensor_info(biases_info); + const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + + DepthwiseConv2dAttributes attributes {}; + attributes.pad(padding) + .stride(stride) + .dilation(dilation) + .depth_multiplier(depth_multiplier); + + const Status status = GpuDepthwiseConv2d::validate_op(sketch, &sketch_input_info, &sketch_weights_info, &sketch_biases_info, &sketch_output_info, attributes); + const bool res = bool(status); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template +using DynamicFusionGpuDepthwiseConv2dFixture = DynamicFusionGpuDepthwiseConv2dValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(Dilation) + +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeKernelSize, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(), + framework::dataset::make("DepthMultiplier", { 1 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDepthwiseConv2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // DEPTHWISE_CONV2D +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h new file mode 100644 index 0000000000..c7600e082e --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE +#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" + +#include "tests/CL/CLAccessor.h" + +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" + +#include "tests/validation/Validation.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional < std::is_same::type, uint8_t>::value + || std::is_same::type, int8_t>::value, + int32_t, T >::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + template + void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &pad_stride, const Size2D &dilation, + const unsigned int depth_multiplier, const DataType data_type, const DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout + + DepthwiseConv2dAttributes dwc_conv2d_attr; + const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), pad_stride.pad_bottom()); + dwc_conv2d_attr.pad(padding_2d) + .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) + .dilation(dilation) + .depth_multiplier(depth_multiplier) + .dimension_rounding_type(pad_stride.round()); + + // Calculate Output and Weight Shapes + TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(input_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + + const ConvolutionInfo info{ pad_stride, depth_multiplier, ActivationLayerInfo(), dilation }; + const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); + + weights_shape.set(2, output_shape.z()); + const TensorShape bias_shape = TensorShape(weights_shape[2]); + + _data_type = data_type; + _data_layout = data_layout; + _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr); + } + +protected: + template + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, const DepthwiseConv2dAttributes dwc_conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + + // Our test shapes are assumed in NCHW data layout, thus the permutation + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Create sketch tensors + auto input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + auto weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + auto bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + auto dst_info = sketch.create_tensor_info(); + FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &dst_info, dwc_conv2d_attr); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for(auto &data : runtime.get_auxiliary_tensors()) + { + auto tensor = data.first; + const auto aux_mem_req = data.second; + tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment); + tensor->allocator()->allocate(); + } + + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(input_info); + t_weight.allocator()->init(weight_info); + t_bias.allocator()->init(bias_info); + t_dst.allocator()->init(dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); + return t_dst; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, + const TensorShape &output_shape, DepthwiseConv2dAttributes dwc_conv2d_attr) + { + // Create reference + SimpleTensor src{ input_shape, _data_type, 1 }; + SimpleTensor weight{ weights_shape, _data_type, 1 }; + SimpleTensor bias{ bias_shape, _data_type, 1 }; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, dwc_conv2d_attr.pad().top, + dwc_conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; +}; + +template +class DynamicFusionGpuDepthwiseConv2dValidationFixture : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture +{ +public: + template + void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &info, const Size2D &dilation, const unsigned int depth_multiplier, DataType data_type, DataLayout data_layout) + { + DynamicFusionGpuDepthwiseConv2dValidationGenericFixture::setup(input_shape, kernel_size, info, dilation, + depth_multiplier, data_type, data_layout); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE */ -- cgit v1.2.1