From 404462af4ca002ece819161a03a4bdb19a87abf2 Mon Sep 17 00:00:00 2001
From: Ramy Elgammal
Date: Tue, 8 Nov 2022 02:14:46 +0000
Subject: Adding GpuAdd to dynamic fusion operators

- Provide support for Add operator
- Auto initialize the destination tensor before testing fusion in conv2d
  and elementwise binary ops.

Resolves: COMPMID-5518
Signed-off-by: Ramy Elgammal
Change-Id: Ibd815020f02b57f88eea7c2921bdcf98605d99c5
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8617
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Benchmark: Arm Jenkins
---
 .../validation/dynamic_fusion/gpu/Integration.cpp |  10 -
 tests/validation/dynamic_fusion/gpu/cl/Add.cpp    | 267 +++++++++++++++++++++
 .../dynamic_fusion/gpu/cl/DirectConv2d.cpp        |  25 +-
 3 files changed, 269 insertions(+), 33 deletions(-)
 create mode 100644 tests/validation/dynamic_fusion/gpu/cl/Add.cpp

diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp
index 036f28b29f..0b81dac1f0 100644
--- a/tests/validation/dynamic_fusion/gpu/Integration.cpp
+++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp
@@ -28,24 +28,14 @@
 #include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/gpu/cl/operators/ClAdd.h"
-#include "src/gpu/cl/operators/ClConv2d.h"

 #include "tests/CL/CLAccessor.h"
-#include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/validation/Validation.h"

 #include "tests/validation/dynamic_fusion/Utils.h"
 #include "tests/validation/reference/ConvolutionLayer.h"
-#include "tests/validation/reference/ElementwiseOperations.h"
 #include "tests/validation/reference/Permute.h"

-#ifdef ARM_COMPUTE_ASSERTS_ENABLED
-#include "tests/SimpleTensorPrinter.h"
-#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
-
 using namespace arm_compute::experimental::dynamic_fusion;
 using namespace arm_compute::test::validation::utils;

diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
new file mode 100644
index 0000000000..3743fbb664
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(ADD) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching not supported + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching not supported + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, false, true, true, false, false, true, false})), + input1_info, input2_info, output_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + auto dst_info = sketch.create_tensor_info(output_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &dst_info)); + 
ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(ValidateRhsInplace, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for rhs + })), + framework::dataset::make("Expected", { true, false})), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(ValidateLhsInplace, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + })), + framework::dataset::make("Expected", { false, true})), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &lhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float tolerance_num = 0.01f; /**< Tolerance number */ + +template +using DynamicFusionAddOpFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture; + +template +using DynamicFusionAddOpBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture; + +template +using DynamicFusionGpuFuseTwoAddOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture; + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine( + framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, DynamicFusionAddOpFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine( + 
framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, DynamicFusionAddOpBroadcastFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ 
+ // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::U8 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // ADD +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp index 1f9319b10f..bfb9735599 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp @@ -22,21 +22,8 @@ * SOFTWARE. */ -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" - #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -46,12 +33,6 @@ #include "tests/datasets/SmallConvolutionLayerDataset.h" #include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h" -#ifdef ARM_COMPUTE_ASSERTS_ENABLED -#include "tests/SimpleTensorPrinter.h" -#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" namespace arm_compute { namespace test @@ -60,7 +41,7 @@ namespace validation { TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) -TEST_SUITE(GPU_CONV2D) +TEST_SUITE(CONV2D) RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ RelativeTolerance tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ @@ -79,7 +60,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture, framework } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataType", DataType::F16)), @@ -90,9 +70,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture, framework: 
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -TEST_SUITE_END() // GPU_CONV2D +TEST_SUITE_END() // CONV2D TEST_SUITE_END() // DYNAMIC_FUSION TEST_SUITE_END() // CL } // namespace validation -- cgit v1.2.1
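For reference, below is a minimal sketch of the GpuAdd validation flow that the new tests above exercise. The free-function wrapper name and the tensor shapes are illustrative only; the calls themselves mirror the test code in this patch.

// Minimal sketch of the GpuAdd validation flow exercised by the new tests.
// The wrapper function and the tensor shapes are illustrative; the API calls
// mirror the test code in this patch.
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

bool can_fuse_add()
{
    // A workload sketch records operators against a GPU workload context
    // built from the CL compile context.
    auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    auto              gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
    GpuWorkloadSketch sketch{ &gpu_ctx };

    // Operand descriptors are registered with the sketch up front.
    auto lhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    auto rhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    auto dst_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));

    // validate_op returns a Status convertible to bool; the Validate datasets
    // above show that broadcasting is accepted on the rhs operand only and
    // that batching is not supported.
    return bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &dst_info));
}

The in-place variants in the tests above follow the same pattern but pass &lhs_info or &rhs_info as the destination argument instead of a separate dst_info.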