diff options
author | Ramy Elgammal <ramy.elgammal@arm.com> | 2022-11-08 02:14:46 +0000 |
---|---|---|
committer | Ramy Elgammal <ramy.elgammal@arm.com> | 2022-11-29 10:24:59 +0000 |
commit | 404462af4ca002ece819161a03a4bdb19a87abf2 (patch) | |
tree | 09cf812530afcbe3fc524ce7eded5f06129e0889 /tests | |
parent | 03b2971ac69a86f10a1566938d1a25afee15746c (diff) | |
download | ComputeLibrary-404462af4ca002ece819161a03a4bdb19a87abf2.tar.gz |
Adding GpuAdd to dynamic fusion operators
- Provide support for Add operator
- Auto initialize the destination tensor before testing fusion in conv2d
and elementwise binary ops.
Resolves: COMPMID-5518
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Ibd815020f02b57f88eea7c2921bdcf98605d99c5
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8617
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/datasets/DynamicFusionDataset.h | 126 | ||||
-rw-r--r-- | tests/datasets/ShapeDatasets.h | 70 | ||||
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/Integration.cpp | 10 | ||||
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/cl/Add.cpp | 267 | ||||
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp | 25 | ||||
-rw-r--r-- | tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h | 36 | ||||
-rw-r--r-- | tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h | 284 |
7 files changed, 761 insertions, 57 deletions
diff --git a/tests/datasets/DynamicFusionDataset.h b/tests/datasets/DynamicFusionDataset.h new file mode 100644 index 0000000000..5a1453b9ab --- /dev/null +++ b/tests/datasets/DynamicFusionDataset.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_DATASETS_DYNAMICFUSIONDATASET +#define TESTS_DATASETS_DYNAMICFUSIONDATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class DynamicFusionThreeInputs +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator shape0_it, + std::vector<TensorShape>::const_iterator shape1_it, + std::vector<TensorShape>::const_iterator shape2_it) + : _shape0_it{ std::move(shape0_it) }, + _shape1_it{ std::move(shape1_it) }, + _shape2_it{ std::move(shape2_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "shape0=" << *_shape0_it << ":"; + description << "shape1=" << *_shape1_it << ":"; + description << "shape2=" << *_shape2_it << ":"; + + return description.str(); + } + + DynamicFusionThreeInputs::type operator*() const + { + return std::make_tuple(*_shape0_it, *_shape1_it, *_shape2_it); + } + + iterator &operator++() + { + ++_shape0_it; + ++_shape1_it; + ++_shape2_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _shape0_it; + std::vector<TensorShape>::const_iterator _shape1_it; + std::vector<TensorShape>::const_iterator _shape2_it; + }; + + iterator begin() const + { + return iterator(_shape0_shapes.begin(), _shape1_shapes.begin(), _shape2_shapes.begin()); + } + + int size() const + { + return std::min(_shape0_shapes.size(), std::min(_shape1_shapes.size(), _shape2_shapes.size())); + } + + void add_config(TensorShape shape0, TensorShape shape1, TensorShape shape2) + { + _shape0_shapes.emplace_back(std::move(shape0)); + _shape1_shapes.emplace_back(std::move(shape1)); + _shape2_shapes.emplace_back(std::move(shape2)); + } + +protected: + DynamicFusionThreeInputs() = default; + DynamicFusionThreeInputs(DynamicFusionThreeInputs &&) = default; + +private: + std::vector<TensorShape> _shape0_shapes{}; + std::vector<TensorShape> _shape1_shapes{}; + std::vector<TensorShape> _shape2_shapes{}; +}; + +class DynamicFusionElementwiseBinaryTwoOpsSmallShapes final : public DynamicFusionThreeInputs +{ +public: + DynamicFusionElementwiseBinaryTwoOpsSmallShapes() + { + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 1U, 1U, 1U } /* Broadcast in X, Y, Z*/, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/, TensorShape{ 27U, 13U, 2U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* TESTS_DATASETS_DYNAMICFUSIONDATASET */ diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index e4277a981e..047457c99e 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -212,6 +212,25 @@ public: } }; +/** Data set containing small tensor shapes. */ +class SmallShapesNoBatches final : public ShapeDataset +{ +public: + SmallShapesNoBatches() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 3U, 11U }, + TensorShape{ 1U, 16U }, + TensorShape{ 27U, 13U, 7U }, + TensorShape{ 7U, 7U, 17U }, + TensorShape{ 33U, 13U, 2U }, + TensorShape{ 11U, 11U, 3U } + }) + { + } +}; + /** Data set containing pairs of tiny tensor shapes that are broadcast compatible. */ class TinyShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -282,6 +301,44 @@ public: } }; +class TemporaryLimitedSmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedSmallShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 9U, 9U, 5U }, + TensorShape{ 27U, 13U, 2U }, + }), + ShapeDataset("Shape1", + { + TensorShape{ 1U, 1U, 1U }, // Broadcast in X, Y, Z + TensorShape{ 27U, 1U, 1U }, // Broadcast in Y and Z + })) + { + } +}; + +class TemporaryLimitedLargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedLargeShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 127U, 25U, 5U }, + TensorShape{ 485, 40U, 10U } + }), + ShapeDataset("Shape1", + { + TensorShape{ 1U, 1U, 1U }, // Broadcast in X, Y, Z + TensorShape{ 485U, 1U, 1U }, // Broadcast in Y, Z + })) + { + } +}; + /** Data set containing medium tensor shapes. */ class MediumShapes final : public ShapeDataset { @@ -359,6 +416,19 @@ public: } }; +/** Data set containing large tensor shapes. */ +class LargeShapesNoBatches final : public ShapeDataset +{ +public: + LargeShapesNoBatches() + : ShapeDataset("Shape", + { + TensorShape{ 582U, 131U, 2U }, + }) + { + } +}; + /** Data set containing pairs of large tensor shapes that are broadcast compatible. */ class LargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index 036f28b29f..0b81dac1f0 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -28,24 +28,14 @@ #include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/gpu/cl/operators/ClAdd.h" -#include "src/gpu/cl/operators/ClConv2d.h" #include "tests/CL/CLAccessor.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/validation/Validation.h" #include "tests/validation/dynamic_fusion/Utils.h" #include "tests/validation/reference/ConvolutionLayer.h" -#include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/Permute.h" -#ifdef ARM_COMPUTE_ASSERTS_ENABLED -#include "tests/SimpleTensorPrinter.h" -#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ - using namespace arm_compute::experimental::dynamic_fusion; using namespace arm_compute::test::validation::utils; diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp new file mode 100644 index 0000000000..3743fbb664 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(ADD) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching not supported + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching not supported + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, false, true, true, false, false, true, false})), + input1_info, input2_info, output_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + auto dst_info = sketch.create_tensor_info(output_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &dst_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(ValidateRhsInplace, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for rhs + })), + framework::dataset::make("Expected", { true, false})), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(ValidateLhsInplace, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + }), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + })), + framework::dataset::make("Expected", { false, true})), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse Elementwise Add + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &lhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float tolerance_num = 0.01f; /**< Tolerance number */ + +template <typename T> +using DynamicFusionAddOpFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +template <typename T> +using DynamicFusionAddOpBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +template <typename T> +using DynamicFusionGpuFuseTwoAddOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( + framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine( + framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false, true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapesNoBatches()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapesNoBatches()), + framework::dataset::make("DataType", { DataType::U8 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // ADD +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp index 1f9319b10f..bfb9735599 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp @@ -22,21 +22,8 @@ * SOFTWARE. */ -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" - #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -46,12 +33,6 @@ #include "tests/datasets/SmallConvolutionLayerDataset.h" #include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h" -#ifdef ARM_COMPUTE_ASSERTS_ENABLED -#include "tests/SimpleTensorPrinter.h" -#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" namespace arm_compute { namespace test @@ -60,7 +41,7 @@ namespace validation { TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) -TEST_SUITE(GPU_CONV2D) +TEST_SUITE(CONV2D) RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ @@ -79,7 +60,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture<float>, framework } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataType", DataType::F16)), @@ -90,9 +70,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture<half>, framework: validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -TEST_SUITE_END() // GPU_CONV2D +TEST_SUITE_END() // CONV2D TEST_SUITE_END() // DYNAMIC_FUSION TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h index b0522488b4..e437c440d0 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -21,32 +21,23 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DYNAMIC_FUSION_FIXTURE -#define ARM_COMPUTE_TEST_DYNAMIC_FUSION_FIXTURE +#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE +#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" -#include "src/gpu/cl/operators/ClAdd.h" -#include "src/gpu/cl/operators/ClConv2d.h" - #include "tests/CL/CLAccessor.h" - -#include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" - #include "tests/validation/Validation.h" #include "tests/validation/reference/ConvolutionLayer.h" -#include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/Permute.h" using namespace arm_compute::experimental::dynamic_fusion; @@ -136,10 +127,10 @@ protected: tensor->allocator()->allocate(); // Use ACL allocated memory } // Construct user tensors - CLTensor t_input{}; - CLTensor t_weight{}; - CLTensor t_bias{}; - CLTensor t_dst{}; + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; // Initialize user tensors t_input.allocator()->init(input_info); @@ -152,9 +143,10 @@ protected: t_weight.allocator()->allocate(); t_bias.allocator()->allocate(); t_dst.allocator()->allocate(); - fill(CLAccessor(t_input), 0); - fill(CLAccessor(t_weight), 1); - fill(CLAccessor(t_bias), 2); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); // Run runtime runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); @@ -187,15 +179,11 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; DataType _data_type{}; - DataType _weights_data_type{}; DataType _bias_data_type{}; - DataType _output_data_type{}; DataLayout _data_layout{}; QuantizationInfo _quantization_info{}; QuantizationInfo _weight_quantization_info{}; bool _is_quantized = false; - bool _is_bfloat16 = false; - bool _mixed_layout = false; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> @@ -207,10 +195,10 @@ public: const PadStrideInfo &info, const Size2D &dialation, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) { DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, output_shape, bias_shape, info, dialation, - data_type, data_layout, quantization_info, quantization_info); + data_type, data_layout, quantization_info, quantization_info); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DYNAMIC_FUSION_FIXTURE */ +#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE */ diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h new file mode 100644 index 0000000000..d11237748f --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE +#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/reference/ElementwiseOperations.h" +#include "tests/validation/reference/Permute.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace) + { + _op = op; + _is_inplace = is_inplace; + _data_type = data_type; + _fuse = shape2.total_size() != 0; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + if(is_data_type_float(tensor.data_type())) + { + switch(_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) }); + break; + case ArithmeticOperation::POWER: + library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else if(tensor.data_type() == DataType::S32) + { + switch(_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1U, 1U) }); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(TensorShape shape0, TensorShape shape1, TensorShape shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + TensorInfo dst_info{}; + TensorInfo dst_info_fuse{}; + + // Fuse first element wise binary Op + auto lhs_info = sketch.create_tensor_info(shape0, 1, _data_type); + auto rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + TensorInfo rhs_info_fuse; + + // Testing root case while in-place + if(!_is_inplace) + { + dst_info = sketch.create_tensor_info(TensorInfo(1, _data_type)); + + FunctionType::create_op(sketch, &lhs_info, &rhs_info, &dst_info); + } + else + { + FunctionType::create_op(sketch, &lhs_info, &rhs_info, &lhs_info); + } + + if(_fuse) + { + // Fuse first element wise binary Op + rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + dst_info_fuse = sketch.create_tensor_info(); + FunctionType::create_op(sketch, &dst_info, &rhs_info_fuse, &dst_info_fuse); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for(auto &data : runtime.get_auxiliary_tensors()) + { + TensorType *tensor = data.first; + AuxMemoryInfo aux_mem_req = data.second; + tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment); + tensor->allocator()->allocate(); + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + TensorType t_dst_fuse{}; + + // Initialize user tensors + t_lhs.allocator()->init(lhs_info); + t_rhs.allocator()->init(rhs_info); + if(!_is_inplace) + { + t_dst.allocator()->init(dst_info); + if(_fuse) + { + t_rhs_fuse.allocator()->init(rhs_info_fuse); + t_dst_fuse.allocator()->init(dst_info_fuse); + } + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + if(!_is_inplace) + { + t_dst.allocator()->allocate(); + if(_fuse) + { + t_rhs_fuse.allocator()->allocate(); + t_dst_fuse.allocator()->allocate(); + } + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if(_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + // Run runtime + if(_is_inplace) + { + runtime.run({ &t_lhs, &t_rhs, &t_lhs }); + } + else + { + if(_fuse) + { + runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst_fuse }); + } + else + { + runtime.run({ &t_lhs, &t_rhs, &t_dst }); + } + } + + if(_is_inplace) + { + return t_lhs; + } + else if(_fuse) + { + return t_dst_fuse; + } + return t_dst; + } + + SimpleTensor<T> compute_reference(TensorShape shape0, TensorShape shape1, TensorShape shape2) + { + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); + + // Create reference + SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() }; + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + + reference::arithmetic_operation<T>(_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); + if(_fuse) + { + fill(ref_rhs_fuse, 2); + reference::arithmetic_operation<T>(_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); + } + SimpleTensor<T> *ret = _fuse ? &ref_dst_fuse : &ref_dst; + return *ret; + } + + ArithmeticOperation _op{ ArithmeticOperation::ADD }; + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; + bool _is_inplace{ false }; + bool _fuse{ false }; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(ArithmeticOperation op, TensorShape shape, const DataType data_type, const bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape, shape, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, const DataType data_type, const bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, shape2, data_type, is_inplace); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE */ |