From b1fcefddf3f59219a9d7930d607175b7e6c39347 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 15 Jun 2022 19:02:28 +0100 Subject: Implement new Elementwise Dynamic Fusion Operators: Div, Floor Resolves: COMPMID-5355 Change-Id: I92f73fbe885f28bbe7b07965b90cfd807c93602f Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7745 Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: SiCong Li --- .../CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp | 9 +- tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp | 135 +++++++++++++++++++++ .../Integration_OperatorFuseMovenetSubGraph1.cpp | 30 ++--- 3 files changed, 155 insertions(+), 19 deletions(-) create mode 100644 tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp (limited to 'tests/validation/CL') diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp index 96a845c36e..3ffbc077c6 100644 --- a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp +++ b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp @@ -74,8 +74,9 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL) ClExecutionDescriptor exec_desc{}; Status st{}; - const auto data_type = DataType::F32; - const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ }; + const auto data_type = DataType::F32; + const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ }; + const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD }; const auto width = 7U; const auto height = 6U; @@ -99,7 +100,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL) const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ? 2U : 4U) : 1U; const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info }; - const ClEltwiseAddKernelDescriptor eltwise_add_desc{}; + const ClElementwiseKernelDescriptor eltwise_add_desc{ eltwise_info }; const TileDescriptor store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT }; ArgumentID src_id{ g_arg_placeholder }; @@ -119,7 +120,7 @@ TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL) st = add_tensor(bp, &dst_info, dst_id); st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id); - st = add_kcomp_eltwise_add(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id); + st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id); st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id); exec_desc.skip_sliding_window = true; diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp new file mode 100644 index 0000000000..2b8f69e5e7 --- /dev/null +++ b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#include "arm_compute/core/TensorInfo.h" + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/experimental/ClWorkload.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/experimental/ClCompositeOperator.h" +#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h" +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h" +#include "tests/validation/Validation.h" + +#include "tests/validation/reference/Floor.h" +#include "tests/validation/reference/Permute.h" + +#ifdef ARM_COMPUTE_ASSERTS_ENABLED +#include "tests/SimpleTensorPrinter.h" +#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ + +using namespace arm_compute::experimental::dynamic_fusion; +using namespace arm_compute::test::validation::utils; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(DYNAMIC_FUSION) +TEST_CASE(Operator_Floor_1_F32, framework::DatasetMode::ALL) +{ + /* Computation: + * out = floor(input) + */ + const auto data_type = DataType::F32; + const auto data_layout = DataLayout::NHWC; + const auto t_shape = TensorShape(32, 16); + auto t_input_info = TensorInfo(t_shape, 1, data_type, data_layout); + auto t_dst_info = TensorInfo(); + + FloorDescriptor floor_desc{}; + + // Create reference + SimpleTensor ref_t_input{ t_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; + + // Fill reference + fill(ref_t_input, 0, library.get()); + + auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U)); + auto t_dst_shape_nchw = t_shape; + permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U)); + + auto ref_t_dst_nchw = reference::floor_layer(ref_t_input_nchw); + const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); + + CLScheduler::get().default_reinit(); + const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + OperatorGraph op_graph; + + const auto op_t_input = add_tensor(op_graph, t_input_info); + const auto op_t_dst = add_tensor(op_graph, t_dst_info); + + add_op_floor(op_graph, floor_desc, op_t_input, op_t_dst); + + const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; + ClWorkload workload; + build(workload, op_graph, workload_ctx); + + ClCompositeOperator op; + op.configure(cl_compile_ctx, workload); + + // Construct tensors + CLTensor t_input{}; + CLTensor t_dst{}; + + // Init tensors + t_input.allocator()->init(t_input_info); + t_dst.allocator()->init(t_dst_info); + + // Allocate and fill tensors + t_input.allocator()->allocate(); + t_dst.allocator()->allocate(); + fill(CLAccessor(t_input), 0, library.get()); + // "Pack" tensors + OpTensorBinding bp_tensors({ { op_t_input, &t_input }, + { op_t_dst, &t_dst } + }); + + // Populate prepare and run pack-maps (including allocating aux tensors) + ClAuxTensorData aux_tensor_data{}; + TensorPackMap prepare_pack_map{}; + TensorPackMap run_pack_map{}; + bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); + + op.prepare(prepare_pack_map); + op.run(run_pack_map); + RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); +} + +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp index fe8d23ef15..3a8b7c8ce8 100644 --- a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp +++ b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp @@ -77,8 +77,8 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL) auto t_acc_info = TensorInfo(); // Intermediate tensor for cond3 auto t_dst_info = TensorInfo(); - Conv2dDescriptor conv2d_desc{}; - AddDescriptor add_desc{}; + Conv2dDescriptor conv2d_desc{}; + ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD }; // Create reference SimpleTensor ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; @@ -119,7 +119,7 @@ TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL) auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc); force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); - add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; ClWorkload workload; @@ -180,8 +180,8 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL) auto t_acc_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - Conv2dDescriptor conv2d_desc{}; - AddDescriptor add_desc{}; + Conv2dDescriptor conv2d_desc{}; + ElementwiseDescriptor add_desc{}; OperatorGraph op_graph; @@ -192,7 +192,7 @@ TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL) const auto op_t_dst = add_tensor(op_graph, t_dst_info); auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc); - add_op_elementwise_add(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; @@ -290,7 +290,7 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL) auto t_dst_info = TensorInfo(); OperatorGraph op_graph; - const auto add_desc = AddDescriptor{}; + const auto add_desc = ElementwiseDescriptor{}; const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info); const auto op_t_l0_rhs = add_tensor(op_graph, t_l0_rhs_info); @@ -300,9 +300,9 @@ TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL) const auto op_t_l1_dst = add_tensor(op_graph, t_l1_dst_info); // temp accumulator; TensorInfo to be inferred const auto op_t_dst = add_tensor(op_graph, t_dst_info); - add_op_elementwise_add(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst); - add_op_elementwise_add(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst); - add_op_elementwise_add(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst); const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; ClWorkload workload; @@ -334,7 +334,7 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL) OperatorGraph op_graph; const auto conv2d_desc = Conv2dDescriptor{}; - const auto add_desc = AddDescriptor{}; + const auto add_desc = ElementwiseDescriptor{}; const auto op_t_l0_0_input = add_tensor(op_graph, t_l0_0_input_info); const auto op_t_l0_0_weight = add_tensor(op_graph, t_l0_0_weight_info); @@ -345,8 +345,8 @@ TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL) const auto op_t_dst = add_tensor(op_graph, t_dst_info); add_op_conv2d(op_graph, conv2d_desc, op_t_l0_0_input, op_t_l0_0_weight, op_t_l0_0_dst); - add_op_elementwise_add(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst); - add_op_elementwise_add(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst); + add_op_elementwise_op(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst); const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; ClWorkload workload; @@ -374,7 +374,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL) OperatorGraph op_graph; const auto conv2d_desc = Conv2dDescriptor{}; - const auto add_desc = AddDescriptor{}; + const auto add_desc = ElementwiseDescriptor{}; const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info); const auto op_t_l1_lhs = add_tensor(op_graph, t_l1_lhs_info); @@ -382,7 +382,7 @@ TEST_CASE(Loop, framework::DatasetMode::ALL) const auto op_t_state1 = add_tensor(op_graph, state1_info); add_op_conv2d(op_graph, conv2d_desc, op_t_l0_lhs, op_t_state0, op_t_state1); - add_op_elementwise_add(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0); + add_op_elementwise_op(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0); const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; ClWorkload workload; -- cgit v1.2.1