From 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 9 Nov 2022 15:57:48 +0000 Subject: Remove dynamic fusion prototype with tests and examples Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/ New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp Resolves COMPMID-5683 Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd Signed-off-by: SiCong Li Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671 Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins --- .../dynamic_fusion/ArbitraryElementwiseFusion.cpp | 394 -------------------- .../CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp | 215 ----------- .../CL/UNIT/dynamic_fusion/DependencyGraph.cpp | 266 -------------- tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp | 135 ------- .../Integration_OperatorFuseMovenetSubGraph1.cpp | 402 --------------------- tests/validation/CL/UNIT/dynamic_fusion/Utils.h | 71 ---- 6 files changed, 1483 deletions(-) delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/ArbitraryElementwiseFusion.cpp delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/DependencyGraph.cpp delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp delete mode 100644 tests/validation/CL/UNIT/dynamic_fusion/Utils.h (limited to 'tests/validation/CL/UNIT/dynamic_fusion') diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ArbitraryElementwiseFusion.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ArbitraryElementwiseFusion.cpp deleted file mode 100644 index 1b1e8aa761..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/ArbitraryElementwiseFusion.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h" -#include "src/core/utils/helpers/float_ops.h" -#include "tests/CL/CLAccessor.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/reference/ConvolutionLayer.h" -#include "tests/validation/reference/ElementwiseOperations.h" -#include "tests/validation/reference/Permute.h" - -#include "arm_compute/runtime/experimental/ClCompositeOperator.h" -#include "tests/validation/reference/Floor.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h" - -using namespace arm_compute::experimental::dynamic_fusion; -using namespace arm_compute::test::validation::utils; - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) -TEST_SUITE(UNIT) -TEST_SUITE(DYNAMIC_FUSION) -TEST_SUITE(ArbitraryFusion) - -TEST_CASE(ElementwiseBroadcasting, framework::DatasetMode::ALL) -{ - // Test elementwise broadcasting - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - - const auto input_shape = TensorShape(7, 9, 5); - const auto rhs_shape = TensorShape(7, 1, 1); - const auto dst_shape = TensorShape(7, 9, 5); - - // Tensor Info - auto input_info = TensorInfo(input_shape, 1, data_type, data_layout); - auto addend_info = TensorInfo(rhs_shape, 1, data_type, data_layout); - auto dst_info = TensorInfo(); - - ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD }; - - CLScheduler::get().default_reinit(); - const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - OperatorGraph op_graph; - - const auto op_input = add_tensor(op_graph, input_info); - const auto op_addend = add_tensor(op_graph, addend_info); - const auto op_dst = add_tensor(op_graph, dst_info); - - add_op_elementwise_op(op_graph, add_desc, op_input, op_addend, op_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - build(workload, op_graph, workload_ctx); - - ClCompositeOperator op; - op.configure(cl_compile_ctx, workload); - - // Construct tensors - CLTensor t_input{}; - CLTensor t_addend{}; - CLTensor t_dst{}; - - // Init tensors - t_input.allocator()->init(input_info); - t_addend.allocator()->init(addend_info); - t_dst.allocator()->init(dst_info); - - // Allocate and fill tensors - t_input.allocator()->allocate(); - t_addend.allocator()->allocate(); - t_dst.allocator()->allocate(); - - // Fill - fill(CLAccessor(t_input), 0, library.get()); - fill(CLAccessor(t_addend), 1, library.get()); - - // Pack tensors - OpTensorBinding bp_tensors({ { op_input, &t_input }, - { op_addend, &t_addend }, - { op_dst, &t_dst } - }); - - // Populate prepare and run pack-maps (including allocating aux tensors) - ClAuxTensorData aux_tensor_data{}; - TensorPackMap prepare_pack_map{}; - TensorPackMap run_pack_map{}; - bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); - - op.prepare(prepare_pack_map); - op.run(run_pack_map); - - // Create reference - SimpleTensor ref_input{ input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_addend{ rhs_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_input, 0, library.get()); - fill(ref_addend, 1, library.get()); - - auto ref_input_nchw = reference::permute(ref_input, PermutationVector(1U, 2U, 0U)); - auto ref_addend_nchw = reference::permute(ref_addend, PermutationVector(1U, 2U, 0U)); - - auto dst_shape_nchw = dst_shape; - permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - auto ref_t_dst_nchw = reference::arithmetic_operation( - ArithmeticOperation::ADD, - ref_input_nchw, - ref_addend_nchw, - data_type, - ConvertPolicy{}); - - const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); - - RelativeTolerance tolerance_f32(0.001f); - validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); -} -TEST_CASE(DivFloor, framework::DatasetMode::ALL) -{ - // x = floor(div(input, input2)) - const auto data_type = DataType::F32; - const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::DIV }; - - // Tensor Values - const auto width = 7U; - const auto height = 6U; - - // Shapes - const auto input1_shape = TensorShape(width, height); - const auto input2_shape = TensorShape(width, height); - const auto dst_shape = TensorShape(width, height); - - // Create reference - SimpleTensor ref_src_nhwc{ input1_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_src2_nhwc{ input2_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_src_nhwc, 0, library.get()); - fill(ref_src2_nhwc, 1, library.get()); - - auto ref_src = reference::permute(ref_src_nhwc, PermutationVector(1U, 2U, 0U)); - auto ref_src2 = reference::permute(ref_src2_nhwc, PermutationVector(1U, 2U, 0U)); - - TensorShape dst_shape_nchw{ dst_shape }; - permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - const auto ref_dst_nchw = reference::floor_layer(reference::arithmetic_operation( - ArithmeticOperation::DIV, - ref_src, - ref_src2, - data_type, - ConvertPolicy::SATURATE)); - - const auto ref_t_dst = reference::permute(ref_dst_nchw, PermutationVector(2U, 0U, 1U)); - - // Tensor Info - auto input1_info = TensorInfo(input1_shape, 1, data_type, DataLayout::NHWC); - auto input2_info = TensorInfo(input2_shape, 1, data_type, DataLayout::NHWC); - auto dst_info = TensorInfo(); - auto acc_info = TensorInfo(); // Intermediate tensor for division - - // Initialise Scheduler - CLScheduler::get().default_reinit(); - const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - OperatorGraph op_graph; - - // add tensors - auto op_input1 = add_tensor(op_graph, input1_info); - auto op_input2 = add_tensor(op_graph, input2_info); - auto op_acc = add_tensor(op_graph, acc_info); - auto op_dst = add_tensor(op_graph, dst_info); - - add_op_elementwise_op(op_graph, eltwise_info, op_input1, op_input2, op_acc); - add_op_floor(op_graph, FloorDescriptor(), op_acc, op_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - build(workload, op_graph, workload_ctx); - - ClCompositeOperator op; - op.configure(cl_compile_ctx, workload); - - // Configure and add tensors. - CLTensor t_input1{}; - CLTensor t_input2{}; - CLTensor t_dst{}; - - // Init Tensors - t_input1.allocator()->init(input1_info); - t_input2.allocator()->init(input2_info); - t_dst.allocator()->init(dst_info); - - // Allocate and fill tensors - t_input1.allocator()->allocate(); - t_input2.allocator()->allocate(); - t_dst.allocator()->allocate(); - - fill(CLAccessor(t_input1), 0, library.get()); - fill(CLAccessor(t_input2), 1, library.get()); - - // "Pack" tensors - OpTensorBinding bp_tensors({ { op_input1, &t_input1 }, - { op_input2, &t_input2 }, - { op_dst, &t_dst } - }); - - // Populate prepare and run pack-maps (including allocating aux tensors) - ClAuxTensorData aux_tensor_data{}; - TensorPackMap prepare_pack_map{}; - TensorPackMap run_pack_map{}; - bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); - - op.prepare(prepare_pack_map); - op.run(run_pack_map); - - RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ - validate(CLAccessor(t_dst), ref_dst_nchw, tolerance_f32); -} -TEST_CASE(Dconv2dAddDiv, framework::DatasetMode::ALL) -{ - // output = div(divend, add(addend, conv2d1x1(direct_conv)(input, weights, bias))) - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - - const auto input_shape = TensorShape(384, 12, 12); - const auto weight_shape = TensorShape(384, 1, 1, 16); - const auto dst_shape = TensorShape(16, 12, 12); - - // Tensor Info - auto input_info = TensorInfo(input_shape, 1, data_type, data_layout); - auto weight_info = TensorInfo(weight_shape, 1, data_type, data_layout); - auto addend_info = TensorInfo(dst_shape, 1, data_type, data_layout); - auto divend_info = TensorInfo(dst_shape, 1, data_type, data_layout); - auto acc_info = TensorInfo(); // Intermediate tensor for conv - auto acc_1_info = TensorInfo(); - auto dst_info = TensorInfo(); - - Conv2dDescriptor conv2d_desc{}; - ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD }; - ElementwiseDescriptor div_desc{ ArithmeticOperation::DIV }; - - CLScheduler::get().default_reinit(); - const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - OperatorGraph op_graph; - - const auto op_input = add_tensor(op_graph, input_info); - const auto op_weight = add_tensor(op_graph, weight_info); - const auto op_addend = add_tensor(op_graph, addend_info); - const auto op_divend = add_tensor(op_graph, divend_info); - const auto op_acc = add_tensor(op_graph, acc_info); // temp accumulator; TensorInfo to be inferred - const auto op_acc_1 = add_tensor(op_graph, acc_1_info); // temp accumulator; TensorInfo to be inferred - const auto op_dst = add_tensor(op_graph, dst_info); - - auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_input, op_weight, op_acc); - force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); - add_op_elementwise_op(op_graph, add_desc, op_acc, op_addend, op_acc_1); - add_op_elementwise_op(op_graph, div_desc, op_acc_1, op_divend, op_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - build(workload, op_graph, workload_ctx); - - ClCompositeOperator op; - op.configure(cl_compile_ctx, workload); - - // Construct tensors - CLTensor t_input{}; - CLTensor t_weight{}; - CLTensor t_addend{}; - CLTensor t_divend{}; - CLTensor t_dst{}; - - // Init tensors - t_input.allocator()->init(input_info); - t_weight.allocator()->init(weight_info); - t_divend.allocator()->init(divend_info); - t_addend.allocator()->init(addend_info); - t_dst.allocator()->init(dst_info); - - // Allocate and fill tensors - t_input.allocator()->allocate(); - t_weight.allocator()->allocate(); - t_divend.allocator()->allocate(); - t_addend.allocator()->allocate(); - t_dst.allocator()->allocate(); - - // Fill - fill(CLAccessor(t_input), 0, library.get()); - fill(CLAccessor(t_weight), 1, library.get()); - fill(CLAccessor(t_addend), 2, library.get()); - fill(CLAccessor(t_divend), 3, library.get()); - - // Pack tensors - OpTensorBinding bp_tensors({ { op_input, &t_input }, - { op_weight, &t_weight }, - { op_addend, &t_addend }, - { op_divend, &t_divend }, - { op_dst, &t_dst } - }); - - // Populate prepare and run pack-maps (including allocating aux tensors) - ClAuxTensorData aux_tensor_data{}; - TensorPackMap prepare_pack_map{}; - TensorPackMap run_pack_map{}; - bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); - - op.prepare(prepare_pack_map); - op.run(run_pack_map); - - // Create reference - SimpleTensor ref_input{ input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_weight{ weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_bias_placeholder{ dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_addend{ dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_divend{ dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_input, 0, library.get()); - fill(ref_weight, 1, library.get()); - fill(ref_addend, 2, library.get()); - fill(ref_divend, 3, library.get()); - - auto ref_input_nchw = reference::permute(ref_input, PermutationVector(1U, 2U, 0U)); - auto ref_weight_nchw = reference::permute(ref_weight, PermutationVector(1U, 2U, 0U)); - auto ref_bias_placeholder_nchw = reference::permute(ref_bias_placeholder, PermutationVector(1U, 2U, 0U)); - auto ref_addend_nchw = reference::permute(ref_addend, PermutationVector(1U, 2U, 0U)); - auto ref_divend_nchw = reference::permute(ref_divend, PermutationVector(1U, 2U, 0U)); - - auto dst_shape_nchw = dst_shape; - permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - PadStrideInfo legacy_pad_stride(conv2d_desc.stride.x(), conv2d_desc.stride.y(), conv2d_desc.pad.left, conv2d_desc.pad.right, conv2d_desc.pad.top, conv2d_desc.pad.bottom, DimensionRoundingType{}); - auto ref_acc_nchw = reference::arithmetic_operation( - ArithmeticOperation::ADD, - ref_addend_nchw, - reference::convolution_layer(ref_input_nchw, ref_weight_nchw, ref_bias_placeholder_nchw, dst_shape_nchw, legacy_pad_stride, conv2d_desc.dilation), - data_type, - ConvertPolicy{}); - - auto ref_t_dst_nchw = reference::arithmetic_operation( - ArithmeticOperation::DIV, - ref_acc_nchw, - ref_divend_nchw, - data_type, - ConvertPolicy{}); - - const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); - - RelativeTolerance tolerance_f32(0.001f); - validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); -} - -TEST_SUITE_END() // ArbitraryFusion -TEST_SUITE_END() // DYNAMIC_FUSION -TEST_SUITE_END() // UNIT -TEST_SUITE_END() // CL - -} // namespace validation -} // namespace test -} // namespace arm_compute - -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp deleted file mode 100644 index dc98d72f4b..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h" - -#include "src/core/utils/helpers/float_ops.h" -#include "src/gpu/cl/kernels/ClElementwiseKernel.h" -#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" -#include "tests/CL/CLAccessor.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/reference/ConvolutionLayer.h" -#include "tests/validation/reference/ElementwiseOperations.h" -#include "tests/validation/reference/GEMM.h" -#include "tests/validation/reference/Permute.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h" - -#include - -using namespace arm_compute::experimental::dynamic_fusion; -using namespace arm_compute::test::validation::utils; - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) -TEST_SUITE(UNIT) -TEST_SUITE(DYNAMIC_FUSION) -TEST_SUITE(ClCompositeKernel) -TEST_SUITE(Validate) - -TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL) -{ - /* Computation: - * out = add(addend, direct_conv2d(lhs, rhs, bias)) (non-broadcast) - */ - - ClCompositeKernel kernel{}; - ClKernelBlueprint bp{}; - ClKernelCode cl_code{}; - ClExecutionDescriptor exec_desc{}; - Status st{}; - - const auto data_type = DataType::F32; - const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ }; - const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD }; - - const auto width = 7U; - const auto height = 6U; - const auto IFM = 5U; - const auto OFM = 4U; - const auto kernel_sz = 3U; - - const auto src_shape = TensorShape(IFM, width, height); - const auto wei_shape = TensorShape(IFM, kernel_sz, kernel_sz, OFM); - const auto bia_shape = TensorShape(OFM); - const auto addend_shape = TensorShape(1, 1); - const auto dst_shape = TensorShape(OFM, width, height); - - auto src_info = TensorInfo(src_shape, 1, data_type, DataLayout::NHWC); - auto wei_info = TensorInfo(wei_shape, 1, data_type, DataLayout::NHWC); - auto bia_info = TensorInfo(bia_shape, 1, data_type, DataLayout::NHWC); - auto addend_info = TensorInfo(addend_shape, 1, data_type, DataLayout::NHWC); - auto dst_info = TensorInfo(dst_shape, 1, data_type, DataLayout::NHWC); - - const auto n0 = std::min(OFM, 4u); - const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ? 2U : 4U) : 1U; - - const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info }; - const ClElementwiseKernelDescriptor eltwise_add_desc{ eltwise_info }; - const TileDescriptor store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT }; - - ArgumentID src_id{ g_arg_placeholder }; - ArgumentID wei_id{ g_arg_placeholder }; - ArgumentID bia_id{ g_arg_placeholder }; - ArgumentID acc_id{ g_arg_placeholder }; - ArgumentID acc_1_id{ g_arg_placeholder }; - ArgumentID addend_id{ g_arg_placeholder }; - ArgumentID dst_id{ g_arg_placeholder }; - - st = add_tensor(bp, &src_info, src_id); - st = add_tensor(bp, &wei_info, wei_id); - st = add_tensor(bp, &bia_info, bia_id); - st = add_tensor(bp, &dst_info, acc_id); - st = add_tensor(bp, &dst_info, acc_1_id); - st = add_tensor(bp, &addend_info, addend_id); - st = add_tensor(bp, &dst_info, dst_id); - - st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id); - st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id); - st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id); - - exec_desc.skip_sliding_window = true; - - st = set_tile_info(bp, store_tile_info); - st = build(cl_code, ClCodeBuilderContext{ GpuInfo{ GPUTarget::G71 } }, bp); - st = tune_static(exec_desc, cl_code); - - CLScheduler::get().default_reinit(); - kernel.configure(CLKernelLibrary::get().get_compile_context(), cl_code); - - // Construct tensors - CLTensor src{}; - CLTensor wei{}; - CLTensor bia{}; - CLTensor addend{}; - CLTensor dst{}; - - // Init tensors - src.allocator()->init(src_info); - wei.allocator()->init(wei_info); - bia.allocator()->init(bia_info); - addend.allocator()->init(dst_info); - dst.allocator()->init(dst_info); - - // "Pack" tensors - ITensorPack tensors{ { src_id, &src }, - { wei_id, &wei }, - { bia_id, &bia }, - { addend_id, &addend }, - { dst_id, &dst } }; - - // Allocate and fill tensors - src.allocator()->allocate(); - wei.allocator()->allocate(); - bia.allocator()->allocate(); - addend.allocator()->allocate(); - dst.allocator()->allocate(); - - fill(CLAccessor(src), 0, library.get()); - fill(CLAccessor(wei), 1, library.get()); - fill(CLAccessor(bia), 2, library.get()); - fill(CLAccessor(addend), 3, library.get()); - - CLScheduler::get().enqueue_op(kernel, tensors, exec_desc, true); - - // Create reference - SimpleTensor ref_src_nhwc{ src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_wei_nhwc{ wei_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_bia_nhwc{ bia_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_addend_nhwc{ addend_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_src_nhwc, 0, library.get()); - fill(ref_wei_nhwc, 1, library.get()); - fill(ref_bia_nhwc, 2, library.get()); - fill(ref_addend_nhwc, 3, library.get()); - - auto ref_src = reference::permute(ref_src_nhwc, PermutationVector(1U, 2U, 0U)); - auto ref_wei = reference::permute(ref_wei_nhwc, PermutationVector(1U, 2U, 0U)); - auto ref_bia = reference::permute(ref_bia_nhwc, PermutationVector(1U, 2U, 0U)); - auto ref_addend = reference::permute(ref_addend_nhwc, PermutationVector(1U, 2U, 0U)); - - TensorShape dst_shape_nchw{ dst_shape }; - permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - const auto ref_dst = reference::arithmetic_operation( - ArithmeticOperation::ADD, - ref_addend, - reference::convolution_layer(ref_src, ref_wei, ref_bia, dst_shape_nchw, - PadStrideInfo - { - static_cast(conv_info.stride.x()), - static_cast(conv_info.stride.y()), - static_cast(conv_info.pad.left), - static_cast(conv_info.pad.top) }), - data_type, - ConvertPolicy::SATURATE); - - RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ - validate(CLAccessor(dst), ref_dst, tolerance_f32); -} - -TEST_SUITE_END() // Validate -TEST_SUITE_END() // ClCompositeKernel -TEST_SUITE_END() // DYNAMIC_FUSION -TEST_SUITE_END() // UNIT -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/tests/validation/CL/UNIT/dynamic_fusion/DependencyGraph.cpp b/tests/validation/CL/UNIT/dynamic_fusion/DependencyGraph.cpp deleted file mode 100644 index 1824efff99..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/DependencyGraph.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "arm_compute/core/experimental/DependencyGraph.h" - -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" - -using namespace arm_compute::experimental::dynamic_fusion; - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) - -TEST_SUITE(UNIT) -TEST_SUITE(DYNAMIC_FUSION) -TEST_SUITE(DependencyGraph) - -TEST_CASE(Correct_Graph_Creation_Should_Pass, framework::DatasetMode::ALL) -{ - DependencyGraph graph{}; - const auto t0 = graph.add_tensor(); - const auto t1 = graph.add_tensor(); - const auto t2 = graph.add_tensor(); - const auto t3 = graph.add_tensor(); - const auto t4 = graph.add_tensor(); - - const auto o0 = graph.add_operator({ t0, t1 }, { t2 }).second; - const auto o1 = graph.add_operator({ t3, t2 }, { t4 }).second; - - ARM_COMPUTE_EXPECT_EQUAL(graph.number_of_ops(), 2U, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT_EQUAL(graph.number_of_tensors(), 5U, framework::LogLevel::ERRORS); - - const DependencyGraph ref_graph - { - { - // src_tensors - { o0, { t0, t1 } }, - { o1, { t3, t2 } }, - }, - { - // dst_tensors - { o0, { t2 } }, - { o1, { t4 } }, - }, - { - // src_ops - { t0, {} }, - { t1, {} }, - { t2, { o0 } }, - { t3, {} }, - { t4, { o1 } }, - }, - { - // dst_ops - { t0, { o0 } }, - { t1, { o0 } }, - { t2, { o1 } }, - { t3, { o1 } }, - { t4, {} }, - } - - }; - ARM_COMPUTE_EXPECT(graph == ref_graph, framework::LogLevel::ERRORS); -} - -TEST_CASE(Correct_Merge_Points_Should_Enable_Graph_Expansion, framework::DatasetMode::ALL) -{ - // Merge points are a simple way to collapse "graph of graphs" into a single graph - // Suppose we have a top-level graph g0 - DependencyGraph g0{}; - const auto g0_t0 = g0.add_tensor(); - const auto g0_t1 = g0.add_tensor(); - const auto g0_t2 = g0.add_tensor(); - const auto g0_t3 = g0.add_tensor(); - const auto g0_t4 = g0.add_tensor(); - g0.add_operator({ g0_t0, g0_t1 }, { g0_t2 }); // g0_o0 - g0.add_operator({ g0_t3, g0_t2 }, { g0_t4 }); // g0_o1 - - // Then g0 expands into g1, with additional nodes added in-between "merge point tensors" - // Note that the expansion logic may be local to each operator node - DependencyGraph g1{}; - // g0_o0 expands into g1_o0, g1_o1, g1_o2 - const auto g1_t0 = g1.add_tensor(g0_t0); - const auto g1_t1 = g1.add_tensor(g0_t1); - const auto g1_t2 = g1.add_tensor(); - const auto g1_t3 = g1.add_tensor(); - const auto g1_t4 = g1.add_tensor(g0_t2); - const auto g1_o0 = g1.add_operator({ g1_t0 }, { g1_t2 }).second; - const auto g1_o1 = g1.add_operator({ g1_t1 }, { g1_t3 }).second; - const auto g1_o2 = g1.add_operator({ g1_t2, g1_t3 }, { g1_t4 }).second; - - // g0_o1 expands into g1_o3 - const auto g1_t5 = g1.add_tensor(g0_t3); - const auto g1_t6 = g1.add_tensor(g0_t2); - const auto g1_t7 = g1.add_tensor(g0_t4); - ARM_COMPUTE_EXPECT_EQUAL(g1_t4, g1_t6, framework::LogLevel::ERRORS); // both associate with the same merge point g0_t2, thus they should point to the same tensor in g1 - const auto g1_o3 = g1.add_operator({ g1_t5, g1_t6 }, { g1_t7 }).second; - - const DependencyGraph ref_graph - { - { - // src_tensors - { g1_o0, { g1_t0 } }, - { g1_o1, { g1_t1 } }, - { g1_o2, { g1_t2, g1_t3 } }, - { g1_o3, { g1_t5, g1_t4 } }, - }, - { - // dst_tensors - { g1_o0, { g1_t2 } }, - { g1_o1, { g1_t3 } }, - { g1_o2, { g1_t4 } }, - { g1_o3, { g1_t7 } }, - }, - { - // src_ops - { g1_t0, {} }, - { g1_t1, {} }, - { g1_t2, { g1_o0 } }, - { g1_t3, { g1_o1 } }, - { g1_t4, { g1_o2 } }, - { g1_t5, {} }, - { g1_t7, { g1_o3 } }, - }, - { - // dst_ops - { g1_t0, { g1_o0 } }, - { g1_t1, { g1_o1 } }, - { g1_t2, { g1_o2 } }, - { g1_t3, { g1_o2 } }, - { g1_t4, { g1_o3 } }, - { g1_t5, { g1_o3 } }, - { g1_t7, {} }, - }, - { - // merge points - { g0_t0, g1_t0 }, - { g0_t1, g1_t1 }, - { g0_t2, g1_t4 }, - { g0_t3, g1_t5 }, - { g0_t4, g1_t7 }, - } - }; - ARM_COMPUTE_EXPECT(g1 == ref_graph, framework::LogLevel::ERRORS); -} - -TEST_CASE(Path_Existence_Check_0, framework::DatasetMode::ALL) -{ - DependencyGraph graph{}; - const auto t0 = graph.add_tensor(); - const auto t1 = graph.add_tensor(); - const auto t2 = graph.add_tensor(); - const auto t3 = graph.add_tensor(); - const auto t4 = graph.add_tensor(); - const auto t5 = graph.add_tensor(); - const auto t6 = graph.add_tensor(); - const auto t7 = graph.add_tensor(); - const auto o0 = graph.add_operator({ t1 }, { t3, t4 }).second; - const auto o1 = graph.add_operator({ t3 }, { t5 }).second; - const auto o2 = graph.add_operator({ t5, t6 }, { t7 }).second; - const auto o3 = graph.add_operator({ t4 }, { t6 }).second; - const auto o4 = graph.add_operator({ t0, t5 }, { t2 }).second; - - ARM_COMPUTE_UNUSED(o1, o3); - - ARM_COMPUTE_EXPECT((graph.path_exists_from_tensor_to_op(t3, o2)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT((graph.path_exists_from_tensor_to_op(t1, o4)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!(graph.path_exists_from_tensor_to_op(t2, o4)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!(graph.path_exists_from_tensor_to_op(t0, o2)), framework::LogLevel::ERRORS); - - ARM_COMPUTE_EXPECT((graph.path_exists_from_op_to_op(o0, o2)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!(graph.path_exists_from_op_to_op(o2, o0)), framework::LogLevel::ERRORS); - - ARM_COMPUTE_EXPECT(!(graph.path_exists_from_op_to_op(o2, o4)), framework::LogLevel::ERRORS); -} - -TEST_CASE(Correct_Topological_Sort_Should_Pass, framework::DatasetMode::ALL) -{ - DependencyGraph graph{}; - const auto t0 = graph.add_tensor(); - const auto t1 = graph.add_tensor(); - const auto t2 = graph.add_tensor(); - const auto t3 = graph.add_tensor(); - const auto t4 = graph.add_tensor(); - const auto t5 = graph.add_tensor(); - const auto t6 = graph.add_tensor(); - const auto t7 = graph.add_tensor(); - const auto o0 = graph.add_operator({ t1 }, { t3, t4 }).second; - const auto o1 = graph.add_operator({ t3 }, { t5 }).second; - const auto o2 = graph.add_operator({ t5, t6 }, { t7 }).second; - const auto o3 = graph.add_operator({ t4 }, { t6 }).second; - const auto o4 = graph.add_operator({ t0, t5 }, { t2 }).second; - - const auto res = graph.topological_sort(); - ARM_COMPUTE_EXPECT(bool(res.first), framework::LogLevel::ERRORS); - std::vector ref_sorted_op_packs - { - { o0, { t1 }, { t3, t4 } }, - { o1, { t3 }, { t5 } }, - { o3, { t4 }, { t6 } }, - { o4, { t0, t5 }, { t2 } }, - { o2, { t5, t6 }, { t7 } }, - - }; - ARM_COMPUTE_EXPECT((res.second == ref_sorted_op_packs), framework::LogLevel::ERRORS); -} - -TEST_CASE(Cycles_Should_Fail, framework::DatasetMode::ALL) -{ - DependencyGraph graph{}; - const auto t0 = graph.add_tensor(); - const auto t1 = graph.add_tensor(); - const auto t2 = graph.add_tensor(); - const auto t3 = graph.add_tensor(); - - graph.add_operator({ t0, t1 }, { t2 }); - graph.add_operator({ t2 }, { t1, t3 }); // Ideally error should occur here - - const auto res = graph.topological_sort(); - ARM_COMPUTE_EXPECT(!bool(res.first), framework::LogLevel::ERRORS); -} -TEST_CASE(Loops_Should_Fail, framework::DatasetMode::ALL) -{ - DependencyGraph graph{}; - const auto t0 = graph.add_tensor(); - const auto t1 = graph.add_tensor(); - const auto t2 = graph.add_tensor(); - - ARM_COMPUTE_EXPECT_THROW(graph.add_operator({ t0, t2 }, { t1, t2 }).first, framework::LogLevel::ERRORS); - ARM_COMPUTE_UNUSED(t0, t1, t2); -} -TEST_SUITE_END() // DependencyGraph -TEST_SUITE_END() // DYNAMIC_FUSION -TEST_SUITE_END() // UNIT - -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp deleted file mode 100644 index 2b8f69e5e7..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/Floor.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "arm_compute/core/TensorInfo.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/experimental/ClWorkload.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/experimental/ClCompositeOperator.h" -#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h" -#include "tests/CL/CLAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h" -#include "tests/validation/Validation.h" - -#include "tests/validation/reference/Floor.h" -#include "tests/validation/reference/Permute.h" - -#ifdef ARM_COMPUTE_ASSERTS_ENABLED -#include "tests/SimpleTensorPrinter.h" -#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ - -using namespace arm_compute::experimental::dynamic_fusion; -using namespace arm_compute::test::validation::utils; - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) -TEST_SUITE(UNIT) -TEST_SUITE(DYNAMIC_FUSION) -TEST_CASE(Operator_Floor_1_F32, framework::DatasetMode::ALL) -{ - /* Computation: - * out = floor(input) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - const auto t_shape = TensorShape(32, 16); - auto t_input_info = TensorInfo(t_shape, 1, data_type, data_layout); - auto t_dst_info = TensorInfo(); - - FloorDescriptor floor_desc{}; - - // Create reference - SimpleTensor ref_t_input{ t_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_t_input, 0, library.get()); - - auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U)); - auto t_dst_shape_nchw = t_shape; - permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - auto ref_t_dst_nchw = reference::floor_layer(ref_t_input_nchw); - const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); - - CLScheduler::get().default_reinit(); - const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - OperatorGraph op_graph; - - const auto op_t_input = add_tensor(op_graph, t_input_info); - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - add_op_floor(op_graph, floor_desc, op_t_input, op_t_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - build(workload, op_graph, workload_ctx); - - ClCompositeOperator op; - op.configure(cl_compile_ctx, workload); - - // Construct tensors - CLTensor t_input{}; - CLTensor t_dst{}; - - // Init tensors - t_input.allocator()->init(t_input_info); - t_dst.allocator()->init(t_dst_info); - - // Allocate and fill tensors - t_input.allocator()->allocate(); - t_dst.allocator()->allocate(); - fill(CLAccessor(t_input), 0, library.get()); - // "Pack" tensors - OpTensorBinding bp_tensors({ { op_t_input, &t_input }, - { op_t_dst, &t_dst } - }); - - // Populate prepare and run pack-maps (including allocating aux tensors) - ClAuxTensorData aux_tensor_data{}; - TensorPackMap prepare_pack_map{}; - TensorPackMap run_pack_map{}; - bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); - - op.prepare(prepare_pack_map); - op.run(run_pack_map); - RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ - validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); -} - -TEST_SUITE_END() // DYNAMIC_FUSION -TEST_SUITE_END() // UNIT -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp b/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp deleted file mode 100644 index 3a8b7c8ce8..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/Integration_OperatorFuseMovenetSubGraph1.cpp +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "arm_compute/core/TensorInfo.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/experimental/ClWorkload.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/experimental/ClCompositeOperator.h" -#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h" -#include "src/gpu/cl/operators/ClAdd.h" -#include "src/gpu/cl/operators/ClConv2d.h" -#include "tests/CL/CLAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h" -#include "tests/validation/Validation.h" - -#include "tests/validation/reference/ConvolutionLayer.h" -#include "tests/validation/reference/ElementwiseOperations.h" -#include "tests/validation/reference/Permute.h" - -#ifdef ARM_COMPUTE_ASSERTS_ENABLED -#include "tests/SimpleTensorPrinter.h" -#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ - -using namespace arm_compute::experimental::dynamic_fusion; -using namespace arm_compute::test::validation::utils; - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) -TEST_SUITE(INTEGRATION) -TEST_SUITE(DYNAMIC_FUSION) -TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL) -{ - // Please refer to: https://confluence.arm.com/pages/viewpage.action?pageId=886243697 - /* Computation: - * out = add_desc(addend, conv2d1x1(direct_conv)(input, weights, bias)) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - const auto t_input_shape = TensorShape(384, 12, 12); - // const auto t_weight_shape = TensorShape(384, 1, 1, 64); - // const auto t_dst_shape = TensorShape(64, 12, 12); - const auto t_weight_shape = TensorShape(384, 1, 1, 16); - const auto t_dst_shape = TensorShape(16, 12, 12); - auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout); - auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout); - auto t_l1_addend_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - auto t_acc_info = TensorInfo(); // Intermediate tensor for cond3 - auto t_dst_info = TensorInfo(); - - Conv2dDescriptor conv2d_desc{}; - ElementwiseDescriptor add_desc{ ArithmeticOperation::ADD }; - - // Create reference - SimpleTensor ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_t_weight{ t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_t_bias_placeholder{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor ref_t_l1_addend{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - - // Fill reference - fill(ref_t_input, 0, library.get()); - fill(ref_t_weight, 1, library.get()); - fill(ref_t_l1_addend, 2, library.get()); - - auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U)); - auto ref_t_weight_nchw = reference::permute(ref_t_weight, PermutationVector(1U, 2U, 0U)); - auto ref_t_bias_placeholder_nchw = reference::permute(ref_t_bias_placeholder, PermutationVector(1U, 2U, 0U)); - auto ref_t_l1_addend_nchw = reference::permute(ref_t_l1_addend, PermutationVector(1U, 2U, 0U)); - auto t_dst_shape_nchw = t_dst_shape; - permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - - PadStrideInfo legacy_pad_stride(conv2d_desc.stride.x(), conv2d_desc.stride.y(), conv2d_desc.pad.left, conv2d_desc.pad.right, conv2d_desc.pad.top, conv2d_desc.pad.bottom, DimensionRoundingType{}); - auto ref_t_dst_nchw = reference::arithmetic_operation( - ArithmeticOperation::ADD, - ref_t_l1_addend_nchw, - reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, t_dst_shape_nchw, legacy_pad_stride, conv2d_desc.dilation), - data_type, - ConvertPolicy{}); - const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); - - CLScheduler::get().default_reinit(); - const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - OperatorGraph op_graph; - - const auto op_t_input = add_tensor(op_graph, t_input_info); - const auto op_t_weight = add_tensor(op_graph, t_weight_info); - const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info); - const auto op_t_acc = add_tensor(op_graph, t_acc_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc); - force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); - add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - build(workload, op_graph, workload_ctx); - - ClCompositeOperator op; - op.configure(cl_compile_ctx, workload); - - // Construct tensors - CLTensor t_input{}; - CLTensor t_weight{}; - CLTensor t_l1_addend{}; - CLTensor t_dst{}; - - // Init tensors - t_input.allocator()->init(t_input_info); - t_weight.allocator()->init(t_weight_info); - t_l1_addend.allocator()->init(t_dst_info); - t_dst.allocator()->init(t_dst_info); - - // Allocate and fill tensors - t_input.allocator()->allocate(); - t_weight.allocator()->allocate(); - t_l1_addend.allocator()->allocate(); - t_dst.allocator()->allocate(); - fill(CLAccessor(t_input), 0, library.get()); - fill(CLAccessor(t_weight), 1, library.get()); - fill(CLAccessor(t_l1_addend), 2, library.get()); - // "Pack" tensors - OpTensorBinding bp_tensors({ { op_t_input, &t_input }, - { op_t_weight, &t_weight }, - { op_t_l1_addend, &t_l1_addend }, - { op_t_dst, &t_dst } - }); - - // Populate prepare and run pack-maps (including allocating aux tensors) - ClAuxTensorData aux_tensor_data{}; - TensorPackMap prepare_pack_map{}; - TensorPackMap run_pack_map{}; - bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors); - - op.prepare(prepare_pack_map); - op.run(run_pack_map); - RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ - validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); -} -TEST_SUITE(Unsupported) -TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL) -{ - const auto data_type = DataType::QASYMM8; - const auto data_layout = DataLayout::NHWC; - const auto t_input_shape = TensorShape(384, 12, 12); - const auto t_weight_shape = TensorShape(384, 1, 1, 64); - const auto t_dst_shape = TensorShape(64, 12, 12); - auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout); - auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout); - auto t_l1_addend_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - auto t_acc_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - - Conv2dDescriptor conv2d_desc{}; - ElementwiseDescriptor add_desc{}; - - OperatorGraph op_graph; - - const auto op_t_input = add_tensor(op_graph, t_input_info); - const auto op_t_weight = add_tensor(op_graph, t_weight_info); - const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info); - const auto op_t_acc = add_tensor(op_graph, t_acc_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc); - add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst); - force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_CASE(DataLayout_NCHW, framework::DatasetMode::ALL) -{ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NCHW; - const auto t_input_shape = TensorShape(384, 12, 12); - const auto t_weight_shape = TensorShape(384, 1, 1, 64); - const auto t_dst_shape = TensorShape(64, 12, 12); - auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout); - auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout); - auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout); - - Conv2dDescriptor conv2d_desc{}; - - OperatorGraph op_graph; - - const auto op_t_input = add_tensor(op_graph, t_input_info); - const auto op_t_weight = add_tensor(op_graph, t_weight_info); - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_dst); - force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_SUITE_END() // Unsupported - -TEST_SUITE(Invalid) -TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) -{ - /* Computation: - * out = conv2d(conv2d(l0_input, l0_weight), l1_weight) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - const auto t_l0_input_shape = TensorShape(1024, 56, 56); - const auto t_l0_weight_shape = TensorShape(512, 1024, 1, 1); - const auto t_l1_weight_shape = TensorShape(512, 256, 1, 1); - - auto t_l0_input_info = TensorInfo(t_l0_input_shape, 1, data_type, data_layout); - auto t_l0_weight_info = TensorInfo(t_l0_weight_shape, 1, data_type, data_layout); - auto t_l1_weight_info = TensorInfo(t_l1_weight_shape, 1, data_type, data_layout); - auto t_l0_dst_info = TensorInfo(); - auto t_dst_info = TensorInfo(); - - OperatorGraph op_graph; - const auto conv2d_desc = Conv2dDescriptor{}; - - const auto op_t_l0_input = add_tensor(op_graph, t_l0_input_info); - const auto op_t_l0_weight = add_tensor(op_graph, t_l0_weight_info); - const auto op_t_l1_weight = add_tensor(op_graph, t_l1_weight_info); - const auto op_t_l0_dst = add_tensor(op_graph, t_l0_dst_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - add_op_conv2d(op_graph, conv2d_desc, op_t_l0_input, op_t_l0_weight, op_t_l0_dst); - add_op_conv2d(op_graph, conv2d_desc, op_t_l0_dst, op_t_l1_weight, op_t_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL) -{ - /* Computation: - * out = add(l2_lhs, add(add(l0_lhs, l0_rhs), l1_rhs)) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - const auto t_l0_lhs_shape = TensorShape(1, 256, 3); - const auto t_l0_rhs_shape = TensorShape(1, 256, 3); - const auto t_l1_rhs_shape = TensorShape(1, 1, 3); - const auto t_l2_lhs_shape = TensorShape(1024, 1, 3); - - auto t_l0_lhs_info = TensorInfo(t_l0_lhs_shape, 1, data_type, data_layout); - auto t_l0_rhs_info = TensorInfo(t_l0_rhs_shape, 1, data_type, data_layout); - auto t_l1_rhs_info = TensorInfo(t_l1_rhs_shape, 1, data_type, data_layout); - auto t_l2_lhs_info = TensorInfo(t_l2_lhs_shape, 1, data_type, data_layout); - auto t_l0_dst_info = TensorInfo(); - auto t_l1_dst_info = TensorInfo(); - auto t_dst_info = TensorInfo(); - - OperatorGraph op_graph; - const auto add_desc = ElementwiseDescriptor{}; - - const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info); - const auto op_t_l0_rhs = add_tensor(op_graph, t_l0_rhs_info); - const auto op_t_l1_rhs = add_tensor(op_graph, t_l1_rhs_info); - const auto op_t_l2_lhs = add_tensor(op_graph, t_l2_lhs_info); - const auto op_t_l0_dst = add_tensor(op_graph, t_l0_dst_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_l1_dst = add_tensor(op_graph, t_l1_dst_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - add_op_elementwise_op(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst); - add_op_elementwise_op(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst); - add_op_elementwise_op(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL) -{ - /* Computation: - * out = add(conv(l0_0_input, l0_0_weight), add(l0_1_lhs, l0_1_rhs)) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - - const auto t_l0_0_input_shape = TensorShape(128, 21, 21); - const auto t_l0_0_weight_shape = TensorShape(144, 128, 1, 1); - const auto t_l0_1_lhs_shape = TensorShape(144, 21, 21); - const auto t_l0_1_rhs_shape = TensorShape(1, 1, 21); - - auto t_l0_0_input_info = TensorInfo(t_l0_0_input_shape, 1, data_type, data_layout); - auto t_l0_0_weight_info = TensorInfo(t_l0_0_weight_shape, 1, data_type, data_layout); - auto t_l0_1_lhs_info = TensorInfo(t_l0_1_lhs_shape, 1, data_type, data_layout); - auto t_l0_1_rhs_info = TensorInfo(t_l0_1_rhs_shape, 1, data_type, data_layout); - auto t_l0_0_dst_info = TensorInfo(); - auto t_l0_1_dst_info = TensorInfo(); - auto t_dst_info = TensorInfo(); - - OperatorGraph op_graph; - const auto conv2d_desc = Conv2dDescriptor{}; - const auto add_desc = ElementwiseDescriptor{}; - - const auto op_t_l0_0_input = add_tensor(op_graph, t_l0_0_input_info); - const auto op_t_l0_0_weight = add_tensor(op_graph, t_l0_0_weight_info); - const auto op_t_l0_1_lhs = add_tensor(op_graph, t_l0_1_lhs_info); - const auto op_t_l0_1_rhs = add_tensor(op_graph, t_l0_1_rhs_info); - const auto op_t_l0_0_dst = add_tensor(op_graph, t_l0_0_dst_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_l0_1_dst = add_tensor(op_graph, t_l0_1_dst_info); // temp accumulator; TensorInfo to be inferred - const auto op_t_dst = add_tensor(op_graph, t_dst_info); - - add_op_conv2d(op_graph, conv2d_desc, op_t_l0_0_input, op_t_l0_0_weight, op_t_l0_0_dst); - add_op_elementwise_op(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst); - add_op_elementwise_op(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_CASE(Loop, framework::DatasetMode::ALL) -{ - /* Computation: - * tensor state0; - * state1 = add(l0_lhs, state0) - * state0 = add(l1_lhs, state1) - */ - const auto data_type = DataType::F32; - const auto data_layout = DataLayout::NHWC; - - const auto t_shape = TensorShape(13, 21); - - auto t_l0_lhs_info = TensorInfo(t_shape, 1, data_type, data_layout); - auto t_l1_lhs_info = TensorInfo(t_shape, 1, data_type, data_layout); - auto state0_info = TensorInfo(t_shape, 1, data_type, data_layout); - auto state1_info = TensorInfo(); - - OperatorGraph op_graph; - const auto conv2d_desc = Conv2dDescriptor{}; - const auto add_desc = ElementwiseDescriptor{}; - - const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info); - const auto op_t_l1_lhs = add_tensor(op_graph, t_l1_lhs_info); - const auto op_t_state0 = add_tensor(op_graph, state0_info); - const auto op_t_state1 = add_tensor(op_graph, state1_info); - - add_op_conv2d(op_graph, conv2d_desc, op_t_l0_lhs, op_t_state0, op_t_state1); - add_op_elementwise_op(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0); - - const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } }; - ClWorkload workload; - const auto success = build(workload, op_graph, workload_ctx); - - ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS); -} -TEST_SUITE_END() // Invalid - -TEST_SUITE_END() // DYNAMIC_FUSION -TEST_SUITE_END() // INTEGRATION -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/tests/validation/CL/UNIT/dynamic_fusion/Utils.h b/tests/validation/CL/UNIT/dynamic_fusion/Utils.h deleted file mode 100644 index 4512305c1e..0000000000 --- a/tests/validation/CL/UNIT/dynamic_fusion/Utils.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TESTS_VALIDATION_CL_DYNAMICFUSION_UTILS -#define TESTS_VALIDATION_CL_DYNAMICFUSION_UTILS - -#include "tests/AssetsLibrary.h" -#include "utils/Utils.h" - -#include -#include -#include - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace utils -{ -/** A pair of macros which measures the wall clock time, and records it into a map measurement_map with name clock_name - * - */ -#define TICK(clock_name) \ - auto clock_name##_tick = std::chrono::high_resolution_clock::now(); -#define TOCK(clock_name, measurement_map) \ - auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \ - measurement_map["\"" #clock_name "\""] = duration_cast(clock_name##_tock - clock_name##_tick); -#define TOCK_AVG(clock_name, measurement_map, num_iterations) \ - auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \ - measurement_map["\"" #clock_name "\""] = duration_cast((clock_name##_tock - clock_name##_tick) / (num_iterations)); - -template -void fill(U &&tensor, int seed, AssetsLibrary *library) -{ - static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); - using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; - - DistributionType distribution{ T(-1.0f), T(1.0f) }; - library->fill(tensor, distribution, seed); - - // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) - DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; - library->fill_borders_with_garbage(tensor, distribution_inf, seed); -} -} // namespace utils -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif //TESTS_VALIDATION_CL_DYNAMICFUSION_UTILS \ No newline at end of file -- cgit v1.2.1