diff options
Diffstat (limited to 'tests/validation/dynamic_fusion/gpu/Integration.cpp')
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/Integration.cpp | 240 |
1 files changed, 128 insertions, 112 deletions
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index 89cca5cd66..bb9c008f01 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,11 +37,10 @@ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" - #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h" + #include "tests/CL/CLAccessor.h" #include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" #include "tests/validation/dynamic_fusion/Utils.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ConvolutionLayer.h" @@ -50,6 +49,7 @@ #include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/PixelWiseMultiplication.h" +#include "tests/validation/Validation.h" using namespace arm_compute::experimental::dynamic_fusion; using namespace arm_compute::test::validation::utils; @@ -79,18 +79,18 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Fuse conv2d Conv2dAttributes conv2d_attr{}; - TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); - TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); - ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr); + ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr); - TensorInfo dst_info = context.create_tensor_info(); - GpuOutput::create_op(sketch, conv_out_info, &dst_info); + ITensorInfo *dst_info = context.create_tensor_info(); + GpuOutput::create_op(sketch, conv_out_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; @@ -98,7 +98,7 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) // (Important) Allocate auxiliary tensor memory if there are any // Instead of using ACL allocated memory, the user can choose to import memory into the tensors - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -115,9 +115,9 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) CLTensor t_dst{}; // Initialize user tensors - t_input.allocator()->init(input_info); - t_weight.allocator()->init(weight_info); - t_dst.allocator()->init(dst_info); + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_dst.allocator()->init(*dst_info); // Allocate and fill user tensors // Instead of using ACL allocator, the user can choose to import memory into the tensors @@ -128,12 +128,12 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) fill<float>(CLAccessor(t_weight), 1, library.get()); // Run runtime - runtime.run({ &t_input, &t_weight, &t_dst }); + runtime.run({&t_input, &t_weight, &t_dst}); // Create reference - SimpleTensor<float> ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor<float> ref_t_weight{ t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; - SimpleTensor<float> ref_t_bias_placeholder{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC }; + SimpleTensor<float> ref_t_input{t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; + SimpleTensor<float> ref_t_weight{t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; + SimpleTensor<float> ref_t_bias_placeholder{t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; // Fill reference fill<float>(ref_t_input, 0, library.get()); @@ -145,12 +145,15 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) auto t_dst_shape_nchw = t_dst_shape; permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U)); - PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, DimensionRoundingType{}); - auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation()); - const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); + auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, + t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation()); + const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); - RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + RelativeTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); } #endif // ACL_INTERNAL_TEST_CKW_IN_DF @@ -167,20 +170,20 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; - TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo out_0_info = context.create_tensor_info(); - TensorInfo out_1_info = context.create_tensor_info(); + ITensorInfo *out_0_info = context.create_tensor_info(); + ITensorInfo *out_1_info = context.create_tensor_info(); - ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info); - GpuOutput::create_op(sketch, ans_0_info, &out_0_info); - ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info); - GpuOutput::create_op(sketch, ans_1_info, &out_1_info); + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info); + GpuOutput::create_op(sketch, ans_0_info, out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info); + GpuOutput::create_op(sketch, ans_1_info, out_1_info); // Configure runtime ClWorkloadRuntime runtime; @@ -188,7 +191,7 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) // (Important) Allocate auxiliary tensor memory if there are any // Instead of using ACL allocated memory, the user can choose to import memory into the tensors - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -208,12 +211,12 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) CLTensor t_out_1{}; // Initialize user tensors - t_in_0.allocator()->init(in_0_info); - t_in_1.allocator()->init(in_1_info); - t_in_2.allocator()->init(in_2_info); + t_in_0.allocator()->init(*in_0_info); + t_in_1.allocator()->init(*in_1_info); + t_in_2.allocator()->init(*in_2_info); - t_out_0.allocator()->init(out_0_info); - t_out_1.allocator()->init(out_1_info); + t_out_0.allocator()->init(*out_0_info); + t_out_1.allocator()->init(*out_1_info); // Allocate and fill user tensors // Instead of using ACL allocator, the user can choose to import memory into the tensors @@ -229,15 +232,15 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) fill<float>(CLAccessor(t_in_2), 2, library.get()); // Run runtime - runtime.run({ &t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1 }); + runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1}); // Create reference - SimpleTensor<float> ref_t_in_0{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_in_1{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_in_2{ t_input_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()}; - SimpleTensor<float> ref_t_out_0{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_out_1{ t_input_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_out_1{t_input_shape, data_type, 1, QuantizationInfo()}; // Fill reference fill<float>(ref_t_in_0, 0, library.get()); @@ -245,9 +248,11 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) fill<float>(ref_t_in_2, 2, library.get()); reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP); - reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1, ConvertPolicy::WRAP); + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1, + ConvertPolicy::WRAP); - RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + RelativeTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_f32); validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_f32); } @@ -264,15 +269,15 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; - TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo out_0_info = context.create_tensor_info(); - TensorInfo out_1_info = context.create_tensor_info(); + ITensorInfo *out_0_info = context.create_tensor_info(); + ITensorInfo *out_1_info = context.create_tensor_info(); CastAttributes cast_0_attr; cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE); @@ -280,12 +285,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) CastAttributes cast_1_attr; cast_1_attr.data_type(DataType::F32).convert_policy(ConvertPolicy::SATURATE); - ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info); - GpuOutput::create_op(sketch, ans_0_info, &out_0_info); - ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info); + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info); + GpuOutput::create_op(sketch, ans_0_info, out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info); ITensorInfo *ans_2_info = GpuCast::create_op(sketch, ans_1_info, cast_0_attr); ITensorInfo *ans_3_info = GpuCast::create_op(sketch, ans_2_info, cast_1_attr); - GpuOutput::create_op(sketch, ans_3_info, &out_1_info); + GpuOutput::create_op(sketch, ans_3_info, out_1_info); // Configure runtime ClWorkloadRuntime runtime; @@ -293,7 +298,7 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) // (Important) Allocate auxiliary tensor memory if there are any // Instead of using ACL allocated memory, the user can choose to import memory into the tensors - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -313,12 +318,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) CLTensor t_out_1{}; // Initialize user tensors - t_in_0.allocator()->init(in_0_info); - t_in_1.allocator()->init(in_1_info); - t_in_2.allocator()->init(in_2_info); + t_in_0.allocator()->init(*in_0_info); + t_in_1.allocator()->init(*in_1_info); + t_in_2.allocator()->init(*in_2_info); - t_out_0.allocator()->init(out_0_info); - t_out_1.allocator()->init(out_1_info); + t_out_0.allocator()->init(*out_0_info); + t_out_1.allocator()->init(*out_1_info); // Allocate and fill user tensors // Instead of using ACL allocator, the user can choose to import memory into the tensors @@ -334,15 +339,15 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) fill<float>(CLAccessor(t_in_2), 2, library.get()); // Run runtime - runtime.run({ &t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1 }); + runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1}); // Create reference - SimpleTensor<float> ref_t_in_0{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_in_1{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_in_2{ t_input_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()}; - SimpleTensor<float> ref_t_out_0{ t_input_shape, data_type, 1, QuantizationInfo() }; - SimpleTensor<float> ref_t_ans_1{ t_input_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_ans_1{t_input_shape, data_type, 1, QuantizationInfo()}; // Fill reference fill<float>(ref_t_in_0, 0, library.get()); @@ -350,9 +355,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) fill<float>(ref_t_in_2, 2, library.get()); reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP); - reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1, ConvertPolicy::WRAP); - const auto ref_t_ans_2 = reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0); - const auto ref_t_out_1 = reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0); + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1, + ConvertPolicy::WRAP); + const auto ref_t_ans_2 = + reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0); + const auto ref_t_out_1 = + reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0); RelativeTolerance<float> tolerance_add_f32(0.001f); AbsoluteTolerance<float> tolerance_cast_f32(1.0f); @@ -436,20 +444,22 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) Conv2dAttributes conv2d_attr; auto tensor1_info = context.create_tensor_info(conv2d_wei_shape, 1, DataType::F32, DataLayout::NHWC); auto tensor2_info = context.create_tensor_info(conv2d_bia_shape, 1, DataType::F32, DataLayout::NHWC); - ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr), framework::LogLevel::ERRORS); - auto ans_info = GpuConv2d::create_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr); + ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr), + framework::LogLevel::ERRORS); + auto ans_info = GpuConv2d::create_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr); ARM_COMPUTE_EXPECT(GpuSigmoid::validate_op(sketch0, ans_info), framework::LogLevel::ERRORS); ans_info = GpuSigmoid::create_op(sketch0, ans_info); DepthwiseConv2dAttributes dwc_attr; - auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC); - auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC); - ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS); + auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC); + auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC); + ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, tensor3_info, tensor4_info, dwc_attr), + framework::LogLevel::ERRORS); auto tensor5_info = context.create_tensor_info(); - ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, &tensor5_info), framework::LogLevel::ERRORS); - GpuOutput::create_op(sketch0, ans_info, &tensor5_info); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, tensor5_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch0, ans_info, tensor5_info); // Create the first workload runtime. ClWorkloadRuntime runtime0; @@ -458,15 +468,16 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) // Create the second sketch: dwc + sigmoid + output. GpuWorkloadSketch sketch1(&context); - ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS); - ans_info = GpuDepthwiseConv2d::create_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr); + ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr), + framework::LogLevel::ERRORS); + ans_info = GpuDepthwiseConv2d::create_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr); - ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, &tensor2_info), framework::LogLevel::ERRORS); - ans_info = GpuMul::create_op(sketch1, ans_info, &tensor2_info); + ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, tensor2_info), framework::LogLevel::ERRORS); + ans_info = GpuMul::create_op(sketch1, ans_info, tensor2_info); auto tensor6_info = context.create_tensor_info(); - ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, &tensor6_info), framework::LogLevel::ERRORS); - GpuOutput::create_op(sketch1, ans_info, &tensor6_info); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, tensor6_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch1, ans_info, tensor6_info); // Create the second workload runtime. ClWorkloadRuntime runtime1; @@ -481,13 +492,13 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) CLTensor tensor5; CLTensor tensor6; - tensor0.allocator()->init(tensor0_info); - tensor1.allocator()->init(tensor1_info); - tensor2.allocator()->init(tensor2_info); - tensor3.allocator()->init(tensor3_info); - tensor4.allocator()->init(tensor4_info); - tensor5.allocator()->init(tensor5_info); - tensor6.allocator()->init(tensor6_info); + tensor0.allocator()->init(*tensor0_info); + tensor1.allocator()->init(*tensor1_info); + tensor2.allocator()->init(*tensor2_info); + tensor3.allocator()->init(*tensor3_info); + tensor4.allocator()->init(*tensor4_info); + tensor5.allocator()->init(*tensor5_info); + tensor6.allocator()->init(*tensor6_info); tensor0.allocator()->allocate(); tensor1.allocator()->allocate(); @@ -498,7 +509,7 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) tensor6.allocator()->allocate(); // Allocate the auxiliary tensors. - for(auto &data : runtime0.get_auxiliary_tensors()) + for (auto &data : runtime0.get_auxiliary_tensors()) { auto tensor = std::get<0>(data); auto &tensor_info = std::get<1>(data); @@ -508,7 +519,7 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) tensor->allocator()->allocate(); } - for(auto &data : runtime1.get_auxiliary_tensors()) + for (auto &data : runtime1.get_auxiliary_tensors()) { auto tensor = std::get<0>(data); auto &tensor_info = std::get<1>(data); @@ -526,8 +537,8 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) fill<float>(CLAccessor(tensor4), 4, library.get()); // Run each runtime. - runtime0.run({ &tensor0, &tensor1, &tensor2, &tensor5 }); - runtime1.run({ &tensor5, &tensor3, &tensor4, &tensor2, &tensor6 }); + runtime0.run({&tensor0, &tensor1, &tensor2, &tensor5}); + runtime1.run({&tensor5, &tensor3, &tensor4, &tensor2, &tensor6}); // Compute the reference result. SimpleTensor<float> ref_conv2d_src(conv2d_src_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); @@ -549,18 +560,22 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) const auto ref_conv2d_src_nchw = reference::permute(ref_conv2d_src, nhwc_to_nchw); const auto ref_conv2d_wei_nchw = reference::permute(ref_conv2d_wei, nhwc_to_nchw); const auto ref_conv2d_bia_nchw = reference::permute(ref_conv2d_bia, nhwc_to_nchw); - const auto ref_conv2d_dst_nchw = reference::convolution_layer(ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo()); + const auto ref_conv2d_dst_nchw = reference::convolution_layer( + ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo()); - const auto ref_sigmoid_dst_nchw = reference::activation_layer(ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + const auto ref_sigmoid_dst_nchw = reference::activation_layer( + ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); auto dwc_dst_shape_nchw = dwc_dst_shape; permute(dwc_dst_shape_nchw, nhwc_to_nchw); const auto ref_dwc_wei_nchw = reference::permute(ref_dwc_wei, nhwc_to_nchw); const auto ref_dwc_bia_nchw = reference::permute(ref_dwc_bia, nhwc_to_nchw); - const auto ref_dwc_dst_nchw = reference::depthwise_convolution(ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1); + const auto ref_dwc_dst_nchw = reference::depthwise_convolution( + ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1); - const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>(ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, - DataType::F32); + const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>( + ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, + DataType::F32); constexpr RelativeTolerance<float> tolerance(0.001f); validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance); @@ -587,34 +602,35 @@ TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create tensor infos - TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); - TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); ITensorInfo *dst_info; // Fuse conv2d into the workload { // Validate operator - const Status success = GpuConv2d::validate_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr); + const Status success = GpuConv2d::validate_op(sketch, input_info, weight_info, nullptr, conv2d_attr); ARM_COMPUTE_EXPECT(bool(success), framework::LogLevel::ERRORS); - dst_info = GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr); + dst_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr); } // Create tensor infos - TensorInfo weight_info_2 = context.create_tensor_info(t_weight_info); + ITensorInfo *weight_info_2 = context.create_tensor_info(t_weight_info); // Fuse conv2d into the workload { // Validate operator, should fail - const Status success = GpuConv2d::validate_op(sketch, dst_info, &weight_info_2, nullptr, conv2d_attr); - const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload"; + const Status success = GpuConv2d::validate_op(sketch, dst_info, weight_info_2, nullptr, conv2d_attr); + const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload"; ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos), + framework::LogLevel::ERRORS); } } TEST_SUITE_END() // Invalid_Fusion_Should_Fail |