Diffstat (limited to 'tests/validation/dynamic_fusion/gpu/Integration.cpp')
-rw-r--r--  tests/validation/dynamic_fusion/gpu/Integration.cpp | 240
1 file changed, 128 insertions(+), 112 deletions(-)
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp
index 89cca5cd66..bb9c008f01 100644
--- a/tests/validation/dynamic_fusion/gpu/Integration.cpp
+++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp
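Note on the change: the substantive edit in this diff is the migration to the updated GpuWorkloadContext::create_tensor_info() API, which now returns a context-owned ITensorInfo * instead of a TensorInfo by value; the remaining hunks are clang-format reflow (brace spacing, "for (", wrapped argument lists). A minimal before/after sketch of the affected call sites, using the Conv2d test's names and only the signatures visible in the hunks below:

    // Old API: the caller held a TensorInfo value and passed its address around
    TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
    GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr);
    t_input.allocator()->init(input_info);

    // New API: the context owns the ITensorInfo and hands back a pointer,
    // so operators take it directly and user tensors init from the dereferenced info
    ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
    GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
    t_input.allocator()->init(*input_info);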
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,11 +37,10 @@
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
-
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h"
+
#include "tests/CL/CLAccessor.h"
#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
#include "tests/validation/dynamic_fusion/Utils.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/ConvolutionLayer.h"
@@ -50,6 +49,7 @@
#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/Permute.h"
#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include "tests/validation/Validation.h"
using namespace arm_compute::experimental::dynamic_fusion;
using namespace arm_compute::test::validation::utils;
@@ -79,18 +79,18 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL)
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
- auto context = GpuWorkloadContext{ &cl_compile_ctx };
- GpuWorkloadSketch sketch{ &context };
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
// Fuse conv2d
Conv2dAttributes conv2d_attr{};
- TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
- TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
+ ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
- ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr);
+ ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
- TensorInfo dst_info = context.create_tensor_info();
- GpuOutput::create_op(sketch, conv_out_info, &dst_info);
+ ITensorInfo *dst_info = context.create_tensor_info();
+ GpuOutput::create_op(sketch, conv_out_info, dst_info);
// Configure runtime
ClWorkloadRuntime runtime;
@@ -98,7 +98,7 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL)
// (Important) Allocate auxiliary tensor memory if there are any
// Instead of using ACL allocated memory, the user can choose to import memory into the tensors
- for(auto &data : runtime.get_auxiliary_tensors())
+ for (auto &data : runtime.get_auxiliary_tensors())
{
CLTensor *tensor = std::get<0>(data);
TensorInfo info = std::get<1>(data);
@@ -115,9 +115,9 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL)
CLTensor t_dst{};
// Initialize user tensors
- t_input.allocator()->init(input_info);
- t_weight.allocator()->init(weight_info);
- t_dst.allocator()->init(dst_info);
+ t_input.allocator()->init(*input_info);
+ t_weight.allocator()->init(*weight_info);
+ t_dst.allocator()->init(*dst_info);
// Allocate and fill user tensors
// Instead of using ACL allocator, the user can choose to import memory into the tensors
@@ -128,12 +128,12 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL)
fill<float>(CLAccessor(t_weight), 1, library.get());
// Run runtime
- runtime.run({ &t_input, &t_weight, &t_dst });
+ runtime.run({&t_input, &t_weight, &t_dst});
// Create reference
- SimpleTensor<float> ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
- SimpleTensor<float> ref_t_weight{ t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
- SimpleTensor<float> ref_t_bias_placeholder{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
+ SimpleTensor<float> ref_t_input{t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
+ SimpleTensor<float> ref_t_weight{t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
+ SimpleTensor<float> ref_t_bias_placeholder{t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC};
// Fill reference
fill<float>(ref_t_input, 0, library.get());
@@ -145,12 +145,15 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL)
auto t_dst_shape_nchw = t_dst_shape;
permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U));
- PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom,
+ PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left,
+ conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom,
DimensionRoundingType{});
- auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation());
- const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));
+ auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw,
+ t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation());
+ const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));
- RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+ RelativeTolerance<float> tolerance_f32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32);
}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
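Each of these integration tests follows the same runtime flow: build a sketch, configure a ClWorkloadRuntime from it, back the runtime's auxiliary (intermediate) tensors with memory, initialize and allocate the user tensors, then run. A condensed sketch of that flow, assuming runtime.configure(sketch) as the configuration call (elided between the hunks above) and simplifying the auxiliary-tensor setup to the two tuple members the tests unpack with std::get:

    ClWorkloadRuntime runtime;
    runtime.configure(sketch); // assumption: configuration step not shown in the hunks

    // Back every auxiliary tensor with memory before running
    for (auto &data : runtime.get_auxiliary_tensors())
    {
        CLTensor  *tensor = std::get<0>(data);
        TensorInfo info   = std::get<1>(data);
        tensor->allocator()->init(info); // simplified; the tests may also apply the auxiliary memory descriptor
        tensor->allocator()->allocate();
    }

    // User tensors are initialized from the (dereferenced) ITensorInfo pointers, then allocated and filled
    t_input.allocator()->init(*input_info);
    t_weight.allocator()->init(*weight_info);
    t_dst.allocator()->init(*dst_info);

    runtime.run({&t_input, &t_weight, &t_dst});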
@@ -167,20 +170,20 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
- auto context = GpuWorkloadContext{ &cl_compile_ctx };
- GpuWorkloadSketch sketch{ &context };
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
- TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo out_0_info = context.create_tensor_info();
- TensorInfo out_1_info = context.create_tensor_info();
+ ITensorInfo *out_0_info = context.create_tensor_info();
+ ITensorInfo *out_1_info = context.create_tensor_info();
- ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info);
- GpuOutput::create_op(sketch, ans_0_info, &out_0_info);
- ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info);
- GpuOutput::create_op(sketch, ans_1_info, &out_1_info);
+ ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
+ GpuOutput::create_op(sketch, ans_0_info, out_0_info);
+ ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info);
+ GpuOutput::create_op(sketch, ans_1_info, out_1_info);
// Configure runtime
ClWorkloadRuntime runtime;
@@ -188,7 +191,7 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
// (Important) Allocate auxiliary tensor memory if there are any
// Instead of using ACL allocated memory, the user can choose to import memory into the tensors
- for(auto &data : runtime.get_auxiliary_tensors())
+ for (auto &data : runtime.get_auxiliary_tensors())
{
CLTensor *tensor = std::get<0>(data);
TensorInfo info = std::get<1>(data);
@@ -208,12 +211,12 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
CLTensor t_out_1{};
// Initialize user tensors
- t_in_0.allocator()->init(in_0_info);
- t_in_1.allocator()->init(in_1_info);
- t_in_2.allocator()->init(in_2_info);
+ t_in_0.allocator()->init(*in_0_info);
+ t_in_1.allocator()->init(*in_1_info);
+ t_in_2.allocator()->init(*in_2_info);
- t_out_0.allocator()->init(out_0_info);
- t_out_1.allocator()->init(out_1_info);
+ t_out_0.allocator()->init(*out_0_info);
+ t_out_1.allocator()->init(*out_1_info);
// Allocate and fill user tensors
// Instead of using ACL allocator, the user can choose to import memory into the tensors
@@ -229,15 +232,15 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
fill<float>(CLAccessor(t_in_2), 2, library.get());
// Run runtime
- runtime.run({ &t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1 });
+ runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1});
// Create reference
- SimpleTensor<float> ref_t_in_0{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_in_1{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_in_2{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()};
- SimpleTensor<float> ref_t_out_0{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_out_1{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_out_1{t_input_shape, data_type, 1, QuantizationInfo()};
// Fill reference
fill<float>(ref_t_in_0, 0, library.get());
@@ -245,9 +248,11 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
fill<float>(ref_t_in_2, 2, library.get());
reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP);
- reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1, ConvertPolicy::WRAP);
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1,
+ ConvertPolicy::WRAP);
- RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+ RelativeTolerance<float> tolerance_f32(
+ 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_f32);
validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_f32);
}
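The Add_Output_Add_Output test above fuses two additions into a single sketch and exposes both the intermediate and the final sum as workload outputs: each GpuOutput::create_op call binds an operator result to a user-visible tensor info, and every bound tensor then appears in the run() call. Condensed from the hunks above, with the test's own names:

    ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
    GpuOutput::create_op(sketch, ans_0_info, out_0_info);   // expose in_0 + in_1
    ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info);
    GpuOutput::create_op(sketch, ans_1_info, out_1_info);   // expose (in_0 + in_1) + in_2

    runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1});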
@@ -264,15 +269,15 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
- auto context = GpuWorkloadContext{ &cl_compile_ctx };
- GpuWorkloadSketch sketch{ &context };
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
- TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type);
+ ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type);
- TensorInfo out_0_info = context.create_tensor_info();
- TensorInfo out_1_info = context.create_tensor_info();
+ ITensorInfo *out_0_info = context.create_tensor_info();
+ ITensorInfo *out_1_info = context.create_tensor_info();
CastAttributes cast_0_attr;
cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE);
@@ -280,12 +285,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
CastAttributes cast_1_attr;
cast_1_attr.data_type(DataType::F32).convert_policy(ConvertPolicy::SATURATE);
- ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info);
- GpuOutput::create_op(sketch, ans_0_info, &out_0_info);
- ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info);
+ ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
+ GpuOutput::create_op(sketch, ans_0_info, out_0_info);
+ ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info);
ITensorInfo *ans_2_info = GpuCast::create_op(sketch, ans_1_info, cast_0_attr);
ITensorInfo *ans_3_info = GpuCast::create_op(sketch, ans_2_info, cast_1_attr);
- GpuOutput::create_op(sketch, ans_3_info, &out_1_info);
+ GpuOutput::create_op(sketch, ans_3_info, out_1_info);
// Configure runtime
ClWorkloadRuntime runtime;
@@ -293,7 +298,7 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
// (Important) Allocate auxiliary tensor memory if there are any
// Instead of using ACL allocated memory, the user can choose to import memory into the tensors
- for(auto &data : runtime.get_auxiliary_tensors())
+ for (auto &data : runtime.get_auxiliary_tensors())
{
CLTensor *tensor = std::get<0>(data);
TensorInfo info = std::get<1>(data);
@@ -313,12 +318,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
CLTensor t_out_1{};
// Initialize user tensors
- t_in_0.allocator()->init(in_0_info);
- t_in_1.allocator()->init(in_1_info);
- t_in_2.allocator()->init(in_2_info);
+ t_in_0.allocator()->init(*in_0_info);
+ t_in_1.allocator()->init(*in_1_info);
+ t_in_2.allocator()->init(*in_2_info);
- t_out_0.allocator()->init(out_0_info);
- t_out_1.allocator()->init(out_1_info);
+ t_out_0.allocator()->init(*out_0_info);
+ t_out_1.allocator()->init(*out_1_info);
// Allocate and fill user tensors
// Instead of using ACL allocator, the user can choose to import memory into the tensors
@@ -334,15 +339,15 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
fill<float>(CLAccessor(t_in_2), 2, library.get());
// Run runtime
- runtime.run({ &t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1 });
+ runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1});
// Create reference
- SimpleTensor<float> ref_t_in_0{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_in_1{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_in_2{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()};
- SimpleTensor<float> ref_t_out_0{ t_input_shape, data_type, 1, QuantizationInfo() };
- SimpleTensor<float> ref_t_ans_1{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()};
+ SimpleTensor<float> ref_t_ans_1{t_input_shape, data_type, 1, QuantizationInfo()};
// Fill reference
fill<float>(ref_t_in_0, 0, library.get());
@@ -350,9 +355,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
fill<float>(ref_t_in_2, 2, library.get());
reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP);
- reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1, ConvertPolicy::WRAP);
- const auto ref_t_ans_2 = reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0);
- const auto ref_t_out_1 = reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0);
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1,
+ ConvertPolicy::WRAP);
+ const auto ref_t_ans_2 =
+ reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0);
+ const auto ref_t_out_1 =
+ reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0);
RelativeTolerance<float> tolerance_add_f32(0.001f);
AbsoluteTolerance<float> tolerance_cast_f32(1.0f);
@@ -436,20 +444,22 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
Conv2dAttributes conv2d_attr;
auto tensor1_info = context.create_tensor_info(conv2d_wei_shape, 1, DataType::F32, DataLayout::NHWC);
auto tensor2_info = context.create_tensor_info(conv2d_bia_shape, 1, DataType::F32, DataLayout::NHWC);
- ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr), framework::LogLevel::ERRORS);
- auto ans_info = GpuConv2d::create_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr);
+ ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr),
+ framework::LogLevel::ERRORS);
+ auto ans_info = GpuConv2d::create_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr);
ARM_COMPUTE_EXPECT(GpuSigmoid::validate_op(sketch0, ans_info), framework::LogLevel::ERRORS);
ans_info = GpuSigmoid::create_op(sketch0, ans_info);
DepthwiseConv2dAttributes dwc_attr;
- auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC);
- auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC);
- ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS);
+ auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC);
+ auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC);
+ ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, tensor3_info, tensor4_info, dwc_attr),
+ framework::LogLevel::ERRORS);
auto tensor5_info = context.create_tensor_info();
- ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, &tensor5_info), framework::LogLevel::ERRORS);
- GpuOutput::create_op(sketch0, ans_info, &tensor5_info);
+ ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, tensor5_info), framework::LogLevel::ERRORS);
+ GpuOutput::create_op(sketch0, ans_info, tensor5_info);
// Create the first workload runtime.
ClWorkloadRuntime runtime0;
@@ -458,15 +468,16 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
// Create the second sketch: dwc + sigmoid + output.
GpuWorkloadSketch sketch1(&context);
- ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS);
- ans_info = GpuDepthwiseConv2d::create_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr);
+ ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr),
+ framework::LogLevel::ERRORS);
+ ans_info = GpuDepthwiseConv2d::create_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr);
- ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, &tensor2_info), framework::LogLevel::ERRORS);
- ans_info = GpuMul::create_op(sketch1, ans_info, &tensor2_info);
+ ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, tensor2_info), framework::LogLevel::ERRORS);
+ ans_info = GpuMul::create_op(sketch1, ans_info, tensor2_info);
auto tensor6_info = context.create_tensor_info();
- ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, &tensor6_info), framework::LogLevel::ERRORS);
- GpuOutput::create_op(sketch1, ans_info, &tensor6_info);
+ ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, tensor6_info), framework::LogLevel::ERRORS);
+ GpuOutput::create_op(sketch1, ans_info, tensor6_info);
// Create the second workload runtime.
ClWorkloadRuntime runtime1;
@@ -481,13 +492,13 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
CLTensor tensor5;
CLTensor tensor6;
- tensor0.allocator()->init(tensor0_info);
- tensor1.allocator()->init(tensor1_info);
- tensor2.allocator()->init(tensor2_info);
- tensor3.allocator()->init(tensor3_info);
- tensor4.allocator()->init(tensor4_info);
- tensor5.allocator()->init(tensor5_info);
- tensor6.allocator()->init(tensor6_info);
+ tensor0.allocator()->init(*tensor0_info);
+ tensor1.allocator()->init(*tensor1_info);
+ tensor2.allocator()->init(*tensor2_info);
+ tensor3.allocator()->init(*tensor3_info);
+ tensor4.allocator()->init(*tensor4_info);
+ tensor5.allocator()->init(*tensor5_info);
+ tensor6.allocator()->init(*tensor6_info);
tensor0.allocator()->allocate();
tensor1.allocator()->allocate();
@@ -498,7 +509,7 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
tensor6.allocator()->allocate();
// Allocate the auxiliary tensors.
- for(auto &data : runtime0.get_auxiliary_tensors())
+ for (auto &data : runtime0.get_auxiliary_tensors())
{
auto tensor = std::get<0>(data);
auto &tensor_info = std::get<1>(data);
@@ -508,7 +519,7 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
tensor->allocator()->allocate();
}
- for(auto &data : runtime1.get_auxiliary_tensors())
+ for (auto &data : runtime1.get_auxiliary_tensors())
{
auto tensor = std::get<0>(data);
auto &tensor_info = std::get<1>(data);
@@ -526,8 +537,8 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
fill<float>(CLAccessor(tensor4), 4, library.get());
// Run each runtime.
- runtime0.run({ &tensor0, &tensor1, &tensor2, &tensor5 });
- runtime1.run({ &tensor5, &tensor3, &tensor4, &tensor2, &tensor6 });
+ runtime0.run({&tensor0, &tensor1, &tensor2, &tensor5});
+ runtime1.run({&tensor5, &tensor3, &tensor4, &tensor2, &tensor6});
// Compute the reference result.
SimpleTensor<float> ref_conv2d_src(conv2d_src_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
@@ -549,18 +560,22 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL)
const auto ref_conv2d_src_nchw = reference::permute(ref_conv2d_src, nhwc_to_nchw);
const auto ref_conv2d_wei_nchw = reference::permute(ref_conv2d_wei, nhwc_to_nchw);
const auto ref_conv2d_bia_nchw = reference::permute(ref_conv2d_bia, nhwc_to_nchw);
- const auto ref_conv2d_dst_nchw = reference::convolution_layer(ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo());
+ const auto ref_conv2d_dst_nchw = reference::convolution_layer(
+ ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo());
- const auto ref_sigmoid_dst_nchw = reference::activation_layer(ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+ const auto ref_sigmoid_dst_nchw = reference::activation_layer(
+ ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
auto dwc_dst_shape_nchw = dwc_dst_shape;
permute(dwc_dst_shape_nchw, nhwc_to_nchw);
const auto ref_dwc_wei_nchw = reference::permute(ref_dwc_wei, nhwc_to_nchw);
const auto ref_dwc_bia_nchw = reference::permute(ref_dwc_bia, nhwc_to_nchw);
- const auto ref_dwc_dst_nchw = reference::depthwise_convolution(ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1);
+ const auto ref_dwc_dst_nchw = reference::depthwise_convolution(
+ ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1);
- const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>(ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP,
- DataType::F32);
+ const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>(
+ ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP,
+ DataType::F32);
constexpr RelativeTolerance<float> tolerance(0.001f);
validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance);
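In the Conv2d_Sigmoid_DepthwiseConv2d_Mul test, the graph is split across two sketches and two runtimes; tensor5 is the bridge tensor, written by the first workload through GpuOutput and consumed by the second as the depthwise convolution input, so the same CLTensor is passed to both run() calls. Condensed from the hunks above:

    GpuOutput::create_op(sketch0, ans_info, tensor5_info); // end of the first workload
    ans_info = GpuDepthwiseConv2d::create_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr);
    ans_info = GpuMul::create_op(sketch1, ans_info, tensor2_info);
    GpuOutput::create_op(sketch1, ans_info, tensor6_info);

    runtime0.run({&tensor0, &tensor1, &tensor2, &tensor5});
    runtime1.run({&tensor5, &tensor3, &tensor4, &tensor2, &tensor6});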
@@ -587,34 +602,35 @@ TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL)
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
- auto context = GpuWorkloadContext{ &cl_compile_ctx };
- GpuWorkloadSketch sketch{ &context };
+ auto context = GpuWorkloadContext{&cl_compile_ctx};
+ GpuWorkloadSketch sketch{&context};
// Create tensor infos
- TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
- TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
+ ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout);
+ ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout));
ITensorInfo *dst_info;
// Fuse conv2d into the workload
{
// Validate operator
- const Status success = GpuConv2d::validate_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr);
+ const Status success = GpuConv2d::validate_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
ARM_COMPUTE_EXPECT(bool(success), framework::LogLevel::ERRORS);
- dst_info = GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr);
+ dst_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr);
}
// Create tensor infos
- TensorInfo weight_info_2 = context.create_tensor_info(t_weight_info);
+ ITensorInfo *weight_info_2 = context.create_tensor_info(t_weight_info);
// Fuse conv2d into the workload
{
// Validate operator, should fail
- const Status success = GpuConv2d::validate_op(sketch, dst_info, &weight_info_2, nullptr, conv2d_attr);
- const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload";
+ const Status success = GpuConv2d::validate_op(sketch, dst_info, weight_info_2, nullptr, conv2d_attr);
+ const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload";
ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos),
+ framework::LogLevel::ERRORS);
}
}
TEST_SUITE_END() // Invalid_Fusion_Should_Fail