diff options
Diffstat (limited to 'tests/validation/dynamic_fusion/gpu/Integration.cpp')
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/Integration.cpp | 262 |
1 files changed, 238 insertions, 24 deletions
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index 6a283f8082..3a915779c1 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -23,24 +23,33 @@ */ #include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Macros.h" #include "tests/validation/Validation.h" #include "tests/validation/dynamic_fusion/Utils.h" +#include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ConvolutionLayer.h" #include "tests/validation/reference/DepthConvertLayer.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" #include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" using namespace arm_compute::experimental::dynamic_fusion; using namespace arm_compute::test::validation::utils; @@ -69,17 +78,17 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; // Fuse conv2d Conv2dAttributes conv2d_attr{}; - TensorInfo input_info = sketch.create_tensor_info(t_input_shape, 1, data_type, data_layout); - TensorInfo weight_info = sketch.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, conv2d_attr); - TensorInfo dst_info = sketch.create_tensor_info(); + TensorInfo dst_info = context.create_tensor_info(); GpuOutput::create_op(sketch, conv_out_info, &dst_info); // Configure runtime @@ -156,15 +165,15 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; - TensorInfo in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo out_0_info = sketch.create_tensor_info(); - TensorInfo out_1_info = sketch.create_tensor_info(); + TensorInfo out_0_info = context.create_tensor_info(); + TensorInfo out_1_info = context.create_tensor_info(); ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info); GpuOutput::create_op(sketch, ans_0_info, &out_0_info); @@ -253,15 +262,15 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; - TensorInfo in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); - TensorInfo out_0_info = sketch.create_tensor_info(); - TensorInfo out_1_info = sketch.create_tensor_info(); + TensorInfo out_0_info = context.create_tensor_info(); + TensorInfo out_1_info = context.create_tensor_info(); CastAttributes cast_0_attr; cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE); @@ -348,6 +357,211 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_add_f32); validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_cast_f32); } + +TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) +{ + // (tensor0) + // | + // ======|============================================== Sketch 0 + // | (tensor1) +---- (tensor2) + // | | | | + // +-- input -- weights -- biases --+ | + // | | | + // | Conv2d | | + // | | | + // +----------- output -------------+ | + // | | + // +-- input ---+ | + // | | | + // | Sigmoid | | + // | | | + // +-- output --+ | + // | | + // +-- input ---+ | + // | | | + // | Output | | + // | | | + // +-- output --+ | + // | | + // (tensor5) | + // | | + // +--------+ | + // ======|=============================|================ Sketch 1 + // | (tensor3) (tensor4) | + // | | | | + // +-- input -- weights -- biases --+ | + // | | | + // | DepthwiseConv2d | | + // | | | + // +----------- output -------------+ | + // | | + // +--+ +----------------+ + // | | + // +-- lhs -- rhs --+ + // | | + // | Multiply | + // | | + // +---- output ----+ + // | + // +-- input ---+ + // | | + // | Output | + // | | + // +-- output --+ + // | + // (tensor6) + + TensorShape conv2d_src_shape(10, 20, 30); + TensorShape conv2d_wei_shape(10, 3, 3, 5); + TensorShape conv2d_bia_shape(5); + TensorShape conv2d_dst_shape(5, 18, 28); + TensorShape dwc_wei_shape(5, 3, 3); + TensorShape dwc_bia_shape(5); + TensorShape dwc_dst_shape(5, 16, 26); + + // Initialize the context. + CLScheduler::get().default_reinit(); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context(&cl_compile_ctx); + + auto tensor0_info = context.create_tensor_info(conv2d_src_shape, 1, DataType::F32, DataLayout::NHWC); + + // Create the first sketch: conv2d + cast + output. + GpuWorkloadSketch sketch0(&context); + + Conv2dAttributes conv2d_attr; + auto tensor1_info = context.create_tensor_info(conv2d_wei_shape, 1, DataType::F32, DataLayout::NHWC); + auto tensor2_info = context.create_tensor_info(conv2d_bia_shape, 1, DataType::F32, DataLayout::NHWC); + ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr), framework::LogLevel::ERRORS); + auto ans_info = GpuConv2d::create_op(sketch0, &tensor0_info, &tensor1_info, &tensor2_info, conv2d_attr); + + ARM_COMPUTE_EXPECT(GpuSigmoid::validate_op(sketch0, ans_info), framework::LogLevel::ERRORS); + ans_info = GpuSigmoid::create_op(sketch0, ans_info); + + DepthwiseConv2dAttributes dwc_attr; + auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC); + auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC); + ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS); + + auto tensor5_info = context.create_tensor_info(); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, &tensor5_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch0, ans_info, &tensor5_info); + + // Create the first workload runtime. + ClWorkloadRuntime runtime0; + runtime0.configure(sketch0); + + // Create the second sketch: dwc + sigmoid + output. + GpuWorkloadSketch sketch1(&context); + + ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr), framework::LogLevel::ERRORS); + ans_info = GpuDepthwiseConv2d::create_op(sketch1, &tensor5_info, &tensor3_info, &tensor4_info, dwc_attr); + + ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, &tensor2_info), framework::LogLevel::ERRORS); + ans_info = GpuMul::create_op(sketch1, ans_info, &tensor2_info); + + auto tensor6_info = context.create_tensor_info(); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, &tensor6_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch1, ans_info, &tensor6_info); + + // Create the second workload runtime. + ClWorkloadRuntime runtime1; + runtime1.configure(sketch1); + + // Create the user tensors. + CLTensor tensor0; + CLTensor tensor1; + CLTensor tensor2; + CLTensor tensor3; + CLTensor tensor4; + CLTensor tensor5; + CLTensor tensor6; + + tensor0.allocator()->init(tensor0_info); + tensor1.allocator()->init(tensor1_info); + tensor2.allocator()->init(tensor2_info); + tensor3.allocator()->init(tensor3_info); + tensor4.allocator()->init(tensor4_info); + tensor5.allocator()->init(tensor5_info); + tensor6.allocator()->init(tensor6_info); + + tensor0.allocator()->allocate(); + tensor1.allocator()->allocate(); + tensor2.allocator()->allocate(); + tensor3.allocator()->allocate(); + tensor4.allocator()->allocate(); + tensor5.allocator()->allocate(); + tensor6.allocator()->allocate(); + + // Allocate the auxiliary tensors. + for(auto &data : runtime0.get_auxiliary_tensors()) + { + auto tensor = std::get<0>(data); + auto &tensor_info = std::get<1>(data); + auto mem_req = std::get<2>(data); + + tensor->allocator()->init(tensor_info, mem_req.alignment); + tensor->allocator()->allocate(); + } + + for(auto &data : runtime1.get_auxiliary_tensors()) + { + auto tensor = std::get<0>(data); + auto &tensor_info = std::get<1>(data); + auto mem_req = std::get<2>(data); + + tensor->allocator()->init(tensor_info, mem_req.alignment); + tensor->allocator()->allocate(); + } + + // Fill the input tensors with random data. + fill<float>(CLAccessor(tensor0), 0, library.get()); + fill<float>(CLAccessor(tensor1), 1, library.get()); + fill<float>(CLAccessor(tensor2), 2, library.get()); + fill<float>(CLAccessor(tensor3), 3, library.get()); + fill<float>(CLAccessor(tensor4), 4, library.get()); + + // Run each runtime. + runtime0.run({ &tensor0, &tensor1, &tensor2, &tensor5 }); + runtime1.run({ &tensor5, &tensor3, &tensor4, &tensor2, &tensor6 }); + + // Compute the reference result. + SimpleTensor<float> ref_conv2d_src(conv2d_src_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_conv2d_wei(conv2d_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_conv2d_bia(conv2d_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_dwc_wei(dwc_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_dwc_bia(dwc_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + + fill<float>(ref_conv2d_src, 0, library.get()); + fill<float>(ref_conv2d_wei, 1, library.get()); + fill<float>(ref_conv2d_bia, 2, library.get()); + fill<float>(ref_dwc_wei, 3, library.get()); + fill<float>(ref_dwc_bia, 4, library.get()); + + PermutationVector nhwc_to_nchw(1, 2, 0); + + auto conv2d_dst_shape_nchw = conv2d_dst_shape; + permute(conv2d_dst_shape_nchw, nhwc_to_nchw); + const auto ref_conv2d_src_nchw = reference::permute(ref_conv2d_src, nhwc_to_nchw); + const auto ref_conv2d_wei_nchw = reference::permute(ref_conv2d_wei, nhwc_to_nchw); + const auto ref_conv2d_bia_nchw = reference::permute(ref_conv2d_bia, nhwc_to_nchw); + const auto ref_conv2d_dst_nchw = reference::convolution_layer(ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo()); + + const auto ref_sigmoid_dst_nchw = reference::activation_layer(ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + + auto dwc_dst_shape_nchw = dwc_dst_shape; + permute(dwc_dst_shape_nchw, nhwc_to_nchw); + const auto ref_dwc_wei_nchw = reference::permute(ref_dwc_wei, nhwc_to_nchw); + const auto ref_dwc_bia_nchw = reference::permute(ref_dwc_bia, nhwc_to_nchw); + const auto ref_dwc_dst_nchw = reference::depthwise_convolution(ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1); + + const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>(ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, DataType::F32); + + constexpr RelativeTolerance<float> tolerance(0.001f); + validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance); +} + TEST_SUITE(Invalid_Fusion_Should_Fail) TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) { @@ -368,12 +582,12 @@ TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; // Create tensor infos - TensorInfo input_info = sketch.create_tensor_info(t_input_shape, 1, data_type, data_layout); - TensorInfo weight_info = sketch.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + TensorInfo input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + TensorInfo weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); ITensorInfo *dst_info; // Fuse conv2d into the workload @@ -386,7 +600,7 @@ TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) } // Create tensor infos - TensorInfo weight_info_2 = sketch.create_tensor_info(t_weight_info); + TensorInfo weight_info_2 = context.create_tensor_info(t_weight_info); // Fuse conv2d into the workload { |