diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-13 13:09:10 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-16 15:17:51 +0000 |
commit | b84e25313e5dc7acbc03623e1e071e845047c111 (patch) | |
tree | fbee083f1262017555c64c3280da45e2b638992e /tests/validation | |
parent | a0ae8d2e6c57fd95c0edaf659b9df8b8c540d051 (diff) | |
download | ComputeLibrary-b84e25313e5dc7acbc03623e1e071e845047c111.tar.gz |
Add output operator for dynamic fusion
* The output of the fused operator must be explicitly specified
using the GpuOutput operator.
* Any temporary tensor used to connect the output of one operator
to the input of another operator is marked as no-alloc
and is not allocated as a tensor in memory.
Resolves: COMPMID-5771
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I5ae8e800f8f737db23a055a92b01c4f1d78c3bb8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8794
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/validation')
6 files changed, 67 insertions, 84 deletions
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index 0b81dac1f0..58d2215e64 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -28,6 +28,7 @@ #include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Macros.h" @@ -70,8 +71,11 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) Conv2dAttributes conv2d_attr{}; auto input_info = sketch.create_tensor_info(t_input_shape, 1, data_type, data_layout); auto weight_info = sketch.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); - auto dst_info = sketch.create_tensor_info(); - GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, &dst_info, conv2d_attr); + auto ans_info = sketch.create_tensor_info(); + GpuConv2d::create_op(sketch, &input_info, &weight_info, nullptr, &ans_info, conv2d_attr); + + auto dst_info = sketch.create_tensor_info(); + GpuOutput::create_op(sketch, &ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h index c7600e082e..630b664b78 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -33,6 +33,7 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include 
"arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/CL/CLAccessor.h" @@ -133,7 +134,11 @@ protected: auto weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); auto bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); auto dst_info = sketch.create_tensor_info(); - FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &dst_info, dwc_conv2d_attr); + + auto ans_info = sketch.create_tensor_info(); + + FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &ans_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, &ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h index e437c440d0..1a2676c438 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -32,6 +32,7 @@ #include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" @@ -113,7 +114,11 @@ protected: auto weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); auto bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); auto dst_info = sketch.create_tensor_info(); - FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &dst_info, conv2d_attr); + + auto ans_info = sketch.create_tensor_info(); + + FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &ans_info, conv2d_attr); + GpuOutput::create_op(sketch, 
&ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h index d11237748f..f97b541ce3 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -29,6 +29,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" @@ -102,32 +103,30 @@ protected: auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - TensorInfo dst_info{}; - TensorInfo dst_info_fuse{}; // Fuse first element wise binary Op auto lhs_info = sketch.create_tensor_info(shape0, 1, _data_type); auto rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + + auto ans_info = sketch.create_tensor_info(); + auto dst_info = sketch.create_tensor_info(); + TensorInfo rhs_info_fuse; + TensorInfo ans2_info; + + FunctionType::create_op(sketch, &lhs_info, &rhs_info, &ans_info); - // Testing root case while in-place - if(!_is_inplace) + if(_fuse) { - dst_info = sketch.create_tensor_info(TensorInfo(1, _data_type)); + rhs_info_fuse = sketch.create_tensor_info(shape2, 1, _data_type); + ans2_info = sketch.create_tensor_info(); - FunctionType::create_op(sketch, &lhs_info, &rhs_info, &dst_info); + FunctionType::create_op(sketch, &ans_info, &rhs_info_fuse, &ans2_info); + GpuOutput::create_op(sketch, &ans2_info, &dst_info); } else { - FunctionType::create_op(sketch, &lhs_info, &rhs_info, &lhs_info); - } - - if(_fuse) - { - // Fuse first element wise binary 
Op - rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type)); - dst_info_fuse = sketch.create_tensor_info(); - FunctionType::create_op(sketch, &dst_info, &rhs_info_fuse, &dst_info_fuse); + GpuOutput::create_op(sketch, &ans_info, &dst_info); } // Configure runtime @@ -148,33 +147,24 @@ protected: TensorType t_rhs{}; TensorType t_rhs_fuse{}; TensorType t_dst{}; - TensorType t_dst_fuse{}; // Initialize user tensors t_lhs.allocator()->init(lhs_info); t_rhs.allocator()->init(rhs_info); - if(!_is_inplace) + t_dst.allocator()->init(dst_info); + if(_fuse) { - t_dst.allocator()->init(dst_info); - if(_fuse) - { - t_rhs_fuse.allocator()->init(rhs_info_fuse); - t_dst_fuse.allocator()->init(dst_info_fuse); - } + t_rhs_fuse.allocator()->init(rhs_info_fuse); } // Allocate and fill user tensors // Instead of using ACL allocator, the user can choose to import memory into the tensors t_lhs.allocator()->allocate(); t_rhs.allocator()->allocate(); - if(!_is_inplace) + t_dst.allocator()->allocate(); + if(_fuse) { - t_dst.allocator()->allocate(); - if(_fuse) - { - t_rhs_fuse.allocator()->allocate(); - t_dst_fuse.allocator()->allocate(); - } + t_rhs_fuse.allocator()->allocate(); } fill(AccessorType(t_lhs), 0); @@ -183,31 +173,17 @@ protected: { fill(AccessorType(t_rhs_fuse), 2); } + // Run runtime - if(_is_inplace) + if(_fuse) { - runtime.run({ &t_lhs, &t_rhs, &t_lhs }); + runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst }); } else { - if(_fuse) - { - runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst_fuse }); - } - else - { - runtime.run({ &t_lhs, &t_rhs, &t_dst }); - } + runtime.run({ &t_lhs, &t_rhs, &t_dst }); } - if(_is_inplace) - { - return t_lhs; - } - else if(_fuse) - { - return t_dst_fuse; - } return t_dst; } diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h index 8553472fb9..418cf4fe04 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h +++ 
b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h @@ -30,6 +30,7 @@ #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/DepthConvertLayer.h" @@ -121,7 +122,10 @@ protected: CastAttributes attributes; attributes.convert_policy(policy).data_type(dt_out); - FunctionType::create_op(sketch, &src_info, &dst_info, attributes); + auto ans_info = sketch.create_tensor_info(); + + FunctionType::create_op(sketch, &src_info, &ans_info, attributes); + GpuOutput::create_op(sketch, &ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h index dbac29fd22..fe87d9a022 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h @@ -29,6 +29,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/ActivationLayer.h" @@ -108,14 +109,23 @@ protected: // Create sketch tensors TensorInfo src_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type)); - TensorInfo dst_0_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type)); - TensorInfo dst_1_info; + TensorInfo dst_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type)); + + auto ans_0_info = sketch.create_tensor_info(); + TensorInfo ans_1_info; + + 
FunctionType::create_op(sketch, &src_info, &ans_0_info, attributes); - FunctionType::create_op(sketch, &src_info, &dst_0_info, attributes); if(_fuse) { - dst_1_info = sketch.create_tensor_info(shape, 1, _data_type); - FunctionType::create_op(sketch, &dst_0_info, &dst_1_info, attributes); + ans_1_info = sketch.create_tensor_info(); + + FunctionType::create_op(sketch, &ans_0_info, &ans_1_info, attributes); + GpuOutput::create_op(sketch, &ans_1_info, &dst_info); + } + else + { + GpuOutput::create_op(sketch, &ans_0_info, &dst_info); } // Configure runtime @@ -124,43 +134,22 @@ protected: // Construct user tensors TensorType t_src{}; - TensorType t_dst_0{}; - TensorType t_dst_1{}; + TensorType t_dst{}; // Initialize user tensors t_src.allocator()->init(src_info); - t_dst_0.allocator()->init(dst_0_info); - if(_fuse) - { - t_dst_1.allocator()->init(dst_1_info); - } + t_dst.allocator()->init(dst_info); // Allocate and fill user tensors t_src.allocator()->allocate(); - t_dst_0.allocator()->allocate(); - if(_fuse) - { - t_dst_1.allocator()->allocate(); - } + t_dst.allocator()->allocate(); fill(AccessorType(t_src)); // Run runtime - if(_fuse) - { - runtime.run({ &t_src, &t_dst_1 }); - } - else - { - runtime.run({ &t_src, &t_dst_0 }); - } - - if(_fuse) - { - return t_dst_1; - } + runtime.run({ &t_src, &t_dst }); - return t_dst_0; + return t_dst; } SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info) |