Diffstat (limited to 'tests/validation/dynamic_fusion/gpu/Integration.cpp')
-rw-r--r--  tests/validation/dynamic_fusion/gpu/Integration.cpp  117
1 file changed, 116 insertions, 1 deletion
diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp
index a5716ce1e1..0a689fa4b6 100644
--- a/tests/validation/dynamic_fusion/gpu/Integration.cpp
+++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp
@@ -26,9 +26,11 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
#include "tests/CL/CLAccessor.h"
@@ -38,6 +40,7 @@
#include "tests/validation/reference/ConvolutionLayer.h"
#include "tests/validation/reference/Permute.h"
#include "tests/validation/reference/ElementwiseOperations.h"
+#include "tests/validation/reference/DepthConvertLayer.h"
using namespace arm_compute::experimental::dynamic_fusion;
using namespace arm_compute::test::validation::utils;
@@ -148,7 +151,7 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
CLScheduler::get().default_reinit();
const auto data_type = DataType::F32;
- const auto t_input_shape = TensorShape(8, 2, 1);
+ const auto t_input_shape = TensorShape(33, 3, 2);
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
@@ -238,6 +241,118 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL)
validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_f32);
validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_f32);
}
+TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL)
+{
+ /* Computation:
+ * out_0 = in_0 + in_1
+ * out_1 = float(int32_t(out_0 + in_2))
+ */
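+ // (Note) Relative to out_0 + in_2, out_1 loses its fractional part in the
+ // saturating F32 -> S32 -> F32 round trip.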
+ CLScheduler::get().default_reinit();
+
+ const auto data_type = DataType::F32;
+ const auto t_input_shape = TensorShape(3, 8, 5);
+
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &gpu_ctx };
+
+ auto in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type);
+ auto in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type);
+ auto in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type);
+
+ auto out_0_info = sketch.create_tensor_info();
+ auto out_1_info = sketch.create_tensor_info();
+
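+ // Intermediate (ans_*) tensor infos are created empty; each create_op below is
+ // expected to deduce their shape and data type from its inputs.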
+ auto ans_0_info = sketch.create_tensor_info();
+ auto ans_1_info = sketch.create_tensor_info();
+ auto ans_2_info = sketch.create_tensor_info();
+ auto ans_3_info = sketch.create_tensor_info();
+
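+ // cast_0 truncates F32 -> S32 and cast_1 converts back to F32; SATURATE clamps
+ // values outside the S32 range during the float-to-integer conversion.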
+ CastAttributes cast_0_attr;
+ cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE);
+
+ CastAttributes cast_1_attr;
+ cast_1_attr.data_type(DataType::F32).convert_policy(ConvertPolicy::SATURATE);
+
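+ // Build the fused operator graph; GpuOutput marks ans_0 and ans_3 as the
+ // user-visible outputs out_0 and out_1.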
+ GpuAdd::create_op(sketch, &in_0_info, &in_1_info, &ans_0_info);
+ GpuOutput::create_op(sketch, &ans_0_info, &out_0_info);
+ GpuAdd::create_op(sketch, &ans_0_info, &in_2_info, &ans_1_info);
+ GpuCast::create_op(sketch, &ans_1_info, &ans_2_info, cast_0_attr);
+ GpuCast::create_op(sketch, &ans_2_info, &ans_3_info, cast_1_attr);
+ GpuOutput::create_op(sketch, &ans_3_info, &out_1_info);
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ // Instead of using ACL-allocated memory, the user can choose to import memory into the tensors
+ for(auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = data.first;
+ AuxMemoryInfo aux_mem_req = data.second;
+ tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL-allocated memory
+ // auto buf = cl::Buffer();
+ // tensor->allocator()->import_memory(buf); // Or, import external memory
+ }
+
+ // Construct user tensors
+ CLTensor t_in_0{};
+ CLTensor t_in_1{};
+ CLTensor t_in_2{};
+
+ CLTensor t_out_0{};
+ CLTensor t_out_1{};
+
+ // Initialize user tensors
+ t_in_0.allocator()->init(in_0_info);
+ t_in_1.allocator()->init(in_1_info);
+ t_in_2.allocator()->init(in_2_info);
+
+ t_out_0.allocator()->init(out_0_info);
+ t_out_1.allocator()->init(out_1_info);
+
+ // Allocate and fill user tensors
+ // Instead of using the ACL allocator, the user can choose to import memory into the tensors
+ t_in_0.allocator()->allocate();
+ t_in_1.allocator()->allocate();
+ t_in_2.allocator()->allocate();
+
+ t_out_0.allocator()->allocate();
+ t_out_1.allocator()->allocate();
+
+ fill<float>(CLAccessor(t_in_0), 0, library.get());
+ fill<float>(CLAccessor(t_in_1), 1, library.get());
+ fill<float>(CLAccessor(t_in_2), 2, library.get());
+
+ // Run runtime
+ runtime.run({ &t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1 });
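+ // (Note) The tensor order here is assumed to match the creation order of the
+ // corresponding tensor infos in the sketch.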
+
+ // Create reference
+ SimpleTensor<float> ref_t_in_0{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_in_1{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_in_2{ t_input_shape, data_type, 1, QuantizationInfo() };
+
+ SimpleTensor<float> ref_t_out_0{ t_input_shape, data_type, 1, QuantizationInfo() };
+ SimpleTensor<float> ref_t_ans_1{ t_input_shape, data_type, 1, QuantizationInfo() };
+
+ // Fill reference
+ fill<float>(ref_t_in_0, 0, library.get());
+ fill<float>(ref_t_in_1, 1, library.get());
+ fill<float>(ref_t_in_2, 2, library.get());
+
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP);
+ reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1, ConvertPolicy::WRAP);
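+ // Mirror the fused F32 -> S32 -> F32 cast round trip with two reference depth converts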
+ const auto ref_t_ans_2 = reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0);
+ const auto ref_t_out_1 = reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0);
+
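+ // After truncation to S32, the GPU and reference results can land on opposite
+ // sides of an integer boundary, so out_1 uses an absolute tolerance of 1.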
+ RelativeTolerance<float> tolerance_add_f32(0.001f);
+ AbsoluteTolerance<float> tolerance_cast_f32(1.0f);
+ validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_add_f32);
+ validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_cast_f32);
+}
TEST_SUITE(Invalid_Fusion_Should_Fail)
TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL)
{