Add post ops to ClGemmMatrixMultiplyReshapedOnlyRHSKernel and ClGemmMatrixMultiplyNativeKernel Part 3

Partially resolves: COMPMID-4435 Change-Id: Ifc5affa3a24a70942ca2d001380205df09b03ad7 Signed-off-by: SiCongLi <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6550 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: SiCongLi <sicong.li@arm.com> 2021-10-24 19:12:33 +0100
committer: SiCong Li <sicong.li@arm.com> 2021-11-02 10:41:11 +0000
commit: afa19725f7f3feb2c21a6aed02ade49d08e3097b (patch)
tree: d796239f542cf447060e6fa6d1240fecb3d6c7a6 /tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
parent: 579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (diff)
download: ComputeLibrary-afa19725f7f3feb2c21a6aed02ade49d08e3097b.tar.gz
1 files changed, 270 insertions, 0 deletions
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 0f86a70e0f..9e1a185a10 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/experimental/PostOp.h"
 #include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
 #include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
@@ -61,6 +62,11 @@ using CLGEMMMatrixMultiplyReshapedOnlyRHSFixture = GEMMMatrixMultiplyReshapedOnl
 template <typename T>
 using CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture = GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
 
+// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS with post ops
+template <typename T>
+using CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture =
+    GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
+
 namespace
 {
 // *INDENT-OFF*
@@ -157,6 +163,81 @@ const auto boundary_handling_cases = combine(combine(combine(combine(combine(com
                                     broadcast_bias_values),
                                     framework::dataset::make("Activation", ActivationLayerInfo()));
 
+/** Post Ops */
+using PostOpArgBroadcast =  CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture<float>::PostOpArgBroadcast;
+experimental::PostOpList<PostOpArgBroadcast> post_ops_1()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::LINEAR, 0.5F, 0.0F});
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(true, true, false),   // If broadcast in dims 0, 1 and 2
+        0,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    return post_ops;
+}
+experimental::PostOpList<PostOpArgBroadcast> post_ops_2()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(false, true, true),   // If broadcast in dims 0, 1 and 2
+        1,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    return post_ops;
+}
+experimental::PostOpList<PostOpArgBroadcast> post_ops_3()
+{
+    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
+    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU, 2.1F, 1.3F});
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
+        std::make_tuple(false, false, true),  // If broadcast in dims 0, 1 and 2
+        1,
+        ConvertPolicy::SATURATE);
+    return post_ops;
+}
+
+/** Different Post Op Lists */
+const auto post_op_lists = framework::dataset::make("post_op_lists", {
+    post_ops_1(),
+    post_ops_2(),
+    post_ops_3(),
+ } );
+
+ bool is_post_op_list_valid(unsigned int m, unsigned int n, unsigned int k, unsigned int batch, DataType data_type, const experimental::PostOpList<ITensorInfo*>& post_ops)
+{
+    const auto lhs_info = GEMMLHSMatrixInfo(4,4,1,false,true);
+    const auto rhs_info = GEMMRHSMatrixInfo(4,4,1,true,true,false);
+
+    // Create TensorInfo for post op arguments
+    TensorInfo input0_info(TensorShape(k, m, batch), 1, data_type);
+    TensorInfo input1_info(TensorShape(n, k, batch), 1, data_type);
+    TensorInfo input2_info(TensorShape(n), 1, data_type);
+    TensorInfo output_info(TensorShape(n, m, batch), 1, data_type);
+
+    const TensorInfo reshaped_input1_info = input1_info.clone()->set_tensor_shape(misc::shape_calculator::compute_rhs_reshaped_shape(input1_info, rhs_info));
+
+    GEMMKernelInfo gemm_info(m, n, k, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
+             false /**< reinterpret the input as 3D */,
+             true  /**< Flag used to broadcast the bias addition */,
+             false /**< wider accumm */,
+             false /**< has pad y */,
+           ActivationLayerInfo::ActivationFunction::IDENTITY,
+             1   /**< Multiplication factor for the width of the 1xW transposed block */,
+             1   /**< Multiplication factor for the height of the 4x4 interleaved block */,
+             lhs_info,
+             rhs_info,
+             0  /**< Offset to be added to each element of the matrix A */,
+             0 /**< Offset to be added to each element of the matrix B */,
+             post_ops);
+    return bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(&input0_info.clone()->set_is_resizable(true),
+                                                          &reshaped_input1_info.clone()->set_is_resizable(true),
+                                                          &input2_info.clone()->set_is_resizable(true),
+                                                          &output_info.clone()->set_is_resizable(true),1.f,1.f,
+                                                          lhs_info,
+                                                          rhs_info,
+                                                          gemm_info));
+}
 /** Configuration test */
 bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value,
                             unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value,
@@ -211,6 +292,7 @@ bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
     CLGEMMMatrixMultiplyReshapedOnlyRHS gemm;
     return bool(gemm.validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info));
 }
+
 } // namespace
 
 TEST_SUITE(CL)
@@ -262,6 +344,119 @@ b_value, m0_value, n0_value, k0_value, broadcast_bias, input_as_3d, depth_output
     ARM_COMPUTE_EXPECT(status == expected_value, framework::LogLevel::ERRORS);
 }
 
+TEST_SUITE(ValidateFusedPostOpsConfigs)
+TEST_SUITE(Invalid)
+TEST_CASE(UnsupportedPostOpSequence, framework::DatasetMode::ALL)
+{
+    const auto data_type = DataType::F32;
+    const unsigned int m = 17;
+    const unsigned int n = 1;
+    const unsigned int k = 13;
+    const unsigned int batch = 2;
+    TensorShape post_op_arg0_shape(n, m, batch);
+    TensorInfo post_op_arg_info(post_op_arg0_shape, 1, data_type);
+    auto post_op_arg1_info = post_op_arg_info.clone();
+
+    // Unsupported sequence of post ops
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>(
+        &post_op_arg_info,
+        1,
+        ConvertPolicy::SATURATE);
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>(
+        post_op_arg1_info.get(),
+        0,
+        ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_CASE(OutputWidened, framework::DatasetMode::ALL)
+{
+    // Invalid broadcast: post op tensors "widen" the output tensor
+    const auto data_type = DataType::F32;
+    const unsigned int m = 17;
+    const unsigned int n = 1;
+    const unsigned int k = 1;
+    const unsigned int batch = 1;
+    TensorShape post_op_arg_shape(n, m, batch + 4); // output's batch dimension is "widened", which is not allowed
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInXDimOnly, framework::DatasetMode::ALL)
+{
+    // Invalid broadcast: post op tensors broadcast in the first dimension (X) only
+    const auto data_type = DataType::F32;
+    const unsigned int m = 22;
+    const unsigned int n = 16;
+    const unsigned int k = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, m, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // Invalid
+TEST_SUITE(Valid)
+TEST_CASE(EmptyPostOpList, framework::DatasetMode::ALL)
+{
+    const auto data_type = DataType::F32;
+    const unsigned int m = 22;
+    const unsigned int n = 16;
+    const unsigned int k = 15;
+    const unsigned int batch = 3;
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInYDimOnly, framework::DatasetMode::ALL)
+{
+    const auto data_type = DataType::F32;
+    const unsigned int m = 22;
+    const unsigned int n = 16;
+    const unsigned int k = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(n, 1, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInBothXandYDims, framework::DatasetMode::ALL)
+{
+    const auto data_type = DataType::F32;
+    const unsigned int m = 22;
+    const unsigned int n = 16;
+    const unsigned int k = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, 1, batch);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_CASE(BroadcastInAllDims, framework::DatasetMode::ALL)
+{
+    const auto data_type = DataType::F32;
+    const unsigned int m = 22;
+    const unsigned int n = 16;
+    const unsigned int k = 15;
+    const unsigned int batch = 3;
+    TensorShape post_op_arg_shape(1, 1, 1);
+    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
+    experimental::PostOpList<ITensorInfo*> post_ops{};
+    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);
+
+    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // Valid
+TEST_SUITE_END() // ValidateFusedPostOps
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
@@ -462,6 +657,44 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
+TEST_SUITE(FusedPostOps)
+
+FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("H0", {1})),
+                                                                   framework::dataset::make("interleave_rhs", { true })),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", false, true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   framework::dataset::make("broadcast_bias", { false } )),
+                                                                   act_values),
+                                                                   post_op_lists)
+                                                                   )
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() //  FusedPostOps
+
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
@@ -590,6 +823,43 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+TEST_SUITE(FusedPostOps)
+
+FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("H0", {1})),
+                                                                   framework::dataset::make("interleave_rhs", { true })),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("export_to_cl_image_rhs", true)),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   framework::dataset::make("broadcast_bias", { false } )),
+                                                                   act_values),
+                                                                   post_op_lists)
+                                                                   )
+{
+    // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() //  FusedPostOps
+
 TEST_SUITE_END() // FP16
 
 TEST_SUITE_END() // Float
author	SiCongLi <sicong.li@arm.com>	2021-10-24 19:12:33 +0100
committer	SiCong Li <sicong.li@arm.com>	2021-11-02 10:41:11 +0000
commit	afa19725f7f3feb2c21a6aed02ade49d08e3097b (patch)
tree	d796239f542cf447060e6fa6d1240fecb3d6c7a6 /tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
parent	579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (diff)
download	ComputeLibrary-afa19725f7f3feb2c21a6aed02ade49d08e3097b.tar.gz