From 4a578b923ed000c67fe0bc1433f945aea634ca9c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 25 Jun 2021 12:13:49 +0100 Subject: Port the ClGemmLowp kernels to the new API Ported kernels: - CLGEMMLowpMatrixMultiplyNativeKernel - CLGEMMLowpMatrixMultiplyReshapedKernel - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - CLGEMMLowpOffsetContributionKernel - CLGEMMLowpOffsetContributionOutputStageKernel - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel - CLGEMMLowpQuantizeDownInt32ScaleKernel Signed-off-by: Georgios Pinitas Change-Id: I9d5a744d6a2dd2f2726fdfb291bad000b6970de2 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5870 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- examples/gemm_tuner/cl_gemmlowp_reshaped.cpp | 19 +++++++------- ...aped_rhs_only_fused_output_stage_fixedpoint.cpp | 30 +++++++++++++--------- 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'examples') diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp index 3d3f7fef1e..3c8976fddd 100644 --- a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp @@ -33,7 +33,7 @@ #include "arm_compute/runtime/CL/CLTuner.h" #include "examples/gemm_tuner/CommonGemmExampleOptions.h" #include "examples/gemm_tuner/GemmTunerHelpers.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "src/core/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h" #include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" #include "tests/CL/Helper.h" #include "utils/Utils.h" @@ -168,8 +168,8 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) } // namespace -using CLGEMMReshapeLHSMatrix = test::CLSynthetizeOperator; -using CLGEMMLowpMatrixMultiplyReshaped = test::CLSynthetizeFunction; +using ClGemmReshapeLHSMatrix = test::CLSynthetizeOperator; +using ClGemmLowpMatrixMultiplyReshaped = test::CLSynthetizeOperator; class CLGEMMLowpMatrixMultiplyReshapedExample : public Example { @@ -269,20 +269,20 @@ public: // Validate argments if(!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d())) { - std::cerr << "Invalid arguments for CLGEMMReshapeLHSMatrixKernel." << std::endl; + std::cerr << "Invalid arguments for ClGemmReshapeLHSMatrixKernel." << std::endl; return false; } if(!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info)) { - std::cerr << "Invalid arguments for CLGEMMLowpMatrixMultiplyReshapedKernel." << std::endl; + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel." << std::endl; return false; } // Configure functions reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, gemm_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info); // Allocate tensors lhs.allocator()->allocate(); @@ -298,7 +298,8 @@ public: ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }); reshape_lhs.run(reshape_lsh_pack); - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -315,8 +316,8 @@ private: CLTensor rhs_reshaped{}; CLTensor dst{}; CLTuner tuner{}; - CLGEMMReshapeLHSMatrix reshape_lhs{}; - CLGEMMLowpMatrixMultiplyReshaped gemm{}; + ClGemmReshapeLHSMatrix reshape_lhs{}; + ClGemmLowpMatrixMultiplyReshaped gemm{}; }; /** Main test program for gemmlowp reshaped diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp index ca7b7a5f04..15c1b86c61 100644 --- a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp @@ -35,8 +35,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/core/gpu/cl/kernels/ClGemmLowpReductionKernel.h" #include "tests/CL/Helper.h" #include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" @@ -47,6 +47,7 @@ using namespace arm_compute; using namespace utils; +using namespace arm_compute::opencl::kernels; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -146,8 +147,8 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) } // namespace -using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = test::CLSynthetizeFunction; -using CLGEMMLowpMatrixAReduction = test::CLSynthetizeFunction; +using ClGemmLowpMatrixMultiplyReshapedOnlyRhs = test::CLSynthetizeOperator; +using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator; class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample : public Example { @@ -289,7 +290,7 @@ public: const TensorInfo info_vector_sum_row(compute_reductionB_shape(*lhs.info()), 1, DataType::S32); vector_sum_row.allocator()->init(info_vector_sum_row); - mtx_a_reduction = std::make_unique(); + mtx_a_reduction = std::make_unique(); if(!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{})) { @@ -297,7 +298,7 @@ public: return false; } - mtx_a_reduction->configure(&lhs, &vector_sum_row, GEMMLowpReductionKernelInfo{}); + mtx_a_reduction->configure(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}); } // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 if(gemm_info.a_offset != 0) @@ -311,12 +312,14 @@ public: if(!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(), dst_shifts.info())) { - std::cerr << "Invalid arguments for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel." << std::endl; + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel." << std::endl; return false; } // Configure function - gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info, gemm_info.a_offset == 0 ? nullptr : &vector_sum_col, gemm_info.b_offset == 0 ? nullptr : &vector_sum_row, &bias, &dst_multipliers, &dst_shifts); + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, + gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), + bias.info(), dst_multipliers.info(), dst_shifts.info()); // Allocate tensors lhs.allocator()->allocate(); @@ -335,9 +338,12 @@ public: { if(mtx_a_reduction != nullptr) { - mtx_a_reduction->run(); + ITensorPack red_pack({ { ACL_SRC, &lhs }, { ACL_DST, &dst } }); + mtx_a_reduction->run(red_pack); } - gemm.run(); + + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_BIAS, &bias }, { ACL_VEC_COL_SUM, &vector_sum_col }, { ACL_VEC_ROW_SUM, &vector_sum_row }, { ACL_SHIFTS, &dst_shifts }, { ACL_MULTIPLIERS, &dst_multipliers }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -358,8 +364,8 @@ private: CLTensor dst_multipliers{}; CLTensor dst_shifts{}; CLTuner tuner{}; - CLGEMMLowpMatrixMultiplyReshapedOnlyRHS gemm{}; - std::unique_ptr mtx_a_reduction{ nullptr }; + ClGemmLowpMatrixMultiplyReshapedOnlyRhs gemm{}; + std::unique_ptr mtx_a_reduction{ nullptr }; }; /** Main test program for gemmlowp reshaped rhs only with fused output stage fixedpoint -- cgit v1.2.1