From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang-format configuration
file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, SConscript ...)

And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files
under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
 examples/gemm_tuner/CommonGemmExampleOptions.cpp   | 16 ++--
 examples/gemm_tuner/CommonGemmExampleOptions.h     | 28 +++----
 examples/gemm_tuner/GemmTunerHelpers.h             |  6 +-
 examples/gemm_tuner/cl_gemm_native.cpp             | 26 +++----
 examples/gemm_tuner/cl_gemm_reshaped.cpp           | 77 ++++++++++---------
 examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp  | 59 +++++++--------
 examples/gemm_tuner/cl_gemmlowp_reshaped.cpp       | 85 +++++++++++----------
 ...aped_rhs_only_fused_output_stage_fixedpoint.cpp | 87 ++++++++++++----------
 8 files changed, 196 insertions(+), 188 deletions(-)
(limited to 'examples/gemm_tuner')

diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.cpp b/examples/gemm_tuner/CommonGemmExampleOptions.cpp
index bee202b99e..c2a465604a 100644
--- a/examples/gemm_tuner/CommonGemmExampleOptions.cpp
+++ b/examples/gemm_tuner/CommonGemmExampleOptions.cpp
@@ -39,7 +39,8 @@ using namespace utils;
     return os;
 }
 
-CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, arm_compute::DataType default_data_type)
+CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser,
+                                                   arm_compute::DataType                  default_data_type)
     : help(parser.add_option<ToggleOption>("help")),
       M(parser.add_positional_option<SimpleOption<size_t>>("M", 100)),
       N(parser.add_positional_option<SimpleOption<size_t>>("N", 100)),
@@ -48,21 +49,16 @@ CommonGemmExampleOptions::CommonGemmExampleOpti
       data_type(),
       tuner_mode()
 {
-    const std::set<DataType> supported_data_types
-    {
+    const std::set<DataType> supported_data_types{
         DataType::F16,
         DataType::F32,
         DataType::QASYMM8,
     };
 
-    const std::set<CLTunerMode> supported_tuner_modes
-    {
-        CLTunerMode::EXHAUSTIVE,
-        CLTunerMode::NORMAL,
-        CLTunerMode::RAPID
-    };
+    const std::set<CLTunerMode> supported_tuner_modes{CLTunerMode::EXHAUSTIVE, CLTunerMode::NORMAL, CLTunerMode::RAPID};
 
-    ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(), "Default data type unsupported");
+    ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(),
+                             "Default data type unsupported");
 
     data_type  = parser.add_option<EnumOption<DataType>>("type", supported_data_types, default_data_type);
     tuner_mode = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::RAPID);
diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.h b/examples/gemm_tuner/CommonGemmExampleOptions.h
index f7447e3db3..38178bcef8 100644
--- a/examples/gemm_tuner/CommonGemmExampleOptions.h
+++ b/examples/gemm_tuner/CommonGemmExampleOptions.h
@@ -27,21 +27,22 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
-#include "utils/TypePrinter.h"
+
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/TypePrinter.h"
 
 namespace gemm_tuner
 {
 /** Structure holding all the common gemm example parameters */
 struct CommonGemmExampleParams
 {
-    size_t                   M{ 100 };                                     /**< Number of lhs matrix rows */
-    size_t                   N{ 100 };                                     /**< Number of rhs matrix columns */
-    size_t                   K{ 50 };                                      /**< Number of lhs matrix columns/rhs matrix rows */
-    size_t                   B{ 1 };                                       /**< Batch size */
-    arm_compute::DataType    data_type{ arm_compute::DataType::F32 };      /**< Data type */
-    arm_compute::CLTunerMode tuner_mode{ arm_compute::CLTunerMode::RAPID };/**< OpenCL tuner mode */
+    size_t                   M{100};                                      /**< Number of lhs matrix rows */
+    size_t                   N{100};                                      /**< Number of rhs matrix columns */
+    size_t                   K{50};                                       /**< Number of lhs matrix columns/rhs matrix rows */
+    size_t                   B{1};                                        /**< Batch size */
+    arm_compute::DataType    data_type{arm_compute::DataType::F32};       /**< Data type */
+    arm_compute::CLTunerMode tuner_mode{arm_compute::CLTunerMode::RAPID}; /**< OpenCL tuner mode */
 };
 
 /** Formatted output of the CommonGemmExampleParams type
@@ -70,7 +71,8 @@ public:
      * @param[in,out] parser            A parser on which "parse()" hasn't been called yet.
      * @param[in]     default_data_type Default data type if unspecified.
      */
-    CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, arm_compute::DataType default_data_type = arm_compute::DataType::F32);
+    CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser,
+                             arm_compute::DataType                  default_data_type = arm_compute::DataType::F32);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CommonGemmExampleOptions(const CommonGemmExampleOptions &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -82,11 +84,11 @@ public:
     /** Default destructor */
    ~CommonGemmExampleOptions() = default;
 
-    arm_compute::utils::ToggleOption         *help;       /**< Show help option */
-    arm_compute::utils::SimpleOption<size_t> *M;          /**< Number of lhs matrix rows option */
-    arm_compute::utils::SimpleOption<size_t> *N;          /**< Number of rhs matrix columns option */
-    arm_compute::utils::SimpleOption<size_t> *K;          /**< Number of lhs matrix columns/rhs matrix rows option */
-    arm_compute::utils::SimpleOption<size_t> *B;          /**< Batch size option */
+    arm_compute::utils::ToggleOption         *help; /**< Show help option */
+    arm_compute::utils::SimpleOption<size_t> *M;    /**< Number of lhs matrix rows option */
+    arm_compute::utils::SimpleOption<size_t> *N;    /**< Number of rhs matrix columns option */
+    arm_compute::utils::SimpleOption<size_t> *K;    /**< Number of lhs matrix columns/rhs matrix rows option */
+    arm_compute::utils::SimpleOption<size_t> *B;    /**< Batch size option */
     arm_compute::utils::EnumOption<arm_compute::DataType>    *data_type;  /**< Data type */
     arm_compute::utils::EnumOption<arm_compute::CLTunerMode> *tuner_mode; /**< OpenCL tuner mode */
 };
diff --git a/examples/gemm_tuner/GemmTunerHelpers.h b/examples/gemm_tuner/GemmTunerHelpers.h
index ae5cfbb19e..dbff9e2dff 100644
--- a/examples/gemm_tuner/GemmTunerHelpers.h
+++ b/examples/gemm_tuner/GemmTunerHelpers.h
@@ -36,9 +36,9 @@ bool update_padding_for_cl_image(arm_compute::ITensorInfo *tensor)
     constexpr unsigned int num_floats_per_pixel = 4;
 
     const unsigned int stride_y_in_elements = tensor->strides_in_bytes()[1] / tensor->element_size();
-    const unsigned int pixel_aligment        = arm_compute::get_cl_image_pitch_alignment(
-                                                   arm_compute::CLKernelLibrary::get().get_device());
-    if(pixel_aligment == 0)
+    const unsigned int pixel_aligment =
+        arm_compute::get_cl_image_pitch_alignment(arm_compute::CLKernelLibrary::get().get_device());
+    if (pixel_aligment == 0)
     {
         return false;
     }
diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp
index dd03873921..7daa0b07d3 100644
--- a/examples/gemm_tuner/cl_gemm_native.cpp
+++ b/examples/gemm_tuner/cl_gemm_native.cpp
@@ -25,19 +25,20 @@
 #error "This example needs to be built with -DARM_COMPUTE_CL"
 #endif /* ARM_COMPUTE_CL */
 
-#include "CommonGemmExampleOptions.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+
 #include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
 #include "tests/CL/Helper.h"
-#include "utils/Utils.h"
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
 
+#include "CommonGemmExampleOptions.h"
 #include <cstdlib>
 
 using namespace arm_compute;
@@ -51,9 +52,9 @@ namespace
 /** Structure holding all tunable gemm configs specific to this example/strategy */
 struct GemmConfigs
 {
-    size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
-    size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
-    size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
+    size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+    size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+    size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
 };
 
 /** Formatted output of the GemmConfigs type
@@ -145,13 +146,13 @@ public:
         // Parse command line options
         parser.parse(argc, argv);
 
-        if(param_options.help->is_set() && param_options.help->value())
+        if (param_options.help->is_set() && param_options.help->value())
         {
             // Print help message
             parser.print_help(argv[0]);
             return false;
         }
-        if(!parser.validate())
+        if (!parser.validate())
         {
             // Invalid arguments. Use default parameters and configs
             std::cerr << "Invalid arguments." << std::endl;
@@ -198,8 +199,9 @@ public:
 
         // Validate argments
         Status status{};
-        status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
-        if(!status)
+        status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info,
+                               kernel_info);
+        if (!status)
         {
             // Unsupported arguments
             std::cerr << "Unsupported arguments." << std::endl;
@@ -221,11 +223,7 @@ public:
     void do_run() override
     {
         // Execute the function
-        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
-                                { ACL_SRC_1, &rhs },
-                                { ACL_SRC_2, &bias },
-                                { ACL_DST, &dst }
-                              });
+        ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
         gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp
index 59044477bf..75f3539cb9 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp
@@ -31,14 +31,15 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+
 #include "examples/gemm_tuner/CommonGemmExampleOptions.h"
 #include "examples/gemm_tuner/GemmTunerHelpers.h"
 #include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
 #include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
 #include "tests/CL/Helper.h"
-#include "utils/Utils.h"
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
 
 #include <cstdlib>
 
@@ -53,16 +54,16 @@ namespace
 /** Structure holding all tunable gemm configs specific to this example/strategy */
 struct GemmConfigs
 {
-    size_t m0{ 4 };                        /**< Number of rows processed by the matrix multiplication */
-    size_t n0{ 4 };                        /**< Number of columns processed by the matrix multiplication */
-    size_t k0{ 4 };                        /**< Number of partial accumulations performed by the matrix multiplication */
-    size_t v0{ 1 };                        /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
-    size_t h0{ 1 };                        /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
-    bool   interleave_lhs{ true };         /**< Interleave lhs matrix */
-    bool   transpose_lhs{ true };          /**< Transpose lhs matrix. */
-    bool   interleave_rhs{ true };         /**< Interleave rhs matrix */
-    bool   transpose_rhs{ true };          /**< Transpose rhs matrix. */
-    bool   export_to_cl_image_rhs{ true }; /**< Export rhs matrix to cl_image. */
+    size_t m0{4};                        /**< Number of rows processed by the matrix multiplication */
+    size_t n0{4};                        /**< Number of columns processed by the matrix multiplication */
+    size_t k0{4};                        /**< Number of partial accumulations performed by the matrix multiplication */
+    size_t v0{1};                        /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+    size_t h0{1};                        /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool   interleave_lhs{true};         /**< Interleave lhs matrix */
+    bool   transpose_lhs{true};          /**< Transpose lhs matrix. */
+    bool   interleave_rhs{true};         /**< Interleave rhs matrix */
+    bool   transpose_rhs{true};          /**< Transpose rhs matrix. */
+    bool   export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image. */
 };
 
 /** Formatted output of the GemmConfigs type
@@ -119,8 +120,10 @@ public:
         // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
         // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
         // 2 variants (both transposed and none transposed)
-        transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)");
-        export_to_cl_image_rhs->set_help("Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
+        transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do "
+                                "transpose lhs matrix (0)");
+        export_to_cl_image_rhs->set_help(
+            "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
     }
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -133,17 +136,18 @@ public:
     /** Default destructor */
     ~GemmConfigOptions() = default;
 
-    SimpleOption<size_t> *m0;             /**< Number of rows processed by the matrix multiplication option */
-    SimpleOption<size_t> *n0;             /**< Number of columns processed by the matrix multiplication option */
-    SimpleOption<size_t> *k0;             /**< Number of partial accumulations performed by the matrix multiplication option */
-    SimpleOption<size_t> *v0;             /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
-    SimpleOption<size_t> *h0;             /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+    SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+    SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+    SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+    SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
+    SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
     SimpleOption<bool>   *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */
     SimpleOption<bool>   *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
     // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
     // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
     // 2 variants (both transposed and none transposed)
-    SimpleOption<bool> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
+    SimpleOption<bool> *
+        transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
     SimpleOption<bool> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/
 };
 
@@ -198,13 +202,13 @@ public:
         // Parse command line options
         parser.parse(argc, argv);
 
-        if(param_options.help->is_set() && param_options.help->value())
+        if (param_options.help->is_set() && param_options.help->value())
        {
             // Print help message
             parser.print_help(argv[0]);
             return false;
         }
-        if(!parser.validate())
+        if (!parser.validate())
         {
             // Invalid arguments. Use default parameters and configs
             std::cerr << "Invalid arguments." << std::endl;
@@ -256,20 +260,22 @@ public:
         kernel_info.broadcast_bias          = true;
         kernel_info.activation_info         = act_info;
 
-        if(rhs_info.h0 == 0)
+        if (rhs_info.h0 == 0)
         {
             rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
         }
 
         // Initialise lhs_reshaped tensor info
-        lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
+        lhs_reshaped.allocator()->init(
+            TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
 
         // Initialise rhs_reshaped tensor info
-        rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+        rhs_reshaped.allocator()->init(
+            TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
 
-        if(rhs_info.export_to_cl_image)
+        if (rhs_info.export_to_cl_image)
         {
-            if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+            if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
             {
                 std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
                 return false;
@@ -279,7 +285,7 @@ public:
         // Validate argments
         Status status{};
         status = reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, kernel_info.reinterpret_input_as_3d);
-        if(!status)
+        if (!status)
         {
             // Unsupported arguments
             std::cerr << "Unsupported arguments." << std::endl;
@@ -287,8 +293,9 @@ public:
             return false;
         }
 
-        status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
-        if(!status)
+        status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+                               rhs_info, kernel_info);
+        if (!status)
         {
             // Unsupported arguments
             std::cerr << "Unsupported arguments." << std::endl;
@@ -300,7 +307,8 @@ public:
         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
 
         // Configure function
-        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+                       rhs_info, kernel_info);
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -315,14 +323,11 @@ public:
     void do_run() override
     {
         // Execute the functions
-        ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } });
+        ITensorPack reshape_lsh_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}});
         reshape_lhs.run(reshape_lsh_pack);
 
-        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
-                                { ACL_SRC_1, &rhs_reshaped },
-                                { ACL_SRC_2, &bias },
-                                { ACL_DST, &dst }
-                              });
+        ITensorPack gemm_pack(
+            {{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
         gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
index 0ad2a65dc2..cfea2c9bac 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
@@ -25,20 +25,21 @@
 #error "This example needs to be built with -DARM_COMPUTE_CL"
 #endif /* ARM_COMPUTE_CL */
 
-#include "CommonGemmExampleOptions.h"
-#include "GemmTunerHelpers.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+
 #include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
 #include "tests/CL/Helper.h"
-#include "utils/Utils.h"
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
 
+#include "CommonGemmExampleOptions.h"
+#include "GemmTunerHelpers.h"
 #include <cstdlib>
 
 using namespace arm_compute;
@@ -52,13 +53,13 @@ namespace
 /** Structure holding all tunable gemm configs specific to this example/strategy */
 struct GemmConfigs
 {
-    size_t m0{ 4 };                        /**< Number of rows processed by the matrix multiplication */
-    size_t n0{ 4 };                        /**< Number of columns processed by the matrix multiplication */
-    size_t k0{ 4 };                        /**< Number of partial accumulations performed by the matrix multiplication */
-    size_t h0{ 1 };                        /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
-    bool   interleave_rhs{ true };         /**< Interleave rhs matrix */
-    bool   transpose_rhs{ true };          /**< Transpose rhs matrix */
-    bool   export_to_cl_image_rhs{ true }; /**< Export rhs matrix to cl_image.*/
+    size_t m0{4};                        /**< Number of rows processed by the matrix multiplication */
+    size_t n0{4};                        /**< Number of columns processed by the matrix multiplication */
+    size_t k0{4};                        /**< Number of partial accumulations performed by the matrix multiplication */
+    size_t h0{1};                        /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool   interleave_rhs{true};         /**< Interleave rhs matrix */
+    bool   transpose_rhs{true};          /**< Transpose rhs matrix */
+    bool   export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image.*/
 };
 
 /** Formatted output of the GemmConfigs type
@@ -106,7 +107,8 @@ public:
         h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row");
         interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)");
         transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)");
-        export_to_cl_image_rhs->set_help("Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
+        export_to_cl_image_rhs->set_help(
+            "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
     }
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -119,10 +121,10 @@ public:
     /** Default destructor */
     ~GemmConfigOptions() = default;
 
-    SimpleOption<size_t> *m0;             /**< Number of rows processed by the matrix multiplication option */
-    SimpleOption<size_t> *n0;             /**< Number of columns processed by the matrix multiplication option */
-    SimpleOption<size_t> *k0;             /**< Number of partial accumulations performed by the matrix multiplication option */
-    SimpleOption<size_t> *h0;             /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+    SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+    SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+    SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+    SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
     SimpleOption<bool>   *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
     SimpleOption<bool>   *transpose_rhs;  /**< Transpose rhs matrix option (1 enable; 0 disable) */
     SimpleOption<bool>   *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/
@@ -170,13 +172,13 @@ public:
         // Parse command line options
         parser.parse(argc, argv);
 
-        if(param_options.help->is_set() && param_options.help->value())
+        if (param_options.help->is_set() && param_options.help->value())
         {
             // Print help message
             parser.print_help(argv[0]);
             return false;
         }
-        if(!parser.validate())
+        if (!parser.validate())
         {
             // Invalid arguments. Use default parameters and configs
             std::cerr << "Invalid arguments." << std::endl;
@@ -225,17 +227,18 @@ public:
         kernel_info.broadcast_bias          = true;
         kernel_info.activation_info         = act_info;
 
-        if(rhs_info.h0 == 0)
+        if (rhs_info.h0 == 0)
         {
             rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
         }
 
         // Initialise rhs_reshaped tensor info
-        rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+        rhs_reshaped.allocator()->init(
+            TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
 
-        if(rhs_info.export_to_cl_image)
+        if (rhs_info.export_to_cl_image)
         {
-            if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+            if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
             {
                 std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
                 return false;
@@ -244,8 +247,9 @@ public:
 
         // Validate argments
         Status status{};
-        status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
-        if(!status)
+        status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+                               rhs_info, kernel_info);
+        if (!status)
         {
             // Unsupported arguments
             std::cerr << "Unsupported arguments." << std::endl;
@@ -254,7 +258,8 @@ public:
         }
 
         // Configure function
-        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info,
+                       kernel_info);
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -268,11 +273,7 @@ public:
     void do_run() override
     {
         // Execute the function
-        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
-                                { ACL_SRC_1, &rhs_reshaped },
-                                { ACL_SRC_2, &bias },
-                                { ACL_DST, &dst }
-                              });
+        ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
         gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
index 9cf9c9fed0..3808b98b7d 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
@@ -31,14 +31,15 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+
 #include "examples/gemm_tuner/CommonGemmExampleOptions.h"
 #include "examples/gemm_tuner/GemmTunerHelpers.h"
 #include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h"
 #include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
 #include "tests/CL/Helper.h"
-#include "utils/Utils.h"
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
 
 #include <cstdlib>
 
@@ -53,15 +54,15 @@ namespace
 /** Structure holding all tunable gemm configs specific to this example/strategy */
 struct GemmConfigs
 {
-    size_t m0{ 4 };                /**< Number of rows processed by the matrix multiplication */
-    size_t n0{ 4 };                /**< Number of columns processed by the matrix multiplication */
-    size_t k0{ 4 };                /**< Number of partial accumulations performed by the matrix multiplication */
-    size_t v0{ 1 };                /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
-    size_t h0{ 1 };                /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
-    bool   interleave_lhs{ true }; /**< Interleave lhs matrix */
-    bool   transpose_lhs{ true };  /**< Transpose lhs matrix. */
-    bool   interleave_rhs{ true }; /**< Interleave rhs matrix */
-    bool   transpose_rhs{ true };  /**< Transpose rhs matrix. */
+    size_t m0{4};                /**< Number of rows processed by the matrix multiplication */
+    size_t n0{4};                /**< Number of columns processed by the matrix multiplication */
+    size_t k0{4};                /**< Number of partial accumulations performed by the matrix multiplication */
+    size_t v0{1};                /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+    size_t h0{1};                /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool   interleave_lhs{true}; /**< Interleave lhs matrix */
+    bool   transpose_lhs{true};  /**< Transpose lhs matrix. */
+    bool   interleave_rhs{true}; /**< Interleave rhs matrix */
+    bool   transpose_rhs{true};  /**< Transpose rhs matrix. */
 };
 
 /** Formatted output of the GemmConfigs type
@@ -116,7 +117,8 @@ public:
         // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
         // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
         // 2 variants (both transposed and none transposed)
-        transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)");
+        transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do "
+                                "transpose lhs matrix (0)");
     }
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -129,17 +131,18 @@ public:
     /** Default destructor */
     ~GemmConfigOptions() = default;
 
-    SimpleOption<size_t> *m0;             /**< Number of rows processed by the matrix multiplication option */
-    SimpleOption<size_t> *n0;             /**< Number of columns processed by the matrix multiplication option */
-    SimpleOption<size_t> *k0;             /**< Number of partial accumulations performed by the matrix multiplication option */
-    SimpleOption<size_t> *v0;             /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
-    SimpleOption<size_t> *h0;             /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+    SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+    SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+    SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+    SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
+    SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
     SimpleOption<bool>   *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */
     SimpleOption<bool>   *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
     // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
     // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
     // 2 variants (both transposed and none transposed)
-    SimpleOption<bool> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
+    SimpleOption<bool> *
+        transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
 };
 
 /** Consumes the gemm configuration options and creates a structure containing all information
@@ -186,12 +189,12 @@ public:
         GemmConfigOptions config_options(parser);
 
         parser.parse(argc, argv);
-        if(param_options.help->is_set() && param_options.help->value())
+        if (param_options.help->is_set() && param_options.help->value())
         {
             parser.print_help(argv[0]);
             return false;
         }
-        if(!parser.validate())
+        if (!parser.validate())
         {
             // Invalid arguments. Use default parameters and configs
             std::cerr << "Invalid arguments." << std::endl;
@@ -217,10 +220,7 @@ public:
         rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type));
 
         // Set arbitrary quantization information
-        const QuantizationInfo q_info
-        {
-            0.012, 3
-        };
+        const QuantizationInfo q_info{0.012, 3};
         lhs.info()->set_quantization_info(q_info);
         rhs.info()->set_quantization_info(q_info);
         dst.info()->set_quantization_info(q_info);
@@ -240,45 +240,44 @@ public:
         rhs_info.transpose          = configs.transpose_rhs;
         rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
 
-        if(rhs_info.h0 == 0)
+        if (rhs_info.h0 == 0)
         {
             rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
         }
 
-        lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
-        rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+        lhs_reshaped.allocator()->init(
+            TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
+        rhs_reshaped.allocator()->init(
+            TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
         lhs_reshaped.info()->set_quantization_info(q_info);
         rhs_reshaped.info()->set_quantization_info(q_info);
 
-        if(rhs_info.export_to_cl_image)
+        if (rhs_info.export_to_cl_image)
         {
-            if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+            if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
             {
                 std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
                 return false;
             }
         }
 
-        GEMMReshapeInfo gemm_info
-        {
-            static_cast<int>(params.M),
-            static_cast<int>(params.N),
-            static_cast<int>(params.K),
-            static_cast<int>(configs.h0),
-            static_cast<int>(configs.v0),
-            0,
-            false,
-            true
-        };
+        GEMMReshapeInfo gemm_info{static_cast<int>(params.M),
+                                  static_cast<int>(params.N),
+                                  static_cast<int>(params.K),
+                                  static_cast<int>(configs.h0),
+                                  static_cast<int>(configs.v0),
+                                  0,
+                                  false,
+                                  true};
 
         // Validate argments
-        if(!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d()))
+        if (!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d()))
         {
             std::cerr << "Invalid arguments for ClGemmReshapeLHSMatrixKernel." << std::endl;
             return false;
         }
-        if(!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info))
+        if (!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info))
         {
             std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel." << std::endl;
             return false;
@@ -300,10 +299,10 @@ public:
     }
     void do_run() override
     {
-        ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } });
+        ITensorPack reshape_lsh_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}});
         reshape_lhs.run(reshape_lsh_pack);
 
-        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+        ITensorPack gemm_pack({{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_DST, &dst}});
         gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
index 94f3c93166..4acb316a3c 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
@@ -25,23 +25,23 @@
 #error "This example needs to be built with -DARM_COMPUTE_CL"
 #endif /* ARM_COMPUTE_CL */
 
-#include "CommonGemmExampleOptions.h"
-#include "GemmTunerHelpers.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+
 #include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h"
 #include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h"
 #include "tests/CL/Helper.h"
-#include "utils/Utils.h"
 #include "utils/command_line/CommandLineOptions.h"
 #include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
 
+#include "CommonGemmExampleOptions.h"
+#include "GemmTunerHelpers.h"
 #include <cstdlib>
 #include <memory>
@@ -56,12 +56,12 @@ namespace
 /** Structure holding all tunable gemm configs specific to this example/strategy */
 struct GemmConfigs
 {
-    size_t m0{ 4 };                /**< Number of rows processed by the matrix multiplication */
-    size_t n0{ 4 };                /**< Number of columns processed by the matrix multiplication */
-    size_t k0{ 4 };                /**< Number of partial accumulations performed by the matrix multiplication */
-    size_t h0{ 1 };                /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
-    bool   interleave_rhs{ true }; /**< Interleave rhs matrix */
-    bool   transpose_rhs{ true };  /**< Transpose rhs matrix */
+    size_t m0{4};                /**< Number of rows processed by the matrix multiplication */
+    size_t n0{4};                /**< Number of columns processed by the matrix multiplication */
+    size_t k0{4};                /**< Number of partial accumulations performed by the matrix multiplication */
+    size_t h0{1};                /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool   interleave_rhs{true}; /**< Interleave rhs matrix */
+    bool   transpose_rhs{true};  /**< Transpose rhs matrix */
 };
 
 /** Formatted output of the GemmConfigs type
@@ -119,10 +119,10 @@ public:
     /** Default destructor */
     ~GemmConfigOptions() = default;
 
-    SimpleOption<size_t> *m0;             /**< Number of rows processed by the matrix multiplication option */
-    SimpleOption<size_t> *n0;             /**< Number of columns processed by the matrix multiplication option */
-    SimpleOption<size_t> *k0;             /**< Number of partial accumulations performed by the matrix multiplication option */
-    SimpleOption<size_t> *h0;             /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+    SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+    SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+    SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+    SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
     SimpleOption<bool>   *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
     SimpleOption<bool>   *transpose_rhs;  /**< Transpose rhs matrix option (1 enable; 0 disable) */
 };
@@ -147,8 +147,9 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options)
 
 } // namespace
 
-using ClGemmLowpMatrixMultiplyReshapedOnlyRhs = test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
-using ClGemmLowpMatrixAReduction              = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>;
+using ClGemmLowpMatrixMultiplyReshapedOnlyRhs =
+    test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
+using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>;
 
 class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample : public Example
 {
@@ -165,12 +166,12 @@ public:
         GemmConfigOptions config_options(parser);
 
         parser.parse(argc, argv);
-        if(param_options.help->is_set() && param_options.help->value())
+        if (param_options.help->is_set() && param_options.help->value())
         {
             parser.print_help(argv[0]);
             return false;
         }
-        if(!parser.validate())
+        if (!parser.validate())
         {
             // Invalid arguments. Use default parameters and configs
             std::cerr << "Invalid arguments." << std::endl;
@@ -199,10 +200,7 @@ public:
 
         // Set arbitrary quantization information (non-zero offset to ensure offset contribution stage is included)
         // Could be extended in the future to include a user-controlled option for offset == 0
-        const QuantizationInfo q_info
-        {
-            0.012, 3
-        };
+        const QuantizationInfo q_info{0.012, 3};
         lhs.info()->set_quantization_info(q_info);
         rhs.info()->set_quantization_info(q_info);
         bias.info()->set_quantization_info(q_info);
@@ -220,16 +218,17 @@ public:
         rhs_info.transpose          = configs.transpose_rhs;
         rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
 
-        if(rhs_info.h0 == 0)
+        if (rhs_info.h0 == 0)
         {
             rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
         }
 
-        rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+        rhs_reshaped.allocator()->init(
+            TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
         rhs_reshaped.info()->set_quantization_info(q_info);
-        if(rhs_info.export_to_cl_image)
+        if (rhs_info.export_to_cl_image)
         {
-            if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+            if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
             {
                 std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
                 return false;
@@ -251,9 +250,7 @@ public:
         gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
         gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
 
-        quantization::compute_quantized_multipliers_and_shifts(lhs.info(),
-                                                               rhs.info(),
-                                                               dst.info(),
+        quantization::compute_quantized_multipliers_and_shifts(lhs.info(), rhs.info(), dst.info(),
                                                                gemmlowp_output_stage.gemmlowp_multipliers.data(),
                                                                gemmlowp_output_stage.gemmlowp_shifts.data());
         gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
@@ -290,14 +287,14 @@ public:
         gemm_info.output_stage = gemmlowp_output_stage;
 
         // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
-        if(gemm_info.b_offset != 0)
+        if (gemm_info.b_offset != 0)
         {
             const TensorInfo info_vector_sum_row(compute_reductionB_shape(*lhs.info()), 1, DataType::S32);
             vector_sum_row.allocator()->init(info_vector_sum_row);
 
             mtx_a_reduction = std::make_unique<ClGemmLowpMatrixAReduction>();
 
-            if(!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}))
+            if (!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}))
             {
                 std::cerr << "Invalid arguments for CLGEMMLowpMatrixAReductionKernel." << std::endl;
                 return false;
@@ -306,7 +303,7 @@ public:
             mtx_a_reduction->configure(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{});
         }
         // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
-        if(gemm_info.a_offset != 0)
+        if (gemm_info.a_offset != 0)
         {
             const TensorInfo info_vector_sum_col(compute_reductionA_shape(*rhs.info()), 1, DataType::S32);
             vector_sum_col.allocator()->init(info_vector_sum_col);
@@ -314,8 +311,10 @@ public:
         }
 
         // Validate argments
-        if(!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
-                          gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(), dst_shifts.info()))
+        if (!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info,
+                           gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
+                           gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(),
+                           dst_multipliers.info(), dst_shifts.info()))
         {
             std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel." << std::endl;
             return false;
@@ -323,8 +322,9 @@ public:
 
         // Configure function
         gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info,
-                       gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(),
-                       bias.info(), dst_multipliers.info(), dst_shifts.info());
+                       gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
+                       gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(),
+                       dst_shifts.info());
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -341,13 +341,20 @@ public:
     }
     void do_run() override
     {
-        if(mtx_a_reduction != nullptr)
+        if (mtx_a_reduction != nullptr)
        {
-            ITensorPack red_pack({ { ACL_SRC, &lhs }, { ACL_DST, &dst } });
+            ITensorPack red_pack({{ACL_SRC, &lhs}, {ACL_DST, &dst}});
             mtx_a_reduction->run(red_pack);
         }
 
-        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_BIAS, &bias }, { ACL_VEC_COL_SUM, &vector_sum_col }, { ACL_VEC_ROW_SUM, &vector_sum_row }, { ACL_SHIFTS, &dst_shifts }, { ACL_MULTIPLIERS, &dst_multipliers }, { ACL_DST, &dst } });
+        ITensorPack gemm_pack({{ACL_SRC_0, &lhs},
+                               {ACL_SRC_1, &rhs},
+                               {ACL_BIAS, &bias},
+                               {ACL_VEC_COL_SUM, &vector_sum_col},
+                               {ACL_VEC_ROW_SUM, &vector_sum_row},
+                               {ACL_SHIFTS, &dst_shifts},
+                               {ACL_MULTIPLIERS, &dst_multipliers},
+                               {ACL_DST, &dst}});
         gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
@@ -370,7 +377,7 @@ private:
     CLTensor dst_shifts{};
     CLTuner  tuner{};
 
     ClGemmLowpMatrixMultiplyReshapedOnlyRhs     gemm{};
-    std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{ nullptr };
+    std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{nullptr};
 };
 
 /** Main test program for gemmlowp reshaped rhs only with fused output stage fixedpoint
-- 
cgit v1.2.1
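
For reference, a formatting pass of the shape described in the commit message can be approximated with a small driver script. The sketch below is an assumption rather than part of this delivery: the revised .clang-format file is not included in the patch, so the script merely mirrors the stated tool version, the C/C++-only file filter, and the directory exclusion list; path handling and suffix choices are illustrative.

    # format_repo.py -- hypothetical reproduction sketch, not part of this delivery.
    # Assumes clang-format 14.0.6 on PATH and the revised .clang-format at the repo root.
    import pathlib
    import subprocess

    # Directories excluded per the commit message.
    EXCLUDED_DIRS = (
        "compute_kernel_writer/validation/",
        "tests/",
        "include/",
        "src/core/NEON/kernels/convolution/",
        "src/core/NEON/kernels/arm_gemm/",
        "src/core/NEON/kernels/arm_conv/",
        "data/",
    )

    for path in sorted(pathlib.Path(".").rglob("*")):
        # Only strictly C/C++ sources; .cl kernels and build files (Android.bp,
        # SConscript, ...) fall outside this suffix set and are skipped.
        if path.suffix not in {".c", ".cpp", ".h", ".hpp"}:
            continue
        rel = path.as_posix()
        if any(rel.startswith(prefix) for prefix in EXCLUDED_DIRS):
            continue
        # In-place reformat using the repository's .clang-format ("--style=file").
        subprocess.run(["clang-format", "-i", "--style=file", rel], check=True)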