aboutsummaryrefslogtreecommitdiff
path: root/examples/gemm_tuner
diff options
context:
space:
mode:
authorFelix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-27 17:46:17 +0100
committerfelixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-28 12:08:05 +0000
commitafd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree03bc7d5a762099989b16a656fa8d397b490ed70e /examples/gemm_tuner
parentbdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
downloadComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang format configuration file(not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'examples/gemm_tuner')
-rw-r--r--examples/gemm_tuner/CommonGemmExampleOptions.cpp16
-rw-r--r--examples/gemm_tuner/CommonGemmExampleOptions.h28
-rw-r--r--examples/gemm_tuner/GemmTunerHelpers.h6
-rw-r--r--examples/gemm_tuner/cl_gemm_native.cpp26
-rw-r--r--examples/gemm_tuner/cl_gemm_reshaped.cpp77
-rw-r--r--examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp59
-rw-r--r--examples/gemm_tuner/cl_gemmlowp_reshaped.cpp85
-rw-r--r--examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp87
8 files changed, 196 insertions, 188 deletions
diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.cpp b/examples/gemm_tuner/CommonGemmExampleOptions.cpp
index bee202b99e..c2a465604a 100644
--- a/examples/gemm_tuner/CommonGemmExampleOptions.cpp
+++ b/examples/gemm_tuner/CommonGemmExampleOptions.cpp
@@ -39,7 +39,8 @@ using namespace utils;
return os;
}
-CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, arm_compute::DataType default_data_type)
+CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser,
+ arm_compute::DataType default_data_type)
: help(parser.add_option<ToggleOption>("help")),
M(parser.add_positional_option<SimpleOption<size_t>>("M", 100)),
N(parser.add_positional_option<SimpleOption<size_t>>("N", 100)),
@@ -48,21 +49,16 @@ CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLi
data_type(),
tuner_mode()
{
- const std::set<DataType> supported_data_types
- {
+ const std::set<DataType> supported_data_types{
DataType::F16,
DataType::F32,
DataType::QASYMM8,
};
- const std::set<CLTunerMode> supported_tuner_modes
- {
- CLTunerMode::EXHAUSTIVE,
- CLTunerMode::NORMAL,
- CLTunerMode::RAPID
- };
+ const std::set<CLTunerMode> supported_tuner_modes{CLTunerMode::EXHAUSTIVE, CLTunerMode::NORMAL, CLTunerMode::RAPID};
- ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(), "Default data type unsupported");
+ ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(),
+ "Default data type unsupported");
data_type = parser.add_option<EnumOption<DataType>>("type", supported_data_types, default_data_type);
tuner_mode = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::RAPID);
diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.h b/examples/gemm_tuner/CommonGemmExampleOptions.h
index f7447e3db3..38178bcef8 100644
--- a/examples/gemm_tuner/CommonGemmExampleOptions.h
+++ b/examples/gemm_tuner/CommonGemmExampleOptions.h
@@ -27,21 +27,22 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLTuner.h"
-#include "utils/TypePrinter.h"
+
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/TypePrinter.h"
namespace gemm_tuner
{
/** Structure holding all the common gemm example parameters */
struct CommonGemmExampleParams
{
- size_t M{ 100 }; /**< Number of lhs matrix rows */
- size_t N{ 100 }; /**< Number of rhs matrix columns */
- size_t K{ 50 }; /**< Number of lhs matrix columns/rhs matrix rows */
- size_t B{ 1 }; /**< Batch size */
- arm_compute::DataType data_type{ arm_compute::DataType::F32 }; /**< Data type */
- arm_compute::CLTunerMode tuner_mode{ arm_compute::CLTunerMode::RAPID }; /**< OpenCL tuner mode */
+ size_t M{100}; /**< Number of lhs matrix rows */
+ size_t N{100}; /**< Number of rhs matrix columns */
+ size_t K{50}; /**< Number of lhs matrix columns/rhs matrix rows */
+ size_t B{1}; /**< Batch size */
+ arm_compute::DataType data_type{arm_compute::DataType::F32}; /**< Data type */
+ arm_compute::CLTunerMode tuner_mode{arm_compute::CLTunerMode::RAPID}; /**< OpenCL tuner mode */
};
/** Formatted output of the CommonGemmExampleParams type
@@ -70,7 +71,8 @@ public:
* @param[in,out] parser A parser on which "parse()" hasn't been called yet.
* @param[in] default_data_type Default data type if unspecified.
*/
- CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, arm_compute::DataType default_data_type = arm_compute::DataType::F32);
+ CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser,
+ arm_compute::DataType default_data_type = arm_compute::DataType::F32);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CommonGemmExampleOptions(const CommonGemmExampleOptions &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -82,11 +84,11 @@ public:
/** Default destructor */
~CommonGemmExampleOptions() = default;
- arm_compute::utils::ToggleOption *help; /**< Show help option */
- arm_compute::utils::SimpleOption<size_t> *M; /**< Number of lhs matrix rows option */
- arm_compute::utils::SimpleOption<size_t> *N; /**< Number of rhs matrix columns option */
- arm_compute::utils::SimpleOption<size_t> *K; /**< Number of lhs matrix columns/rhs matrix rows option */
- arm_compute::utils::SimpleOption<size_t> *B; /**< Batch size option */
+ arm_compute::utils::ToggleOption *help; /**< Show help option */
+ arm_compute::utils::SimpleOption<size_t> *M; /**< Number of lhs matrix rows option */
+ arm_compute::utils::SimpleOption<size_t> *N; /**< Number of rhs matrix columns option */
+ arm_compute::utils::SimpleOption<size_t> *K; /**< Number of lhs matrix columns/rhs matrix rows option */
+ arm_compute::utils::SimpleOption<size_t> *B; /**< Batch size option */
arm_compute::utils::EnumOption<arm_compute::DataType> *data_type; /**< Data type */
arm_compute::utils::EnumOption<arm_compute::CLTunerMode> *tuner_mode; /**< OpenCL tuner mode */
};
diff --git a/examples/gemm_tuner/GemmTunerHelpers.h b/examples/gemm_tuner/GemmTunerHelpers.h
index ae5cfbb19e..dbff9e2dff 100644
--- a/examples/gemm_tuner/GemmTunerHelpers.h
+++ b/examples/gemm_tuner/GemmTunerHelpers.h
@@ -36,9 +36,9 @@ bool update_padding_for_cl_image(arm_compute::ITensorInfo *tensor)
constexpr unsigned int num_floats_per_pixel = 4;
const unsigned int stride_y_in_elements = tensor->strides_in_bytes()[1] / tensor->element_size();
- const unsigned int pixel_aligment = arm_compute::get_cl_image_pitch_alignment(
- arm_compute::CLKernelLibrary::get().get_device());
- if(pixel_aligment == 0)
+ const unsigned int pixel_aligment =
+ arm_compute::get_cl_image_pitch_alignment(arm_compute::CLKernelLibrary::get().get_device());
+ if (pixel_aligment == 0)
{
return false;
}
diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp
index dd03873921..7daa0b07d3 100644
--- a/examples/gemm_tuner/cl_gemm_native.cpp
+++ b/examples/gemm_tuner/cl_gemm_native.cpp
@@ -25,19 +25,20 @@
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
-#include "CommonGemmExampleOptions.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
#include "tests/CL/Helper.h"
-#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
+#include "CommonGemmExampleOptions.h"
#include <cstdlib>
using namespace arm_compute;
@@ -51,9 +52,9 @@ namespace
/** Structure holding all tunable gemm configs specific to this example/strategy */
struct GemmConfigs
{
- size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
- size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
- size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
+ size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+ size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+ size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
};
/** Formatted output of the GemmConfigs type
@@ -145,13 +146,13 @@ public:
// Parse command line options
parser.parse(argc, argv);
- if(param_options.help->is_set() && param_options.help->value())
+ if (param_options.help->is_set() && param_options.help->value())
{
// Print help message
parser.print_help(argv[0]);
return false;
}
- if(!parser.validate())
+ if (!parser.validate())
{
// Invalid arguments. Use default parameters and configs
std::cerr << "Invalid arguments." << std::endl;
@@ -198,8 +199,9 @@ public:
// Validate argments
Status status{};
- status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- if(!status)
+ status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info,
+ kernel_info);
+ if (!status)
{
// Unsupported arguments
std::cerr << "Unsupported arguments." << std::endl;
@@ -221,11 +223,7 @@ public:
void do_run() override
{
// Execute the function
- ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
- { ACL_SRC_1, &rhs },
- { ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
gemm.run(gemm_pack);
// Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp
index 59044477bf..75f3539cb9 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp
@@ -31,14 +31,15 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+
#include "examples/gemm_tuner/CommonGemmExampleOptions.h"
#include "examples/gemm_tuner/GemmTunerHelpers.h"
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
#include "tests/CL/Helper.h"
-#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
#include <cstdlib>
@@ -53,16 +54,16 @@ namespace
/** Structure holding all tunable gemm configs specific to this example/strategy */
struct GemmConfigs
{
- size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
- size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
- size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
- size_t v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
- size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool interleave_lhs{ true }; /**< Interleave lhs matrix */
- bool transpose_lhs{ true }; /**< Transpose lhs matrix. */
- bool interleave_rhs{ true }; /**< Interleave rhs matrix */
- bool transpose_rhs{ true }; /**< Transpose rhs matrix. */
- bool export_to_cl_image_rhs{ true }; /**< Export rhs matrix to cl_image. */
+ size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+ size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+ size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
+ size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+ size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool interleave_lhs{true}; /**< Interleave lhs matrix */
+ bool transpose_lhs{true}; /**< Transpose lhs matrix. */
+ bool interleave_rhs{true}; /**< Interleave rhs matrix */
+ bool transpose_rhs{true}; /**< Transpose rhs matrix. */
+ bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image. */
};
/** Formatted output of the GemmConfigs type
@@ -119,8 +120,10 @@ public:
// FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
// transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
// 2 variants (both transposed and none transposed)
- transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)");
- export_to_cl_image_rhs->set_help("Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
+ transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do "
+ "transpose lhs matrix (0)");
+ export_to_cl_image_rhs->set_help(
+ "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
}
/** Prevent instances of this class from being copied (As this class contains pointers) */
GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -133,17 +136,18 @@ public:
/** Default destructor */
~GemmConfigOptions() = default;
- SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
- SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
- SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
- SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
- SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+ SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+ SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+ SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+ SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
+ SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */
SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
// FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
// transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
// 2 variants (both transposed and none transposed)
- SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
+ SimpleOption<size_t> *
+ transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/
};
@@ -198,13 +202,13 @@ public:
// Parse command line options
parser.parse(argc, argv);
- if(param_options.help->is_set() && param_options.help->value())
+ if (param_options.help->is_set() && param_options.help->value())
{
// Print help message
parser.print_help(argv[0]);
return false;
}
- if(!parser.validate())
+ if (!parser.validate())
{
// Invalid arguments. Use default parameters and configs
std::cerr << "Invalid arguments." << std::endl;
@@ -256,20 +260,22 @@ public:
kernel_info.broadcast_bias = true;
kernel_info.activation_info = act_info;
- if(rhs_info.h0 == 0)
+ if (rhs_info.h0 == 0)
{
rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
}
// Initialise lhs_reshaped tensor info
- lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
+ lhs_reshaped.allocator()->init(
+ TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
// Initialise rhs_reshaped tensor info
- rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+ rhs_reshaped.allocator()->init(
+ TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+ if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
{
std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
return false;
@@ -279,7 +285,7 @@ public:
// Validate argments
Status status{};
status = reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, kernel_info.reinterpret_input_as_3d);
- if(!status)
+ if (!status)
{
// Unsupported arguments
std::cerr << "Unsupported arguments." << std::endl;
@@ -287,8 +293,9 @@ public:
return false;
}
- status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- if(!status)
+ status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+ rhs_info, kernel_info);
+ if (!status)
{
// Unsupported arguments
std::cerr << "Unsupported arguments." << std::endl;
@@ -300,7 +307,8 @@ public:
reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
// Configure function
- gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+ gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+ rhs_info, kernel_info);
// Allocate tensors
lhs.allocator()->allocate();
@@ -315,14 +323,11 @@ public:
void do_run() override
{
// Execute the functions
- ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } });
+ ITensorPack reshape_lsh_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}});
reshape_lhs.run(reshape_lsh_pack);
- ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
- { ACL_SRC_1, &rhs_reshaped },
- { ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ ITensorPack gemm_pack(
+ {{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
gemm.run(gemm_pack);
// Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
index 0ad2a65dc2..cfea2c9bac 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
@@ -25,20 +25,21 @@
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
-#include "CommonGemmExampleOptions.h"
-#include "GemmTunerHelpers.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
#include "tests/CL/Helper.h"
-#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
+#include "CommonGemmExampleOptions.h"
+#include "GemmTunerHelpers.h"
#include <cstdlib>
using namespace arm_compute;
@@ -52,13 +53,13 @@ namespace
/** Structure holding all tunable gemm configs specific to this example/strategy */
struct GemmConfigs
{
- size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
- size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
- size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
- size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool interleave_rhs{ true }; /**< Interleave rhs matrix */
- bool transpose_rhs{ true }; /**< Transpose rhs matrix */
- bool export_to_cl_image_rhs{ true }; /**< Export rhs matrix to cl_image.*/
+ size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+ size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+ size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
+ size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool interleave_rhs{true}; /**< Interleave rhs matrix */
+ bool transpose_rhs{true}; /**< Transpose rhs matrix */
+ bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image.*/
};
/** Formatted output of the GemmConfigs type
@@ -106,7 +107,8 @@ public:
h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row");
interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)");
transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)");
- export_to_cl_image_rhs->set_help("Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
+ export_to_cl_image_rhs->set_help(
+ "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)");
}
/** Prevent instances of this class from being copied (As this class contains pointers) */
GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -119,10 +121,10 @@ public:
/** Default destructor */
~GemmConfigOptions() = default;
- SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
- SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
- SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
- SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+ SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+ SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+ SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+ SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */
SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/
@@ -170,13 +172,13 @@ public:
// Parse command line options
parser.parse(argc, argv);
- if(param_options.help->is_set() && param_options.help->value())
+ if (param_options.help->is_set() && param_options.help->value())
{
// Print help message
parser.print_help(argv[0]);
return false;
}
- if(!parser.validate())
+ if (!parser.validate())
{
// Invalid arguments. Use default parameters and configs
std::cerr << "Invalid arguments." << std::endl;
@@ -225,17 +227,18 @@ public:
kernel_info.broadcast_bias = true;
kernel_info.activation_info = act_info;
- if(rhs_info.h0 == 0)
+ if (rhs_info.h0 == 0)
{
rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
}
// Initialise rhs_reshaped tensor info
- rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+ rhs_reshaped.allocator()->init(
+ TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+ if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
{
std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
return false;
@@ -244,8 +247,9 @@ public:
// Validate argments
Status status{};
- status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- if(!status)
+ status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info,
+ rhs_info, kernel_info);
+ if (!status)
{
// Unsupported arguments
std::cerr << "Unsupported arguments." << std::endl;
@@ -254,7 +258,8 @@ public:
}
// Configure function
- gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info,
+ kernel_info);
// Allocate tensors
lhs.allocator()->allocate();
@@ -268,11 +273,7 @@ public:
void do_run() override
{
// Execute the function
- ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
- { ACL_SRC_1, &rhs_reshaped },
- { ACL_SRC_2, &bias },
- { ACL_DST, &dst }
- });
+ ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}});
gemm.run(gemm_pack);
// Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
index 9cf9c9fed0..3808b98b7d 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
@@ -31,14 +31,15 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+
#include "examples/gemm_tuner/CommonGemmExampleOptions.h"
#include "examples/gemm_tuner/GemmTunerHelpers.h"
#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h"
#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
#include "tests/CL/Helper.h"
-#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
#include <cstdlib>
@@ -53,15 +54,15 @@ namespace
/** Structure holding all tunable gemm configs specific to this example/strategy */
struct GemmConfigs
{
- size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
- size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
- size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
- size_t v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
- size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool interleave_lhs{ true }; /**< Interleave lhs matrix */
- bool transpose_lhs{ true }; /**< Transpose lhs matrix. */
- bool interleave_rhs{ true }; /**< Interleave rhs matrix */
- bool transpose_rhs{ true }; /**< Transpose rhs matrix. */
+ size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+ size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+ size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
+ size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+ size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool interleave_lhs{true}; /**< Interleave lhs matrix */
+ bool transpose_lhs{true}; /**< Transpose lhs matrix. */
+ bool interleave_rhs{true}; /**< Interleave rhs matrix */
+ bool transpose_rhs{true}; /**< Transpose rhs matrix. */
};
/** Formatted output of the GemmConfigs type
@@ -116,7 +117,8 @@ public:
// FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
// transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
// 2 variants (both transposed and none transposed)
- transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)");
+ transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do "
+ "transpose lhs matrix (0)");
}
/** Prevent instances of this class from being copied (As this class contains pointers) */
GemmConfigOptions(const GemmConfigOptions &) = delete;
@@ -129,17 +131,18 @@ public:
/** Default destructor */
~GemmConfigOptions() = default;
- SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
- SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
- SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
- SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
- SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+ SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+ SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+ SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+ SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */
+ SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */
SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
// FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and
// transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other
// 2 variants (both transposed and none transposed)
- SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
+ SimpleOption<size_t> *
+ transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */
};
/** Consumes the gemm configuration options and creates a structure containing all information
@@ -186,12 +189,12 @@ public:
GemmConfigOptions config_options(parser);
parser.parse(argc, argv);
- if(param_options.help->is_set() && param_options.help->value())
+ if (param_options.help->is_set() && param_options.help->value())
{
parser.print_help(argv[0]);
return false;
}
- if(!parser.validate())
+ if (!parser.validate())
{
// Invalid arguments. Use default parameters and configs
std::cerr << "Invalid arguments." << std::endl;
@@ -217,10 +220,7 @@ public:
rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type));
// Set arbitrary quantization information
- const QuantizationInfo q_info
- {
- 0.012, 3
- };
+ const QuantizationInfo q_info{0.012, 3};
lhs.info()->set_quantization_info(q_info);
rhs.info()->set_quantization_info(q_info);
dst.info()->set_quantization_info(q_info);
@@ -240,45 +240,44 @@ public:
rhs_info.transpose = configs.transpose_rhs;
rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
- if(rhs_info.h0 == 0)
+ if (rhs_info.h0 == 0)
{
rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
}
- lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
- rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+ lhs_reshaped.allocator()->init(
+ TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
+ rhs_reshaped.allocator()->init(
+ TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
lhs_reshaped.info()->set_quantization_info(q_info);
rhs_reshaped.info()->set_quantization_info(q_info);
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+ if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
{
std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
return false;
}
}
- GEMMReshapeInfo gemm_info
- {
- static_cast<int>(params.M),
- static_cast<int>(params.N),
- static_cast<int>(params.K),
- static_cast<int>(configs.h0),
- static_cast<int>(configs.v0),
- 0,
- false,
- true
- };
+ GEMMReshapeInfo gemm_info{static_cast<int>(params.M),
+ static_cast<int>(params.N),
+ static_cast<int>(params.K),
+ static_cast<int>(configs.h0),
+ static_cast<int>(configs.v0),
+ 0,
+ false,
+ true};
// Validate argments
- if(!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d()))
+ if (!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d()))
{
std::cerr << "Invalid arguments for ClGemmReshapeLHSMatrixKernel." << std::endl;
return false;
}
- if(!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info))
+ if (!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info))
{
std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel." << std::endl;
return false;
@@ -300,10 +299,10 @@ public:
}
void do_run() override
{
- ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } });
+ ITensorPack reshape_lsh_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}});
reshape_lhs.run(reshape_lsh_pack);
- ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+ ITensorPack gemm_pack({{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_DST, &dst}});
gemm.run(gemm_pack);
// Make sure all the OpenCL jobs are done executing:
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
index 94f3c93166..4acb316a3c 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
@@ -25,23 +25,23 @@
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
-#include "CommonGemmExampleOptions.h"
-#include "GemmTunerHelpers.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+
#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h"
#include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h"
#include "tests/CL/Helper.h"
-#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "utils/command_line/CommandLineParser.h"
+#include "utils/Utils.h"
+#include "CommonGemmExampleOptions.h"
+#include "GemmTunerHelpers.h"
#include <cstdlib>
#include <memory>
@@ -56,12 +56,12 @@ namespace
/** Structure holding all tunable gemm configs specific to this example/strategy */
struct GemmConfigs
{
- size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */
- size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */
- size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */
- size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool interleave_rhs{ true }; /**< Interleave rhs matrix */
- bool transpose_rhs{ true }; /**< Transpose rhs matrix */
+ size_t m0{4}; /**< Number of rows processed by the matrix multiplication */
+ size_t n0{4}; /**< Number of columns processed by the matrix multiplication */
+ size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */
+ size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool interleave_rhs{true}; /**< Interleave rhs matrix */
+ bool transpose_rhs{true}; /**< Transpose rhs matrix */
};
/** Formatted output of the GemmConfigs type
@@ -119,10 +119,10 @@ public:
/** Default destructor */
~GemmConfigOptions() = default;
- SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
- SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
- SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
- SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
+ SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */
+ SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */
+ SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */
+ SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */
SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */
SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */
};
@@ -147,8 +147,9 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options)
} // namespace
-using ClGemmLowpMatrixMultiplyReshapedOnlyRhs = test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
-using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>;
+using ClGemmLowpMatrixMultiplyReshapedOnlyRhs =
+ test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
+using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>;
class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample : public Example
{
@@ -165,12 +166,12 @@ public:
GemmConfigOptions config_options(parser);
parser.parse(argc, argv);
- if(param_options.help->is_set() && param_options.help->value())
+ if (param_options.help->is_set() && param_options.help->value())
{
parser.print_help(argv[0]);
return false;
}
- if(!parser.validate())
+ if (!parser.validate())
{
// Invalid arguments. Use default parameters and configs
std::cerr << "Invalid arguments." << std::endl;
@@ -199,10 +200,7 @@ public:
// Set arbitrary quantization information (non-zero offset to ensure offset contribution stage is included)
// Could be extended in the future to include a user-controlled option for offset == 0
- const QuantizationInfo q_info
- {
- 0.012, 3
- };
+ const QuantizationInfo q_info{0.012, 3};
lhs.info()->set_quantization_info(q_info);
rhs.info()->set_quantization_info(q_info);
bias.info()->set_quantization_info(q_info);
@@ -220,16 +218,17 @@ public:
rhs_info.transpose = configs.transpose_rhs;
rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
- if(rhs_info.h0 == 0)
+ if (rhs_info.h0 == 0)
{
rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
}
- rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
+ rhs_reshaped.allocator()->init(
+ TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
rhs_reshaped.info()->set_quantization_info(q_info);
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- if(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
+ if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info()))
{
std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl;
return false;
@@ -251,9 +250,7 @@ public:
gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters);
gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters);
- quantization::compute_quantized_multipliers_and_shifts(lhs.info(),
- rhs.info(),
- dst.info(),
+ quantization::compute_quantized_multipliers_and_shifts(lhs.info(), rhs.info(), dst.info(),
gemmlowp_output_stage.gemmlowp_multipliers.data(),
gemmlowp_output_stage.gemmlowp_shifts.data());
gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
@@ -290,14 +287,14 @@ public:
gemm_info.output_stage = gemmlowp_output_stage;
// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
- if(gemm_info.b_offset != 0)
+ if (gemm_info.b_offset != 0)
{
const TensorInfo info_vector_sum_row(compute_reductionB_shape(*lhs.info()), 1, DataType::S32);
vector_sum_row.allocator()->init(info_vector_sum_row);
mtx_a_reduction = std::make_unique<ClGemmLowpMatrixAReduction>();
- if(!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}))
+ if (!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}))
{
std::cerr << "Invalid arguments for CLGEMMLowpMatrixAReductionKernel." << std::endl;
return false;
@@ -306,7 +303,7 @@ public:
mtx_a_reduction->configure(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{});
}
// Initialize matrix B reduction kernel only if _a_offset is not equal to 0
- if(gemm_info.a_offset != 0)
+ if (gemm_info.a_offset != 0)
{
const TensorInfo info_vector_sum_col(compute_reductionA_shape(*rhs.info()), 1, DataType::S32);
vector_sum_col.allocator()->init(info_vector_sum_col);
@@ -314,8 +311,10 @@ public:
}
// Validate argments
- if(!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
- gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(), dst_shifts.info()))
+ if (!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info,
+ gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
+ gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(),
+ dst_multipliers.info(), dst_shifts.info()))
{
std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel." << std::endl;
return false;
@@ -323,8 +322,9 @@ public:
// Configure function
gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info,
- gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(),
- bias.info(), dst_multipliers.info(), dst_shifts.info());
+ gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(),
+ gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(),
+ dst_shifts.info());
// Allocate tensors
lhs.allocator()->allocate();
@@ -341,13 +341,20 @@ public:
}
void do_run() override
{
- if(mtx_a_reduction != nullptr)
+ if (mtx_a_reduction != nullptr)
{
- ITensorPack red_pack({ { ACL_SRC, &lhs }, { ACL_DST, &dst } });
+ ITensorPack red_pack({{ACL_SRC, &lhs}, {ACL_DST, &dst}});
mtx_a_reduction->run(red_pack);
}
- ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_BIAS, &bias }, { ACL_VEC_COL_SUM, &vector_sum_col }, { ACL_VEC_ROW_SUM, &vector_sum_row }, { ACL_SHIFTS, &dst_shifts }, { ACL_MULTIPLIERS, &dst_multipliers }, { ACL_DST, &dst } });
+ ITensorPack gemm_pack({{ACL_SRC_0, &lhs},
+ {ACL_SRC_1, &rhs},
+ {ACL_BIAS, &bias},
+ {ACL_VEC_COL_SUM, &vector_sum_col},
+ {ACL_VEC_ROW_SUM, &vector_sum_row},
+ {ACL_SHIFTS, &dst_shifts},
+ {ACL_MULTIPLIERS, &dst_multipliers},
+ {ACL_DST, &dst}});
gemm.run(gemm_pack);
// Make sure all the OpenCL jobs are done executing:
@@ -370,7 +377,7 @@ private:
CLTensor dst_shifts{};
CLTuner tuner{};
ClGemmLowpMatrixMultiplyReshapedOnlyRhs gemm{};
- std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{ nullptr };
+ std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{nullptr};
};
/** Main test program for gemmlowp reshaped rhs only with fused output stage fixedpoint