aboutsummaryrefslogtreecommitdiff
path: root/tests/benchmark/NEON/GEMM.cpp
diff options
context:
space:
mode:
author SiCong Li <sicong.li@arm.com> 2017-06-22 15:46:40 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:14:20 +0100
commit 10c672c2e21bb77b7234d9d3611267400dce7ae0 (patch)
tree e3a61be142225c38e36dc7db4719948a42afe32d /tests/benchmark/NEON/GEMM.cpp
parent 84e3120f6803f66cd272729b1f3542cfd3bc75a5 (diff)
download ComputeLibrary-10c672c2e21bb77b7234d9d3611267400dce7ae0.tar.gz
COMPMID-399 Add MatrixMultiply to benchmark
Change-Id: I86c3f808c0047c8d97211d21f61c4e79e2d2abb1 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78617 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'tests/benchmark/NEON/GEMM.cpp')
-rw-r--r-- tests/benchmark/NEON/GEMM.cpp 69
1 files changed, 69 insertions, 0 deletions
diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp
index 9190309f1c..50131b4998 100644
--- a/tests/benchmark/NEON/GEMM.cpp
+++ b/tests/benchmark/NEON/GEMM.cpp
@@ -49,11 +49,14 @@ namespace
#ifdef ENABLE_FP16
using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F16>;
using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F16>;
+using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F16>;
#endif /* ENABLE_FP16 */
using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F32>;
using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F32>;
using GEMMQS8GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
using GEMMQS8GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
+using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F32>;
+using QS8MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::QS8>;
} // namespace
#ifdef ENABLE_FP16
BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet)
@@ -272,6 +275,28 @@ BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet)
->Threads(1)
->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
+
+BENCHMARK_DEFINE_F(FP16MatrixMultiply, neon_matrix_multiply)
+(::benchmark::State &state)
+{
+ while(state.KeepRunning())
+ {
+ // Profile exactly one gemm_layer->run() per iteration; nothing else sits between start() and stop()
+ profiler.start();
+ gemm_layer->run();
+ profiler.stop();
+ }
+}
+
+BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
+->Threads(1) // run this registration on a single benchmark thread
+->Apply(DataSetArg<MatrixMultiplyDataset, 0>); // NOTE(review): presumably selects entry 0 of MatrixMultiplyDataset - confirm against DataSetArg
+BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 1>); // NOTE(review): presumably entry 1 - confirm
+BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 2>); // NOTE(review): presumably entry 2 - confirm
#endif /* ENABLE_FP16 */
BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet)
@@ -707,3 +732,47 @@ BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet)
->Threads(1)
->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
+
+BENCHMARK_DEFINE_F(FP32MatrixMultiply, neon_matrix_multiply)
+(::benchmark::State &state)
+{
+ while(state.KeepRunning())
+ {
+ // Profile exactly one gemm_layer->run() per iteration; nothing else sits between start() and stop()
+ profiler.start();
+ gemm_layer->run();
+ profiler.stop();
+ }
+}
+
+BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
+->Threads(1) // run this registration on a single benchmark thread
+->Apply(DataSetArg<MatrixMultiplyDataset, 0>); // NOTE(review): presumably selects entry 0 of MatrixMultiplyDataset - confirm against DataSetArg
+BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 1>); // NOTE(review): presumably entry 1 - confirm
+BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 2>); // NOTE(review): presumably entry 2 - confirm
+
+BENCHMARK_DEFINE_F(QS8MatrixMultiply, neon_matrix_multiply)
+(::benchmark::State &state)
+{
+ while(state.KeepRunning())
+ {
+ // Profile exactly one gemm_layer->run() per iteration; nothing else sits between start() and stop()
+ profiler.start();
+ gemm_layer->run();
+ profiler.stop();
+ }
+}
+
+BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
+->Threads(1) // run this registration on a single benchmark thread
+->Apply(DataSetArg<MatrixMultiplyDataset, 0>); // NOTE(review): presumably selects entry 0 of MatrixMultiplyDataset - confirm against DataSetArg
+BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 1>); // NOTE(review): presumably entry 1 - confirm
+BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply)
+->Threads(1)
+->Apply(DataSetArg<MatrixMultiplyDataset, 2>); // NOTE(review): presumably entry 2 - confirm