diff options
Diffstat (limited to 'tests')
-rw-r--r--  tests/benchmark/CL/GEMM.cpp    |  48 ++++
-rw-r--r--  tests/benchmark/NEON/GEMM.cpp  |  69 ++++
-rw-r--r--  tests/dataset/GEMMDataset.h    |  32 ++--
3 files changed, 141 insertions(+), 8 deletions(-)
diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp index b90556df48..a10da09cbe 100644 --- a/tests/benchmark/CL/GEMM.cpp +++ b/tests/benchmark/CL/GEMM.cpp @@ -51,6 +51,8 @@ using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLG using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>; using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>; using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>; +using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>; +using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>; } // namespace BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet) @@ -490,3 +492,49 @@ BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) ->Threads(1) ->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); + +BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 0>); +BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 1>); +BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 2>); + +BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) +->Threads(1) 
+->Apply(DataSetArg<MatrixMultiplyDataset, 0>); +BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 1>); +BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 2>); diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp index 9190309f1c..50131b4998 100644 --- a/tests/benchmark/NEON/GEMM.cpp +++ b/tests/benchmark/NEON/GEMM.cpp @@ -49,11 +49,14 @@ namespace #ifdef ENABLE_FP16 using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F16>; using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F16>; +using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F16>; #endif /* ENABLE_FP16 */ using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::F32>; using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::F32>; using GEMMQS8GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, Tensor, NEAccessor, NEGEMM, DataType::QS8>; using GEMMQS8GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, Tensor, NEAccessor, NEGEMM, DataType::QS8>; +using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::F32>; +using QS8MatrixMultiply = GEMM<MatrixMultiplyDataset, Tensor, NEAccessor, NEGEMM, DataType::QS8>; } // namespace #ifdef ENABLE_FP16 BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet) @@ -272,6 +275,28 @@ BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) ->Threads(1) ->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); + +BENCHMARK_DEFINE_F(FP16MatrixMultiply, neon_matrix_multiply) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FP16MatrixMultiply, 
neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 0>); +BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 1>); +BENCHMARK_REGISTER_F(FP16MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 2>); #endif /* ENABLE_FP16 */ BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet) @@ -707,3 +732,47 @@ BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) ->Threads(1) ->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>); + +BENCHMARK_DEFINE_F(FP32MatrixMultiply, neon_matrix_multiply) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 0>); +BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 1>); +BENCHMARK_REGISTER_F(FP32MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 2>); + +BENCHMARK_DEFINE_F(QS8MatrixMultiply, neon_matrix_multiply) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 0>); +BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 1>); +BENCHMARK_REGISTER_F(QS8MatrixMultiply, neon_matrix_multiply) +->Threads(1) +->Apply(DataSetArg<MatrixMultiplyDataset, 2>); diff --git a/tests/dataset/GEMMDataset.h b/tests/dataset/GEMMDataset.h index f45bc3e838..ddd4a3424f 100644 --- a/tests/dataset/GEMMDataset.h +++ b/tests/dataset/GEMMDataset.h @@ -82,10 +82,10 @@ public: 
SmallGEMMDataset() : GenericDataset { - GEMMDataObject{ TensorShape(21u, 13u), TensorShape(33u, 21u), TensorShape(33u, 13u), TensorShape(33u, 13u), 1.0f, 0.0f }, - GEMMDataObject{ TensorShape(31u, 1u), TensorShape(23u, 31u), TensorShape(23u, 1u), TensorShape(23u, 1u), 1.0f, 0.0f }, - GEMMDataObject{ TensorShape(38u, 12u), TensorShape(21u, 38u), TensorShape(21u, 12u), TensorShape(21u, 12u), 0.2f, 1.2f }, - GEMMDataObject{ TensorShape(32u, 1u), TensorShape(17u, 32u), TensorShape(17u, 1u), TensorShape(17u, 1u), 0.4f, 0.7f }, + GEMMDataObject{ TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), TensorShape(33U, 13U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U), TensorShape(23U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(38U, 12U), TensorShape(21U, 38U), TensorShape(21U, 12U), TensorShape(21U, 12U), 0.2f, 1.2f }, + GEMMDataObject{ TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U), TensorShape(17U, 1U), 0.4f, 0.7f }, } { } @@ -99,10 +99,10 @@ public: LargeGEMMDataset() : GenericDataset { - GEMMDataObject{ TensorShape(923u, 429u), TensorShape(871u, 923u), TensorShape(871u, 429u), TensorShape(871u, 429u), 1.0f, 0.0f }, - GEMMDataObject{ TensorShape(1021u, 1u), TensorShape(783u, 1021u), TensorShape(783u, 1u), TensorShape(783u, 1u), 1.0f, 0.0f }, - GEMMDataObject{ TensorShape(681u, 1023u), TensorShape(213u, 681u), TensorShape(213u, 1023u), TensorShape(213u, 1023u), 0.2f, 1.2f }, - GEMMDataObject{ TensorShape(941u, 1u), TensorShape(623u, 941u), TensorShape(623u, 1u), TensorShape(623u, 1u), 0.4f, 0.7f }, + GEMMDataObject{ TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), TensorShape(871U, 429U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1021U, 1U), TensorShape(783U, 1021U), TensorShape(783U, 1U), TensorShape(783U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), TensorShape(213U, 1023U), 0.2f, 
1.2f }, + GEMMDataObject{ TensorShape(941U, 1U), TensorShape(623U, 941U), TensorShape(623U, 1U), TensorShape(623U, 1U), 0.4f, 0.7f }, } { } @@ -199,6 +199,22 @@ public: ~GoogLeNetGEMMDataset2() = default; }; + +class MatrixMultiplyDataset : public GenericDataset<GEMMDataObject, 3> +{ +public: + MatrixMultiplyDataset() + : GenericDataset + { + GEMMDataObject{ TensorShape(1024U, 1U), TensorShape(1000U, 1024U), TensorShape(1000U, 1U), TensorShape(1000U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(256U, 784U), TensorShape(64U, 256U), TensorShape(64U, 784U), TensorShape(64U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1152U, 2704U), TensorShape(256U, 1152U), TensorShape(256U, 2704U), TensorShape(256U, 2704U), 1.0f, 0.0f }, + } + { + } + + ~MatrixMultiplyDataset() = default; +}; } // namespace test } // namespace arm_compute #endif //__ARM_COMPUTE_TEST_DATASET_GEMM_DATASET_H__ |