From ebf6b8a00b77ea796d877bc1d0e6850c055318a6 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 24 Sep 2018 16:31:08 +0100 Subject: COMPMID-1518: Add support for GEMM3D in CLGEMMLowpMatrixMultiplyCore Change-Id: Ib14ac821ee5d4aff80bd602cd3e76e7018abb5e6 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150268 Tested-by: bsgcomp Reviewed-by: Isabella Gottardi Reviewed-by: Michele DiGiorgio --- tests/validation/CL/GEMMLowp.cpp | 30 ++++++++- .../validation/fixtures/GEMMLowpAssemblyFixture.h | 2 +- tests/validation/fixtures/GEMMLowpFixture.h | 24 +++++--- tests/validation/reference/GEMMLowp.cpp | 71 +++++++++++++--------- tests/validation/reference/GEMMLowp.h | 6 +- 5 files changed, 89 insertions(+), 44 deletions(-) (limited to 'tests/validation') diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 5148a31936..42bb2123bf 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,7 @@ TEST_SUITE(CL) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) + using CLGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()), @@ -81,6 +82,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework: validate(CLAccessor(_target), _reference); } +TEST_SUITE(Output3D) +using CLGEMMLowpMatrixMultiplyCoreOutput3DFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreOutput3DFixture, framework::DatasetMode::PRECOMMIT, datasets::SmallGEMMLowpOutput3DDataset()) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreOutput3DFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpOutput3DDataset()) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // Output3D + +TEST_SUITE(InputOutput3D) +using CLGEMMLowpMatrixMultiplyCoreInputOutput3DFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreInputOutput3DFixture, framework::DatasetMode::PRECOMMIT, datasets::SmallGEMMLowpInputOutput3DDataset()) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreInputOutput3DFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpInputOutput3DDataset()) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // InputOutput3D TEST_SUITE_END() // MatrixMultiplyCore TEST_SUITE(OutputStage) diff --git a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h index d6b94a197d..519932f3b2 100644 --- a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h +++ b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h @@ -128,7 +128,7 @@ protected: fill(b, 1, 0, 255); } - return reference::gemmlowp(a, b); + return reference::gemmlowp(a, b, shape_c); } TensorType _target{}; diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 06d6be3fa4..73cb8328ea 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,7 @@ namespace test { namespace validation { -template +template class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture { public: @@ -62,8 +62,7 @@ protected: library->fill(tensor, distribution, i); } - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, - int32_t a_offset, int32_t b_offset) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, int32_t a_offset, int32_t b_offset) { // Create tensors TensorType a = create_tensor(shape_a, DataType::QASYMM8, 1); @@ -74,8 +73,9 @@ protected: b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); // Create and configure function + // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output FunctionType gemmlowp; - gemmlowp.configure(&a, &b, &c); + gemmlowp.configure(&a, &b, &c, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? shape_c[2] : 1), reinterpret_input_as_3d)); ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -99,18 +99,24 @@ protected: return c; } - SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, - int32_t a_offset, int32_t b_offset) + SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, int32_t a_offset, int32_t b_offset) { + TensorShape shape_a_to_use = shape_a; + if(reinterpret_input_as_3d) + { + // Collapse the second and third dimension if the input is 3D + shape_a_to_use.collapse(2U, 1U); + } + // Create reference - SimpleTensor a{ shape_a, DataType::QASYMM8, 1 }; + SimpleTensor a{ shape_a_to_use, DataType::QASYMM8, 1 }; SimpleTensor b{ shape_b, DataType::QASYMM8, 1 }; // Fill reference fill(a, 0); fill(b, 1); - return reference::gemmlowp_matrix_multiply_core(a, b, a_offset, b_offset); + return reference::gemmlowp_matrix_multiply_core(a, b, shape_c, a_offset, b_offset); } TensorType _target{}; diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 8e41aef46a..9a7e409e8a 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -98,41 +98,52 @@ void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor *in, } // namespace template -SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset) +SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset) { static_assert(std::is_same::type, int32_t>::value, "Only int32_t is allowed for the output"); - TensorShape shape(b.shape()[0], a.shape()[1]); DataType dt = std::is_same::value ? DataType::S32 : DataType::U32; - SimpleTensor c(shape, dt); + SimpleTensor c(shape_c, dt); - const int K = a.shape().x(); - const int b_width = b.shape().x(); - const int rows = c.shape().y(); //M - const int cols = c.shape().x(); //N + const int K = a.shape().x(); + const int M = a.shape().y(); + const int N = b.shape().x(); + const int D = a.shape().z(); // Number of matrices in a batch + + const int a_stride_z = K * M; + // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions + const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; + const int c_stride_z = N * M; std::vector acc; - acc.resize(cols); + acc.resize(N); - for(int i = 0; i < rows; ++i) + for(int depth = 0; depth < D; ++depth) { - for(int j = 0; j < cols; ++j) - { - acc[j] = 0; - } - for(int k = 0; k < K; ++k) + const int base_addr_a = depth * a_stride_z; + const int base_addr_b = depth * b_stride_z; + const int base_addr_c = depth * c_stride_z; + + for(int i = 0; i < M; ++i) { - const T_out tmp_a = a_offset + static_cast(a[k + i * K]); - for(int j = 0; j < b_width; ++j) + for(int j = 0; j < N; ++j) { - const T_out tmp_b = b_offset + static_cast(b[j + k * b_width]); - const T_out mult_as_int = tmp_a * tmp_b; - acc[j] += mult_as_int; + acc[j] = 0; + } + for(int k = 0; k < K; ++k) + { + const T_out tmp_a = a_offset + static_cast(a[base_addr_a + k + i * K]); + for(int j = 0; j < N; ++j) + { + const T_out tmp_b = b_offset + static_cast(b[base_addr_b + j + k * N]); + const T_out mult_as_int = tmp_a * tmp_b; + acc[j] += mult_as_int; + } + } + for(int j = 0; j < N; ++j) + { + c[base_addr_c + j + i * N] = acc[j]; } - } - for(int j = 0; j < cols; ++j) - { - c[j + i * cols] = acc[j]; } } @@ -141,9 +152,9 @@ SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, c // used to validate assembly kernels which don't know anything about offsets template -SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b) +SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c) { - return gemmlowp_matrix_multiply_core(a, b, 0, 0); + return gemmlowp_matrix_multiply_core(a, b, shape_c, 0, 0); } template @@ -198,10 +209,10 @@ template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const int32_t max); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &a, const SimpleTensor &b, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max); -template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); -template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); -template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); -template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); +template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c); +template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index a3d0bebe3f..4396155b96 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,13 +38,13 @@ namespace reference template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0); template -SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); +SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift); template -SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); +SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, const SimpleTensor &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, -- cgit v1.2.1