From f3dfa279d536906dac3e618244b2c1d33e5ff28a Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 21 Nov 2017 17:52:12 +0000 Subject: COMPMID-632 Assembly: Integrate gemmlowp assembly version Integrate generic gemmlowp assembly version for u8. Change-Id: I17ed4494c25a132b2bac581febe1544e49b4f352 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110114 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Pablo Tello --- tests/validation/CPP/GEMMLowp.cpp | 28 +++++++++++++++++----------- tests/validation/CPP/GEMMLowp.h | 11 +++++++---- 2 files changed, 24 insertions(+), 15 deletions(-) (limited to 'tests/validation/CPP') diff --git a/tests/validation/CPP/GEMMLowp.cpp b/tests/validation/CPP/GEMMLowp.cpp index bf002cf2b5..35b8a6486e 100644 --- a/tests/validation/CPP/GEMMLowp.cpp +++ b/tests/validation/CPP/GEMMLowp.cpp @@ -63,19 +63,21 @@ void quantize_down_int32_to_uint8_scale(const SimpleTensor *in, const SimpleT } } // namespace -template -SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset) +template +SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset) { - TensorShape shape(b.shape()[0], a.shape()[1]); + static_assert(std::is_same::type, int32_t>::value, "Only int32_t is allowed for the output"); - SimpleTensor c(shape, DataType::S32); + TensorShape shape(b.shape()[0], a.shape()[1]); + DataType dt = std::is_same::value ? DataType::S32 : DataType::U32; + SimpleTensor c(shape, dt); const int K = a.shape().x(); const int b_width = b.shape().x(); const int rows = c.shape().y(); //M const int cols = c.shape().x(); //N - std::vector acc; + std::vector acc; acc.resize(cols); for(int i = 0; i < rows; ++i) @@ -86,11 +88,11 @@ SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, co } for(int k = 0; k < K; ++k) { - const int32_t tmp_a = a_offset + static_cast(a[k + i * K]); + const T_out tmp_a = a_offset + static_cast(a[k + i * K]); for(int j = 0; j < b_width; ++j) { - const int32_t tmp_b = b_offset + static_cast(b[j + k * b_width]); - const int32_t mult_as_int = tmp_a * tmp_b; + const T_out tmp_b = b_offset + static_cast(b[j + k * b_width]); + const T_out mult_as_int = tmp_a * tmp_b; acc[j] += mult_as_int; } } @@ -104,9 +106,10 @@ SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, co } // used to validate assembly kernels which don't know anything about offsets -SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b) +template +SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b) { - return gemmlowp_matrix_multiply_core(a, b, 0, 0); + return gemmlowp_matrix_multiply_core(a, b, 0, 0); } template @@ -130,11 +133,14 @@ SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTe return dst; } -template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &a, const SimpleTensor &b, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max); +template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); +template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); +template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); +template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/CPP/GEMMLowp.h b/tests/validation/CPP/GEMMLowp.h index ee33d8e0c0..6c72b56e7a 100644 --- a/tests/validation/CPP/GEMMLowp.h +++ b/tests/validation/CPP/GEMMLowp.h @@ -35,13 +35,16 @@ namespace validation { namespace reference { -SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); - template -SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); +SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0); +template +SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, int32_t a_offset, int32_t b_offset); template -SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0); +SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift); + +template +SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b); template SimpleTensor gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor &in, const SimpleTensor &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, -- cgit v1.2.1