diff options
Diffstat (limited to 'tests/validation/CPP')
-rw-r--r-- | tests/validation/CPP/GEMMLowp.cpp | 67 |
-rw-r--r-- | tests/validation/CPP/GEMMLowp.h | 6 |
2 files changed, 27 insertions, 46 deletions
diff --git a/tests/validation/CPP/GEMMLowp.cpp b/tests/validation/CPP/GEMMLowp.cpp index e1d76503cd..bac3a20c8e 100644 --- a/tests/validation/CPP/GEMMLowp.cpp +++ b/tests/validation/CPP/GEMMLowp.cpp @@ -21,10 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "GEMM.h" +#include "GEMMLowp.h" #include "arm_compute/core/Types.h" -#include "tests/validation/FixedPoint.h" namespace arm_compute { @@ -34,17 +33,21 @@ namespace validation { namespace reference { -SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, SimpleTensor<int32_t> &c) +template <typename T> +SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<T> &a, const SimpleTensor<T> &b, int32_t a_offset, int32_t b_offset) { - ARM_COMPUTE_UNUSED(a); - ARM_COMPUTE_UNUSED(b); - ARM_COMPUTE_UNUSED(c); - const int K = a.shape().x(); - const int b_width = b.shape().x(); - const int rows = c.shape().y(); //M - const int cols = c.shape().x(); //N + TensorShape shape(b.shape()[0], a.shape()[1]); + + SimpleTensor<int32_t> c(shape, DataType::S32); + + const int K = a.shape().x(); + const int b_width = b.shape().x(); + const int rows = c.shape().y(); //M + const int cols = c.shape().x(); //N + std::vector<int32_t> acc; acc.resize(cols); + for(int i = 0; i < rows; ++i) { for(int j = 0; j < cols; ++j) @@ -53,10 +56,10 @@ SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor } for(int k = 0; k < K; ++k) { - auto tmp_a = static_cast<int32_t>(a[k + i * K]); + const int32_t tmp_a = a_offset + static_cast<int32_t>(a[k + i * K]); for(int j = 0; j < b_width; ++j) { - auto tmp_b = static_cast<int32_t>(b[j + k * b_width]); + const int32_t tmp_b = b_offset + static_cast<int32_t>(b[j + k * b_width]); const int32_t mult_as_int = tmp_a * tmp_b; acc[j] += mult_as_int; } @@ -71,43 +74,21 @@ SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor } template <typename T> 
-SimpleTensor<T> gemmlowp(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &c, - int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift) +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift) { - const int K = a.shape().x(); - const int b_width = b.shape().x(); - const int rows = c.shape().y(); //M - const int cols = c.shape().x(); //N - std::vector<int32_t> acc; - acc.resize(cols); - for(int i = 0; i < rows; ++i) + SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8); + + for(int i = 0; i < in.num_elements(); ++i) { - for(int j = 0; j < cols; ++j) - { - acc[j] = 0; - } - for(int k = 0; k < K; ++k) - { - const int32_t tmp_a = a_offset + static_cast<int32_t>(a[k + i * K]); - for(int j = 0; j < b_width; ++j) - { - const int32_t tmp_b = b_offset + static_cast<int32_t>(b[j + k * b_width]); - const int32_t mult_as_int = tmp_a * tmp_b; - acc[j] += mult_as_int; - } - } - for(int j = 0; j < cols; ++j) - { - const int32_t result = ((c_offset + acc[j]) * c_mult_int) >> out_shift; - c[j + i * cols] = static_cast<int8_t>(std::min(127, std::max(-128, result))); - } + const int32_t result = ((in[i] + result_offset) * result_mult_int) >> result_shift; + dst[i] = static_cast<uint8_t>(std::max(0, std::min(255, result))); } - return c; + return dst; } -template SimpleTensor<int8_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, SimpleTensor<int8_t> &c, - int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift); +template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, int32_t a_offset, int32_t b_offset); +template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift); } // 
namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/CPP/GEMMLowp.h b/tests/validation/CPP/GEMMLowp.h index 2f903f2fe2..c09d8f6176 100644 --- a/tests/validation/CPP/GEMMLowp.h +++ b/tests/validation/CPP/GEMMLowp.h @@ -35,11 +35,11 @@ namespace validation { namespace reference { -SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, SimpleTensor<int32_t> &c); +template <typename T> +SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<T> &a, const SimpleTensor<T> &b, int32_t a_offset, int32_t b_offset); template <typename T> -SimpleTensor<T> gemmlowp(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &c, - int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift); +SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift); } // namespace reference } // namespace validation } // namespace test |