aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/merges
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-02 20:02:20 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-06 16:51:32 +0000
commit: 5aa1a0b7ca5eed010e4b297a95b1c4851f741328 (patch)
tree: ba882de9e86589dfdd33937d538a89bbdf01c40e /src/core/NEON/kernels/arm_gemm/merges
parent: 42550c039105597ff6acd4e5efc0ee3c7c20b08e (diff)
download: ComputeLibrary-5aa1a0b7ca5eed010e4b297a95b1c4851f741328.tar.gz
COMPMID-3324: Clean GEMM kernels
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I170de1671e061a78740caee31fb4a1b8642c1369
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3505
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/merges')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp16_24x8.hpp | 8
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp32_12x8.hpp | 12
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_12x8.hpp | 12
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_4x4.hpp | 12
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_12x8.hpp | 12
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_4x4.hpp | 12
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/list.hpp | 2
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp | 8
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp | 8
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp | 13
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp | 13
11 files changed, 31 insertions, 81 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp16_24x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp16_24x8.hpp
index 5d8eae4866..a81d4504ae 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp16_24x8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp16_24x8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1130,11 +1130,7 @@ void MergeResults<24, 8, false>(__fp16 *out, const __fp16 *in, const int ldout,
}
else
{
- const __fp16 *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const __fp16 *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp32_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp32_12x8.hpp
index 088353e5f3..284f2dc1a0 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp32_12x8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_fp32_12x8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,8 +30,8 @@ void MergeResults<12, 8, false>(float *out, const float *in, const int ldout, co
{
const float *inptr = in;
float nullbias[12];
- float minval = - std::numeric_limits<float>::infinity();
- float maxval = std::numeric_limits<float>::infinity();
+ float minval = - static_cast<float>(std::numeric_limits<float>::infinity());
+ float maxval = static_cast<float>(std::numeric_limits<float>::infinity());
switch(act.type)
{
@@ -1106,11 +1106,7 @@ void MergeResults<12, 8, false>(float *out, const float *in, const int ldout, co
}
else
{
- const float *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const float *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_12x8.hpp
index 2e45d8b5d1..fcf08e4e15 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_12x8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_12x8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,10 +26,8 @@
#ifdef __aarch64__
template<>
-void MergeResults<12, 8, false>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation act, bool append)
+void MergeResults<12, 8, false>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const int32_t *inptr = in;
int32_t nullbias[12];
@@ -862,11 +860,7 @@ void MergeResults<12, 8, false>(int32_t *out, const int32_t *in, const int ldout
}
else
{
- const int32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const int32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_4x4.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_4x4.hpp
index 6d869af803..88eaa5f07c 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_s32_4x4.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,10 +26,8 @@
#ifdef __aarch64__
template<>
-void MergeResults<4, 4, false>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation act, bool append)
+void MergeResults<4, 4, false>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const int32_t *inptr = in;
int32_t nullbias[4];
@@ -240,11 +238,7 @@ void MergeResults<4, 4, false>(int32_t *out, const int32_t *in, const int ldout,
}
else
{
- const int32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const int32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_12x8.hpp
index 0a05944102..adc02f19eb 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_12x8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_12x8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,10 +26,8 @@
#ifdef __aarch64__
template<>
-void MergeResults<12, 8, false>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation act, bool append)
+void MergeResults<12, 8, false>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const uint32_t *inptr = in;
uint32_t nullbias[12];
@@ -862,11 +860,7 @@ void MergeResults<12, 8, false>(uint32_t *out, const uint32_t *in, const int ldo
}
else
{
- const uint32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const uint32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_4x4.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_4x4.hpp
index efb17dc9e9..32e1eebaa4 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_u32_4x4.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,10 +26,8 @@
#ifdef __aarch64__
template<>
-void MergeResults<4, 4, false>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation act, bool append)
+void MergeResults<4, 4, false>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const uint32_t *inptr = in;
uint32_t nullbias[4];
@@ -240,11 +238,7 @@ void MergeResults<4, 4, false>(uint32_t *out, const uint32_t *in, const int ldou
}
else
{
- const uint32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const uint32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/list.hpp b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
index 4edb497967..825c2fd020 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/list.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
index a44ef55a86..cf1d10329b 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1010,11 +1010,7 @@ void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, co
}
else
{
- const __fp16 *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const __fp16 *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
index bb073e4868..b0d10c085d 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1010,11 +1010,7 @@ void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, cons
}
else
{
- const float *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const float *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
index d4c5073f8d..34b6fe3ef5 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,13 +26,12 @@
#ifdef __ARM_FEATURE_SVE
template<>
-void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation act, bool append)
+void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const int32_t *inptr = in;
int32_t nullbias[192];
+
if (!append && !bias)
{
memset(nullbias, 0, (3 * get_vector_length<int32_t>() * sizeof(int32_t)));
@@ -765,11 +764,7 @@ void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout,
}
else
{
- const int32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const int32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
index f2a28fa004..c4b2bb56d6 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,13 +26,12 @@
#ifdef __ARM_FEATURE_SVE
template<>
-void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation act, bool append)
+void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append)
{
- UNUSED(act);
-
const uint32_t *inptr = in;
uint32_t nullbias[192];
+
if (!append && !bias)
{
memset(nullbias, 0, (3 * get_vector_length<uint32_t>() * sizeof(uint32_t)));
@@ -765,11 +764,7 @@ void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout
}
else
{
- const uint32_t *biasptr = nullbias;
- if (bias)
- {
- biasptr = bias + i;
- }
+ const uint32_t *biasptr = bias ? bias + i : nullbias;
switch(height)
{