aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2020-03-04 15:31:25 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2020-03-10 17:04:36 +0000
commit a26e166829f4d4c48864b1b7243e4e267373d0fd (patch)
tree 66cc076a656025c1ccca65e07f86673551ae45ec /src/core/NEON/kernels
parent 0cdbda5e51e6ef9e03017231e56ee85ede69bb9a (diff)
download ComputeLibrary-a26e166829f4d4c48864b1b7243e4e267373d0fd.tar.gz
COMPMID-3203: Fix build failure with GCC 9.2
Rework convolve3x3 to avoid erroneous behavior by the register allocator.

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ifff2f4ae3a95b894462c7457ffba1f710cce0577
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2839
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r-- src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp 3
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp 54
2 files changed, 3 insertions, 54 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index 1dd05d2cf1..03b962291d 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -116,8 +116,7 @@ public:
{
if(dilation == Size2D(1U, 1U))
{
- auto vres = detail::convolve_3x3(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, stridex, input_offset);
- detail::store_results<stridex>(p_out, vres);
+ detail::convolve_3x3<false>(in_top, in_mid, in_low, p_out, vw_r0, vw_r1, vw_r2, stridex, input_offset);
}
else
{
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index fe3eb92b1b..7f393d619c 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -321,32 +321,6 @@ public:
}
};
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-template <unsigned int stridex>
-void accumulate_results(float16_t *buffer, const float16x8x2_t &values);
-
-template <>
-void accumulate_results<1>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, vaddq_f16(vld1q_f16(buffer), values.val[0]));
- vst1q_f16(buffer + 8, vaddq_f16(vld1q_f16(buffer + 8), values.val[1]));
-}
-
-template <>
-void accumulate_results<2>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1q_f16(buffer, vaddq_f16(vld1q_f16(buffer), values.val[0]));
-}
-
-template <>
-void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values)
-{
- vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
-}
-
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
template <unsigned int stridex>
float32x4x2_t convolve_5x5(const float *in_0, const float *in_1, const float *in_2, const float *in_3, const float *in_4,
const float *m0, const float *m1, const float *m2, const float *m3, const float *m4);
@@ -498,28 +472,6 @@ inline float32x4x2_t convolve_5x5<3>(const float *in_0, const float *in_1, const
return out;
}
-template <unsigned int stridex>
-void accumulate_results(float *buffer, const float32x4x2_t &values);
-
-template <>
-void accumulate_results<1>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0]));
- vst1q_f32(buffer + 4, vaddq_f32(vld1q_f32(buffer + 4), values.val[1]));
-}
-
-template <>
-void accumulate_results<2>(float *buffer, const float32x4x2_t &values)
-{
- vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0]));
-}
-
-template <>
-void accumulate_results<3>(float *buffer, const float32x4x2_t &values)
-{
- vst1_f32(buffer, vadd_f32(vld1_f32(buffer), vget_low_f32(values.val[0])));
-}
-
template <typename T1>
class convolver_nhwc
{
@@ -718,8 +670,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_3x3(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2, stridex);
- store_results<stridex>(p_out, vres);
+ convolve_3x3<false>(in_top, in_mid, in_low, p_out, vk_r0, vk_r1, vk_r2, stridex);
}
}
}
@@ -743,8 +694,7 @@ public:
for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration,
in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration)
{
- auto vres = convolve_3x3(in_top, in_mid, in_low, vk_r0, vk_r1, vk_r2, stridex);
- accumulate_results<stridex>(p_out, vres);
+ convolve_3x3<true>(in_top, in_mid, in_low, p_out, vk_r0, vk_r1, vk_r2, stridex);
}
}
}