aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index f525d93e83..162c4b1ace 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -36,6 +36,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include <algorithm>
#include <arm_neon.h>
@@ -603,10 +604,9 @@ public:
out_values = internal_vmlal(out_values, in_values, we_values);
}
- out_val += out_values[0];
- out_val += out_values[1];
- out_val += out_values[2];
- out_val += out_values[3];
+ auto carry_addition = wrapper::vpadd(wrapper::vgethigh(out_values), wrapper::vgetlow(out_values));
+ carry_addition = wrapper::vpadd(carry_addition, carry_addition);
+ out_val += wrapper::vgetlane(carry_addition, 0);
// Leftover
for(; x < input_width; ++x)