about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEActivationLayerKernel.cpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-01-30 14:47:39 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:45:42 +0000
commit: f525eab6412053ccd8321af3e6cc2fbcb60ac65e (patch)
tree: dee9d1cc6665b35b63eb7efb0ae4ae6f9b6a8a15 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent: 6259e5f9204abf31b811b1d002f68ce6504197bd (diff)
download: ComputeLibrary-f525eab6412053ccd8321af3e6cc2fbcb60ac65e.tar.gz
COMPMID-871: Remove vst4q/vld4q from NEActivationLayer.
Change-Id: Iebd2a8fece1af87c93d6795e176d8c37ca64bbf6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118187
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 17
1 file changed, 14 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 37d694d1b4..a6dbfe6551 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -361,8 +361,16 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL
const auto input_ptr = reinterpret_cast<const float *>(input.ptr());
const auto output_ptr = reinterpret_cast<float *>(output.ptr());
- const float32x4x4_t in = vld4q_f32(input_ptr);
- float32x4x4_t tmp = { {} };
+ const float32x4x4_t in =
+ {
+ {
+ vld1q_f32(input_ptr),
+ vld1q_f32(input_ptr + 4),
+ vld1q_f32(input_ptr + 8),
+ vld1q_f32(input_ptr + 12)
+ }
+ };
+ float32x4x4_t tmp = { {} };
switch(F)
{
@@ -491,7 +499,10 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL
break;
}
- vst4q_f32(output_ptr, tmp);
+ vst1q_f32(output_ptr, tmp.val[0]);
+ vst1q_f32(output_ptr + 4, tmp.val[1]);
+ vst1q_f32(output_ptr + 8, tmp.val[2]);
+ vst1q_f32(output_ptr + 12, tmp.val[3]);
},
input, output);
}