author    Michalis Spyrou <michalis.spyrou@arm.com>    2020-11-22 00:49:42 +0000
committer Michalis Spyrou <michalis.spyrou@arm.com>    2020-12-14 16:02:26 +0000
commit    aa51a5ba9a3f05be08b94859b53c398edee5d2e3 (patch)
tree      b28829b483421b210cd7c8a256c7feafed736b36
parent    3737c7934da929003bda446291489cf352e43751 (diff)
COMPMID-3870: Create ActivationLayer SVE/SVE2
Adds support for ActivationLayer for SVE and SVE2.

Datatypes supported:
*FP32
*FP16
*QASYMM8
*QASYMM8_SIGNED
*QSYMM16

Change-Id: Ia3583891795cda4ca2f9fa27c440731a5c27710d
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4566
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp19
-rw-r--r--SConscript10
-rwxr-xr-xscripts/check_bad_style.sh2
-rw-r--r--src/core/NEON/SVEAsymm.h262
-rw-r--r--src/core/NEON/SVEAsymm.inl105
-rw-r--r--src/core/NEON/SVEMath.h116
-rw-r--r--src/core/NEON/SVEMath.inl266
-rw-r--r--src/core/NEON/SVESymm.h127
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp33
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/fp16.cpp132
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/fp32.cpp133
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp185
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp183
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp122
-rw-r--r--src/core/NEON/kernels/activation/impl/list.h5
-rw-r--r--src/core/NEON/kernels/floor/impl/NEON/fp16.cpp (renamed from src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp)0
-rw-r--r--src/core/NEON/kernels/floor/impl/NEON/fp32.cpp (renamed from src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp)0
-rw-r--r--src/core/common/Registrars.h33
-rw-r--r--tests/validation/NEON/ActivationLayer.cpp40
24 files changed, 1752 insertions(+), 21 deletions(-)
diff --git a/Android.bp b/Android.bp
index 404c1d54c8..9f7f447fe3 100644
--- a/Android.bp
+++ b/Android.bp
@@ -343,11 +343,16 @@ cc_library_static {
"src/core/NEON/kernels/NEWeightsReshapeKernel.cpp",
"src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp",
"src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
- "src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp",
- "src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp",
- "src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp",
- "src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp",
- "src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp",
+ "src/core/NEON/kernels/activation/impl/NEON/fp16.cpp",
+ "src/core/NEON/kernels/activation/impl/NEON/fp32.cpp",
+ "src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp",
+ "src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp",
+ "src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp",
+ "src/core/NEON/kernels/activation/impl/SVE/fp16.cpp",
+ "src/core/NEON/kernels/activation/impl/SVE/fp32.cpp",
+ "src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp",
+ "src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp",
+ "src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
@@ -400,8 +405,8 @@ cc_library_static {
"src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
"src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
"src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp",
- "src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp",
+ "src/core/NEON/kernels/floor/impl/NEON/fp16.cpp",
+ "src/core/NEON/kernels/floor/impl/NEON/fp32.cpp",
"src/core/PyramidInfo.cpp",
"src/core/Rounding.cpp",
"src/core/Size2D.cpp",
diff --git a/SConscript b/SConscript
index f1fe9b288c..656336d555 100644
--- a/SConscript
+++ b/SConscript
@@ -244,15 +244,15 @@ if env['neon']:
core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp')
if any(i in env['data_type_support'] for i in ['all', 'fp16']):
- core_files += Glob('src/core/NEON/kernels/*/impl/fp16_*.cpp')
+ core_files += Glob('src/core/NEON/kernels/*/impl/*/fp16.cpp')
if any(i in env['data_type_support'] for i in ['all', 'fp32']):
- core_files += Glob('src/core/NEON/kernels/*/impl/fp32_*.cpp')
+ core_files += Glob('src/core/NEON/kernels/*/impl/*/fp32.cpp')
if any(i in env['data_type_support'] for i in ['all', 'qasymm8']):
- core_files += Glob('src/core/NEON/kernels/*/impl/qasymm8_neon*.cpp')
+ core_files += Glob('src/core/NEON/kernels/*/impl/*/qasymm8.cpp')
if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']):
- core_files += Glob('src/core/NEON/kernels/*/impl/qasymm8_signed_*.cpp')
+ core_files += Glob('src/core/NEON/kernels/*/impl/*/qasymm8_signed.cpp')
if any(i in env['data_type_support'] for i in ['all', 'qsymm16']):
- core_files += Glob('src/core/NEON/kernels/*/impl/qsymm16_*.cpp')
+ core_files += Glob('src/core/NEON/kernels/*/impl/*/qsymm16.cpp')
runtime_files += Glob('src/runtime/NEON/*.cpp')
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
diff --git a/scripts/check_bad_style.sh b/scripts/check_bad_style.sh
index acc496b00a..26524d7ec9 100755
--- a/scripts/check_bad_style.sh
+++ b/scripts/check_bad_style.sh
@@ -37,7 +37,7 @@ then
exit -1
fi
-grep -HnR --exclude-dir=assembly --exclude-dir=convolution --exclude-dir=arm_gemm "float32_t" $FILES | tee bad_style.log
+grep -HnR --exclude-dir=assembly --exclude-dir=convolution --exclude-dir=arm_gemm "/^float32_t/" $FILES | tee bad_style.log
if [[ $(cat bad_style.log | wc -l) > 0 ]]
then
echo ""
diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h
new file mode 100644
index 0000000000..4b0ecd9eea
--- /dev/null
+++ b/src/core/NEON/SVEAsymm.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_SVEASYMM_H
+#define ARM_COMPUTE_SVEASYMM_H
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+/** Perform a multiply-accumulate on all components of a QASYMM8 vector
+ *
+ * vd*vs + vo
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] vd Input vector value in QASYMM8 format
+ * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all lanes.
+ * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all lanes.
+ *
+ * @return A vector in QASYMM8 format, saturated to fit
+ */
+svuint8_t svmla_qasymm8_z(svbool_t pg, svuint8_t vd, svfloat32_t vs, svfloat32_t vo);
+
+/** Perform a multiply-accumulate on all components of a QASYMM8_SIGNED vector
+ *
+ * vd*vs + vo
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] vd Input vector value in QASYMM8_SIGNED format
+ * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all lanes.
+ * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all lanes.
+ *
+ * @return A vector in QASYMM8_SIGNED format, saturated to fit
+ */
+svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloat32_t vo);
+
+/** Dequantize an sve vector following an asymmetric quantization scheme.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] scale Quantization scaling factor.
+ * @param[in] offset Zero quantization offset.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, float scale, int32_t offset)
+{
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale),
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Dequantize an sve vector
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, const UniformQuantizationInfo &qi)
+{
+ return svdequantize_z(pg, qv, qi.scale, qi.offset);
+}
+
+/** Dequantize an sve vector holding signed asymmetric quantized values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] scale Quantization scaling factor.
+ * @param[in] offset Zero quantization offset.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale, int32_t offset)
+{
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale),
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Dequantize an sve vector.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const UniformQuantizationInfo &qi)
+{
+ return svdequantize_z(pg, qv, qi.scale, qi.offset);
+}
+
+/** Dequantize an sve vector following a symmetric quantization scheme.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] vscale Vector containing quantization scaling factors.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svfloat32x4_t vscale)
+{
+ const svfloat32x4_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)),
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Dequantize an sve vector following a symmetric quantization scheme.
+ *
+ * @param[in] pg    Predicate value.
+ * @param[in] qv    Input values to be dequantized.
+ * @param[in] scale Quantization scaling factor.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale)
+{
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale),
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Quantize an sve vector holding floating point values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return An sve vector holding the quantized values
+ */
+inline svuint8_t svquantize_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const auto voffset = svdup_n_f32(offset);
+ const auto vinvscale = svdup_n_f32(1.f / scale);
+
+ const auto rf_0 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
+ const auto rf_1 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
+ const auto rf_2 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
+ const auto rf_3 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));
+
+ const auto pa = svqxtnt_u32(svqxtnb_u32(rf_0), rf_1);
+ const auto pb = svqxtnt_u32(svqxtnb_u32(rf_2), rf_3);
+
+ return svqxtnt_u16(svqxtnb_u16(pa), pb);
+}
+
+/** Signed quantize an sve vector holding floating point values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return An sve vector holding the quantized values
+ */
+inline svint8_t svquantize_signed_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const auto voffset = svdup_n_f32(offset);
+ const auto vinvscale = svdup_n_f32(1.f / scale);
+ const auto rf_0 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
+ const auto rf_1 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
+ const auto rf_2 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
+ const auto rf_3 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));
+
+ const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
+ const auto pb = svqxtnt_s32(svqxtnb_s32(rf_2), rf_3);
+
+ return svqxtnt_s16(svqxtnb_s16(pa), pb);
+}
+
+/** Quantize four sve vectors of floating point values to QASYMM16.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return An sve vector holding the quantized values
+ */
+inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const auto voffset = svdup_n_f32(offset);
+ const auto vinvscale = svdup_n_f32(1.f / scale);
+
+ const auto rf_0 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
+ const auto rf_1 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
+ const auto rf_2 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
+ const auto rf_3 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));
+
+ const auto pa = svqxtnt_u32(svqxtnb_u32(rf_0), rf_1);
+ const auto pb = svqxtnt_u32(svqxtnb_u32(rf_2), rf_3);
+
+ return svcreate2_u16(pa, pb);
+}
+} // namespace arm_compute
+#include "src/core/NEON/SVEAsymm.inl"
+#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif // ARM_COMPUTE_SVEASYMM_H
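The helpers above are designed to be used in pairs inside a predicated loop: widen a QASYMM8 vector to four F32 vectors with svdequantize_z, do the arithmetic in float, then narrow back with svquantize_z. A minimal sketch of that round trip, assuming SVE2 is available; scale_qasymm8 and its buffer arguments are hypothetical names, not part of this patch:

    // Sketch: QASYMM8 -> F32x4 -> QASYMM8 round trip with tail predication.
    #include <arm_sve.h>
    #include "src/core/NEON/SVEAsymm.h"

    void scale_qasymm8(const uint8_t *in, uint8_t *out, int n,
                       const arm_compute::UniformQuantizationInfo &qi_in,
                       const arm_compute::UniformQuantizationInfo &qi_out)
    {
        int      x  = 0;
        svbool_t pg = svwhilelt_b8(x, n);
        do
        {
            const svuint8_t vin = svld1_u8(pg, in + x);
            // Widen to four F32 vectors using the input quantization info
            const svfloat32x4_t vf = arm_compute::svdequantize_z(pg, vin, qi_in);
            // ... float math on svget4_f32(vf, 0..3) would go here ...
            // Narrow back with saturation using the output quantization info
            svst1_u8(pg, out + x, arm_compute::svquantize_z(pg, vf, qi_out));
            x += svcntb();
            pg = svwhilelt_b8(x, n);
        } while(svptest_any(svptrue_b8(), pg));
    }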
diff --git a/src/core/NEON/SVEAsymm.inl b/src/core/NEON/SVEAsymm.inl
new file mode 100644
index 0000000000..edf5733c36
--- /dev/null
+++ b/src/core/NEON/SVEAsymm.inl
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+namespace arm_compute
+{
+#if defined(__ARM_FEATURE_SVE2)
+inline svuint8_t svmla_qasymm8_z(svbool_t pg, svuint8_t vd, svfloat32_t vs, svfloat32_t vo)
+{
+ // Convert uint8 vectors to uint16 vectors
+ auto vd_low_u16 = svmovlb_u16(vd);
+ auto vd_high_u16 = svmovlt_u16(vd);
+
+ // Convert uint16 vectors to uint32 vectors
+ auto A_u32 = svmovlb_u32(vd_low_u16);
+ auto B_u32 = svmovlt_u32(vd_low_u16);
+ auto C_u32 = svmovlb_u32(vd_high_u16);
+ auto D_u32 = svmovlt_u32(vd_high_u16);
+
+ // Convert uint32 vectors to float32 vectors
+ auto A_f32 = svcvt_f32_u32_z(pg, A_u32);
+ auto B_f32 = svcvt_f32_u32_z(pg, B_u32);
+ auto C_f32 = svcvt_f32_u32_z(pg, C_u32);
+ auto D_f32 = svcvt_f32_u32_z(pg, D_u32);
+
+ // vd = vd*vs + vo
+ A_f32 = svmla_f32_z(pg, vo, A_f32, vs);
+ B_f32 = svmla_f32_z(pg, vo, B_f32, vs);
+ C_f32 = svmla_f32_z(pg, vo, C_f32, vs);
+ D_f32 = svmla_f32_z(pg, vo, D_f32, vs);
+
+ // Convert float32 vectors to uint32 vectors
+ A_u32 = svcvt_u32_f32_z(pg, A_f32);
+ B_u32 = svcvt_u32_f32_z(pg, B_f32);
+ C_u32 = svcvt_u32_f32_z(pg, C_f32);
+ D_u32 = svcvt_u32_f32_z(pg, D_f32);
+
+ // Convert uint32 vectors to uint16 vectors (with saturation)
+ vd_low_u16 = svqxtnt_u32(svqxtnb_u32(A_u32), B_u32);
+ vd_high_u16 = svqxtnt_u32(svqxtnb_u32(C_u32), D_u32);
+
+ // Convert uint16 vectors to uint8 vectors (with saturation)
+ const auto res = svqxtnt_u16(svqxtnb_u16(vd_low_u16), vd_high_u16);
+ return res;
+}
+
+inline svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloat32_t vo)
+{
+ // Convert int8 vectors to int16 vectors
+ auto vd_low_s16 = svmovlb_s16(vd);
+ auto vd_high_s16 = svmovlt_s16(vd);
+
+ // Convert int16 vectors to int32 vectors
+ auto A_s32 = svmovlb_s32(vd_low_s16);
+ auto B_s32 = svmovlt_s32(vd_low_s16);
+ auto C_s32 = svmovlb_s32(vd_high_s16);
+ auto D_s32 = svmovlt_s32(vd_high_s16);
+
+ // Convert int32 vectors to float32 vectors
+ auto A_f32 = svcvt_f32_s32_z(pg, A_s32);
+ auto B_f32 = svcvt_f32_s32_z(pg, B_s32);
+ auto C_f32 = svcvt_f32_s32_z(pg, C_s32);
+ auto D_f32 = svcvt_f32_s32_z(pg, D_s32);
+
+ // vd = vd*vs + vo
+ A_f32 = svmla_f32_z(pg, vo, A_f32, vs);
+ B_f32 = svmla_f32_z(pg, vo, B_f32, vs);
+ C_f32 = svmla_f32_z(pg, vo, C_f32, vs);
+ D_f32 = svmla_f32_z(pg, vo, D_f32, vs);
+
+ // Convert float32 vectors to int32 vectors
+ A_s32 = svcvt_s32_f32_z(pg, A_f32);
+ B_s32 = svcvt_s32_f32_z(pg, B_f32);
+ C_s32 = svcvt_s32_f32_z(pg, C_f32);
+ D_s32 = svcvt_s32_f32_z(pg, D_f32);
+
+ // Convert int32 vectors to int16 vectors (with saturation)
+ vd_low_s16 = svqxtnt_s32(svqxtnb_s32(A_s32), B_s32);
+ vd_high_s16 = svqxtnt_s32(svqxtnb_s32(C_s32), D_s32);
+
+ // Convert int16 vectors to int8 vectors (with saturation)
+ const auto res = svqxtnt_s16(svqxtnb_s16(vd_low_s16), vd_high_s16);
+ return res;
+}
+#endif /* defined(__ARM_FEATURE_SVE2) */
+} // namespace arm_compute
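svmla_qasymm8_z is the requantization primitive used by the activation kernels later in this patch. For a value quantized as real = scale_in * (q_in - offset_in), re-expressing it in the output space as q_out = real / scale_out + offset_out collapses to q_out = q_in * s + o, with s = scale_in / scale_out and o = -offset_in * s + offset_out. A sketch of the call site, assuming a b8 predicate pg and a loaded svuint8_t vin are in scope:

    // Sketch: requantize a QASYMM8 vector from (scale_in, offset_in) to (scale_out, offset_out).
    const float       s  = qi_in.scale / qi_out.scale;        // combined scale
    const float       o  = -qi_in.offset * s + qi_out.offset; // combined offset
    const svfloat32_t vs = svdup_n_f32(s);
    const svfloat32_t vo = svdup_n_f32(o);
    const svuint8_t   requantized = arm_compute::svmla_qasymm8_z(pg, vin, vs, vo); // q*s + o, saturated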
diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h
new file mode 100644
index 0000000000..bdf2e894e2
--- /dev/null
+++ b/src/core/NEON/SVEMath.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_SVEMATH_H
+#define ARM_COMPUTE_SVEMATH_H
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+#include <array>
+
+namespace arm_compute
+{
+/** Calculate exponential.
+ *
+ * @param[in] pg  Predicate value.
+ * @param[in] val Input vector value in F32 format.
+ *
+ * @return The calculated exponential.
+ */
+svfloat32_t svexp_f32_z(svbool_t pg, svfloat32_t val);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+svfloat32_t svinv_f32_z(svbool_t pg, svfloat32_t x);
+
+/** Calculate logarithm
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] x Input vector value in F32 format.
+ *
+ * @return The calculated logarithm.
+ */
+svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x);
+
+/** Calculate hyperbolic tangent.
+ *
+ * tanh(x) = (e^2x - 1)/(e^2x + 1)
+ *
+ * @note We clamp x to [-10,10] to avoid overflow issues.
+ *
+ * @param[in] pg  Predicate value.
+ * @param[in] val Input vector value in F32 format.
+ *
+ * @return The calculated Hyperbolic Tangent.
+ */
+svfloat32_t svtanh_f32_z(svbool_t pg, svfloat32_t val);
+
+/** Calculate hyperbolic tangent.
+ *
+ * tanh(x) = (e^2x - 1)/(e^2x + 1)
+ *
+ * @note We clamp x to [-10,10] to avoid overflow issues.
+ *
+ * @param[in] pg  Predicate value.
+ * @param[in] val Input vector value in F16 format.
+ *
+ * @return The calculated Hyperbolic Tangent.
+ */
+svfloat16_t svtanh_f16_z(svbool_t pg, svfloat16_t val);
+
+/** Calculate exponential
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] x  Input vector value in F16 format.
+ *
+ * @return The calculated exponential.
+ */
+svfloat16_t svexp_f16_z(svbool_t pg, svfloat16_t x);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+svfloat16_t svinv_f16_z(svbool_t pg, svfloat16_t x);
+
+/** Calculate logarithm
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] x  Input vector value in F16 format.
+ *
+ * @return The calculated logarithm.
+ */
+svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x);
+
+} // namespace arm_compute
+#include "src/core/NEON/SVEMath.inl"
+#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* ARM_COMPUTE_SVEMATH_H */
\ No newline at end of file
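These helpers compose directly into the activation bodies further down in this patch. For example, the LOGISTIC case computes 1 / (1 + e^-x) from svinv, svadd and svexp; a sketch for F32, assuming a predicate pg and an input vector vin are in scope:

    // Sketch: logistic(x) = 1 / (1 + exp(-x)) built from the SVEMath helpers.
    const svfloat32_t one = svdup_n_f32(1.f);
    const svfloat32_t y   = arm_compute::svinv_f32_z(
        pg, svadd_f32_z(pg, one, arm_compute::svexp_f32_z(pg, svneg_f32_z(pg, vin))));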
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
new file mode 100644
index 0000000000..5ebfeaa5c5
--- /dev/null
+++ b/src/core/NEON/SVEMath.inl
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <cmath>
+#include <limits>
+
+#if defined(__ARM_FEATURE_SVE)
+
+namespace arm_compute
+{
+inline svfloat32_t svtaylor_poly_f32_z(svbool_t pg, svfloat32_t x, const std::array<svfloat32_t, 8> &coeffs)
+{
+ const auto A = svmla_f32_z(pg, coeffs[0], coeffs[4], x);
+ const auto B = svmla_f32_z(pg, coeffs[2], coeffs[6], x);
+ const auto C = svmla_f32_z(pg, coeffs[1], coeffs[5], x);
+ const auto D = svmla_f32_z(pg, coeffs[3], coeffs[7], x);
+ const auto x2 = svmul_f32_z(pg, x, x);
+ const auto x4 = svmul_f32_z(pg, x2, x2);
+ const auto res = svmla_f32_z(pg, svmla_f32_z(pg, A, B, x2), svmla_f32_z(pg, C, D, x2), x4);
+ return res;
+}
+
+inline svfloat16_t svtaylor_poly_f16_z(svbool_t pg, svfloat16_t x, const std::array<svfloat16_t, 8> &coeffs)
+{
+ const auto A = svmla_f16_z(pg, coeffs[0], coeffs[4], x);
+ const auto B = svmla_f16_z(pg, coeffs[2], coeffs[6], x);
+ const auto C = svmla_f16_z(pg, coeffs[1], coeffs[5], x);
+ const auto D = svmla_f16_z(pg, coeffs[3], coeffs[7], x);
+ const auto x2 = svmul_f16_z(pg, x, x);
+ const auto x4 = svmul_f16_z(pg, x2, x2);
+ const auto res = svmla_f16_z(pg, svmla_f16_z(pg, A, B, x2), svmla_f16_z(pg, C, D, x2), x4);
+ return res;
+}
+
+inline svfloat16_t svinv_f16_z(svbool_t pg, svfloat16_t x)
+{
+ auto recip = svrecpe_f16(x);
+ recip = svmul_f16_z(pg, svrecps_f16(x, recip), recip);
+ recip = svmul_f16_z(pg, svrecps_f16(x, recip), recip);
+ return recip;
+}
+
+inline svfloat32_t svinv_f32_z(svbool_t pg, svfloat32_t x)
+{
+ auto recip = svrecpe_f32(x);
+ recip = svmul_f32_z(pg, svrecps_f32(x, recip), recip);
+ recip = svmul_f32_z(pg, svrecps_f32(x, recip), recip);
+ return recip;
+}
+
+inline svfloat32_t svexp_f32_z(svbool_t pg, svfloat32_t x)
+{
+ const auto CONST_LN2 = svdup_n_f32(0.6931471805f); // ln(2)
+ const auto CONST_INV_LN2 = svdup_n_f32(1.4426950408f); // 1/ln(2)
+ const auto CONST_INF = svdup_n_f32(std::numeric_limits<float>::infinity());
+ const auto CONST_MAX_INPUT = svdup_n_f32(88.7f);
+ const auto CONST_0 = svdup_n_f32(0.f);
+ const auto CONST_NEGATIVE_126 = svdup_n_s32(-126);
+
+ /** Exponent polynomial coefficients */
+ const std::array<svfloat32_t, 8> exp_tab =
+ {
+ {
+ svdup_n_f32(1.f),
+ svdup_n_f32(0.0416598916054f),
+ svdup_n_f32(0.500000596046f),
+ svdup_n_f32(0.0014122662833f),
+ svdup_n_f32(1.00000011921f),
+ svdup_n_f32(0.00833693705499f),
+ svdup_n_f32(0.166665703058f),
+ svdup_n_f32(0.000195780929062f),
+ }
+ };
+
+ // Perform range reduction [-log(2),log(2)]
+ auto m = svcvt_s32_f32_z(pg, svmul_f32_z(pg, x, CONST_INV_LN2));
+ auto val = svmls_f32_z(pg, x, svcvt_f32_s32_z(pg, m), CONST_LN2);
+
+ // Polynomial Approximation
+ auto poly = svtaylor_poly_f32_z(pg, val, exp_tab);
+
+ // Reconstruct
+ poly = svreinterpret_f32_s32(svqadd_s32(svreinterpret_s32_f32(poly), svlsl_n_s32_z(pg, m, 23)));
+
+ // Handle underflow
+ svbool_t ltpg = svcmplt_s32(pg, m, CONST_NEGATIVE_126);
+ poly = svsel_f32(ltpg, CONST_0, poly);
+
+ // Handle overflow
+ svbool_t gtpg = svcmpgt_f32(pg, x, CONST_MAX_INPUT);
+ poly = svsel_f32(gtpg, CONST_INF, poly);
+
+ return poly;
+}
+
+inline svfloat16_t svexp_f16_z(svbool_t pg, svfloat16_t x)
+{
+ const auto CONST_LN2 = svdup_n_f16(0.6931471805f); // ln(2)
+ const auto CONST_INV_LN2 = svdup_n_f16(1.4426950408f); // 1/ln(2)
+ const auto CONST_INF = svdup_n_f16(std::numeric_limits<float16_t>::infinity());
+ const auto CONST_MAX_INPUT = svdup_n_f16(88.7f);
+ const auto CONST_0 = svdup_n_f16(0.f);
+ const auto CONST_NEGATIVE_126 = svdup_n_s16(-126);
+
+ /** Exponent polynomial coefficients */
+ const std::array<svfloat16_t, 8> exp_tab =
+ {
+ {
+ svdup_n_f16(1.f),
+ svdup_n_f16(0.0416598916054f),
+ svdup_n_f16(0.500000596046f),
+ svdup_n_f16(0.0014122662833f),
+ svdup_n_f16(1.00000011921f),
+ svdup_n_f16(0.00833693705499f),
+ svdup_n_f16(0.166665703058f),
+ svdup_n_f16(0.000195780929062f),
+ }
+ };
+
+ // Perform range reduction [-log(2),log(2)]
+ auto m = svcvt_s16_f16_z(pg, svmul_f16_z(pg, x, CONST_INV_LN2));
+ auto val = svmls_f16_z(pg, x, svcvt_f16_s16_z(pg, m), CONST_LN2);
+
+ // Polynomial Approximation
+ auto poly = svtaylor_poly_f16_z(pg, val, exp_tab);
+
+ // Reconstruct
+ poly = svreinterpret_f16_s16(svqadd_s16(svreinterpret_s16_f16(poly), svlsl_n_s16_z(pg, m, 11)));
+
+ // Handle underflow
+ svbool_t ltpg = svcmplt_s16(pg, m, CONST_NEGATIVE_126);
+ poly = svsel_f16(ltpg, CONST_0, poly);
+
+ // Handle overflow
+ svbool_t gtpg = svcmpgt_f16(pg, x, CONST_MAX_INPUT);
+ poly = svsel_f16(gtpg, CONST_INF, poly);
+
+ return poly;
+}
+
+inline svfloat32_t svtanh_f32_z(svbool_t pg, svfloat32_t val)
+{
+ const svfloat32_t CONST_1 = svdup_n_f32(1.f);
+ const svfloat32_t CONST_2 = svdup_n_f32(2.f);
+ const svfloat32_t CONST_MIN_TANH = svdup_n_f32(-10.f);
+ const svfloat32_t CONST_MAX_TANH = svdup_n_f32(10.f);
+
+ svfloat32_t x = svmin_f32_z(pg, svmax_f32_z(pg, val, CONST_MIN_TANH), CONST_MAX_TANH);
+ svfloat32_t exp2x = svexp_f32_z(pg, svmul_f32_z(pg, CONST_2, x));
+ svfloat32_t num = svsub_f32_z(pg, exp2x, CONST_1);
+ svfloat32_t den = svadd_f32_z(pg, exp2x, CONST_1);
+ svfloat32_t tanh = svdiv_f32_z(pg, num, den);
+ return tanh;
+}
+
+inline svfloat16_t svtanh_f16_z(svbool_t pg, svfloat16_t val)
+{
+ const svfloat16_t CONST_1 = svdup_n_f16(1.f);
+ const svfloat16_t CONST_2 = svdup_n_f16(2.f);
+ const svfloat16_t CONST_MIN_TANH = svdup_n_f16(-10.f);
+ const svfloat16_t CONST_MAX_TANH = svdup_n_f16(10.f);
+
+ const svfloat16_t x = svmin_f16_z(pg, svmax_f16_z(pg, val, CONST_MIN_TANH), CONST_MAX_TANH);
+ const svfloat16_t exp2x = svexp_f16_z(pg, svmul_f16_z(pg, CONST_2, x));
+ const svfloat16_t num = svsub_f16_z(pg, exp2x, CONST_1);
+ const svfloat16_t den = svadd_f16_z(pg, exp2x, CONST_1);
+ const svfloat16_t tanh = svdiv_f16_z(pg, num, den);
+ return tanh;
+}
+
+inline svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x)
+{
+#if defined(__ARM_FEATURE_SVE2)
+ return svcvt_f32_s32_z(pg, svlogb_f32_z(pg, x));
+#else /* !defined(__ARM_FEATURE_SVE2) */
+ /** Logarithm polynomial coefficients */
+ const std::array<svfloat32_t, 8> log_tab =
+ {
+ {
+ svdup_n_f32(-2.29561495781f),
+ svdup_n_f32(-2.47071170807f),
+ svdup_n_f32(-5.68692588806f),
+ svdup_n_f32(-0.165253549814f),
+ svdup_n_f32(5.17591238022f),
+ svdup_n_f32(0.844007015228f),
+ svdup_n_f32(4.58445882797f),
+ svdup_n_f32(0.0141278216615f),
+ }
+ };
+
+ const auto CONST_127 = svdup_n_s32(127); // 127
+ const auto CONST_LN2 = svdup_n_f32(0.6931471805f); // ln(2)
+
+ // Extract exponent
+ auto m = svsub_s32_z(pg, svasr_n_s32_z(pg, svreinterpret_s32_f32(x), 23), CONST_127);
+ auto val = svreinterpret_f32_s32(svsub_s32_z(pg, svreinterpret_s32_f32(x), svlsl_n_s32_z(pg, m, 23)));
+
+ // Polynomial Approximation
+ auto poly = svtaylor_poly_f32_z(pg, val, log_tab);
+
+ // Reconstruct
+ poly = svmla_f32_z(pg, poly, svcvt_f32_s32_z(pg, m), CONST_LN2);
+
+ return poly;
+#endif /* defined(__ARM_FEATURE_SVE2) */
+}
+
+inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x)
+{
+#if defined(__ARM_FEATURE_SVE2)
+ return svcvt_f16_s16_z(pg, svlogb_f16_z(pg, x));
+#else /* !defined(__ARM_FEATURE_SVE2) */
+
+ /** Logarithm polynomial coefficients */
+ const std::array<svfloat16_t, 8> log_tab
+ {
+ {
+ svdup_n_f16(-2.29561495781f),
+ svdup_n_f16(-2.47071170807f),
+ svdup_n_f16(-5.68692588806f),
+ svdup_n_f16(-0.165253549814f),
+ svdup_n_f16(5.17591238022f),
+ svdup_n_f16(0.844007015228f),
+ svdup_n_f16(4.58445882797f),
+ svdup_n_f16(0.0141278216615f),
+ }
+ };
+
+ const auto CONST_7 = svdup_n_s16(7); // 7
+ const auto CONST_LN2 = svdup_n_f16(0.6931471805f); // ln(2)
+
+ // Extract exponent
+ auto m = svsub_s16_z(pg, svasr_n_s16_z(pg, svreinterpret_s16_f16(x), 11), CONST_7);
+ auto val = svreinterpret_f16_s16(svsub_s16_z(pg, svreinterpret_s16_f16(x), svlsl_n_s16_z(pg, m, 11)));
+
+ // Polynomial Approximation
+ auto poly = svtaylor_poly_f16_z(pg, val, log_tab);
+
+ // Reconstruct
+ poly = svmla_f16_z(pg, poly, svcvt_f16_s16_z(pg, m), CONST_LN2);
+
+ return poly;
+#endif /* defined(__ARM_FEATURE_SVE2) */
+}
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE) */
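svexp_f32_z follows the classic range-reduction scheme: write x = m*ln(2) + r with m = round(x/ln(2)), evaluate e^r with the degree-7 polynomial, then multiply by 2^m by adding m directly into the IEEE-754 exponent field; that is what the svlsl_n_s32_z(pg, m, 23) step does. A scalar sketch of that reconstruction, for reference only:

    // Scalar sketch of the reconstruction step in svexp_f32_z (no under/overflow handling).
    #include <cstdint>
    #include <cstring>

    float exp_reconstruct(float poly /* ~= e^r */, int32_t m /* round(x / ln 2) */)
    {
        int32_t bits;
        std::memcpy(&bits, &poly, sizeof(bits));
        bits += m << 23; // add m to the exponent field: poly * 2^m
        float result;
        std::memcpy(&result, &bits, sizeof(result));
        return result;
    }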
diff --git a/src/core/NEON/SVESymm.h b/src/core/NEON/SVESymm.h
new file mode 100644
index 0000000000..30e1e172a3
--- /dev/null
+++ b/src/core/NEON/SVESymm.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_SVESYMM_H
+#define ARM_COMPUTE_SVESYMM_H
+
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+/** Dequantize an sve vector holding 16-bit quantized values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x2_t svdequantize_qsymm16_z(svbool_t pg, const svint16_t &qv, float scale)
+{
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x2_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(qv)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(qv)), vscale)
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Quantize a pair of sve vectors holding floating point values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return An sve vector holding the quantized values
+ */
+inline svint16_t svquantize_qsymm16_z(svbool_t pg, const svfloat32x2_t qv, float scale)
+{
+ const svfloat32_t vinvscale = svdup_n_f32(1.f / scale);
+
+ const auto rf_0 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget2_f32(qv, 0), vinvscale));
+ const auto rf_1 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget2_f32(qv, 1), vinvscale));
+ const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
+
+ return pa;
+}
+
+/** Dequantize a pair of sve vectors holding 16-bit quantized values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in an sve vector
+ */
+inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint16x2_t qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input =
+ {
+ { {
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 0))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 0))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 1))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 1))), vscale),
+ }
+ }
+ };
+ return vdequantized_input;
+}
+
+/** Quantize four sve vectors holding floating point values.
+ *
+ * @param[in] pg Predicate value.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return An sve vector holding the quantized values
+ */
+inline svint16x2_t svquantize_qsymm16_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ ARM_COMPUTE_ERROR_ON(scale == 0.f);
+ const auto vinvscale = svdup_n_f32(1.f / scale);
+ const auto rf_0 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 0), vinvscale));
+ const auto rf_1 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 1), vinvscale));
+ const auto rf_2 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 2), vinvscale));
+ const auto rf_3 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 3), vinvscale));
+
+ const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
+ const auto pb = svqxtnt_s32(svqxtnb_s32(rf_2), rf_3);
+
+ return svcreate2_s16(pa, pb);
+}
+
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif // ARM_COMPUTE_SVESYMM_H
\ No newline at end of file
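As with the asymmetric helpers, these pair up inside a predicated loop: widen with svdequantize_qsymm16_z, compute in F32, then narrow back with svquantize_qsymm16_z. A sketch, assuming SVE2, a b16 predicate pg from svwhilelt_b16, a nonzero scale, and hypothetical in_ptr/out_ptr/x names:

    // Sketch: QSYMM16 round trip through F32 (identity compute stage).
    const svint16_t     vin = svld1_s16(pg, in_ptr + x);
    const svfloat32x2_t vf  = arm_compute::svdequantize_qsymm16_z(pg, vin, scale);
    // ... elementwise float math on svget2_f32(vf, 0) and svget2_f32(vf, 1) ...
    svst1_s16(pg, out_ptr + x, arm_compute::svquantize_qsymm16_z(pg, vf, scale));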
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index d969fd8e38..f215787bf6 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -56,6 +56,18 @@ struct ActivationKernel
static const ActivationKernel available_kernels[] =
{
+#if defined(__ARM_FEATURE_SVE)
+ {
+ "fp16_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
+ REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_activation)
+ },
+ {
+ "fp32_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
+ REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation)
+ },
+#else /* !defined(__ARM_FEATURE_SVE) */
{
"fp16_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
@@ -66,6 +78,25 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation)
},
+#endif /* defined(__ARM_FEATURE_SVE) */
+
+#if defined(__ARM_FEATURE_SVE2)
+ {
+ "qasymm8_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_activation)
+ },
+ {
+ "qasymm8_signed_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_activation)
+ },
+ {
+ "qsymm16_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
+ REGISTER_QSYMM16_SVE(arm_compute::cpu::qsymm16_sve_activation)
+ },
+#else /* !defined(__ARM_FEATURE_SVE2) */
{
"qasymm8_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
@@ -81,6 +112,7 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::qsymm16_neon_activation)
},
+#endif /* defined(__ARM_FEATURE_SVE2) */
};
const ActivationKernel *get_implementation(const ActivationSelectorData &data)
@@ -159,7 +191,6 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*output, *input->clone());
- // NEActivationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
Coordinates coord;
coord.set_num_dimensions(output->num_dimensions());
output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
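The available_kernels table above is scanned at configure time: the first entry whose selector lambda matches the tensor's data type wins, so when the SVE/SVE2 entries are compiled in they take the place of the NEON ones for the same types. A simplified sketch of that lookup; the field names here are assumptions based on the entries shown, not the library's exact definitions:

    // Simplified sketch of selector-table dispatch.
    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/Types.h" // DataType, ActivationLayerInfo
    #include "arm_compute/core/Window.h"
    #include <cstddef>

    using namespace arm_compute;

    struct ActivationSelectorData
    {
        DataType dt;
    };

    struct ActivationKernel
    {
        const char *name;
        bool (*is_selected)(const ActivationSelectorData &);
        void (*ukernel)(const ITensor *, ITensor *, const ActivationLayerInfo &, const Window &);
    };

    const ActivationKernel *get_implementation(const ActivationKernel *table, size_t n,
                                               const ActivationSelectorData &data)
    {
        for(size_t i = 0; i < n; ++i)
        {
            if(table[i].is_selected(data))
            {
                return &table[i]; // first match wins
            }
        }
        return nullptr;
    }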
diff --git a/src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
index 58e1cfcf23..58e1cfcf23 100644
--- a/src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
index 610db05224..610db05224 100644
--- a/src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
index 7b26441824..7b26441824 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
index c616c5e27d..c616c5e27d 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
index 0bef807db9..0bef807db9 100644
--- a/src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp b/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
new file mode 100644
index 0000000000..8d6f4f2351
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE)
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void fp16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const auto const_1 = svdup_n_f16(1.f);
+ const auto const_0 = svdup_n_f16(0.f);
+ const auto const_6 = svdup_n_f16(6.f);
+ const auto const_3 = svdup_n_f16(3.f);
+ const auto const_inv_6 = svdup_n_f16(0.166666667f);
+
+ const auto va = svdup_n_f16(act_info.a());
+ const auto vb = svdup_n_f16(act_info.b());
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+ svfloat16_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_f16(pg, input_ptr + x);
+ switch(act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f16_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f16_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f16_z(pg, svmul_f16_z(pg, svmin_f16_z(pg, vin, const_0), va), svmax_f16_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svlog_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f16(svcmpgt_f16(pg, vin, const_0), vin, svmul_f16_z(pg, va, svsub_f16_z(pg, svexp_f16_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f16_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f16_z(pg, va, svtanh_f16_z(pg, svmul_f16_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f16_z(pg, vin, svmul_f16_z(pg, const_inv_6, svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3)))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f16(pg, output_ptr + x, tmp);
+
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif // __ARM_FEATURE_SVE
\ No newline at end of file
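Unlike the NEON kernels, the SVE bodies need no scalar tail loop: svwhilelt builds a partial predicate for the final iteration and the loop runs while any lane is still active. The core of the pattern, reduced to its skeleton (F32 shown; the F16 kernel above swaps _b32/svcntw for _b16/svcnth):

    // Sketch: SVE predicated loop with tail handling, no scalar epilogue.
    int      x  = window_start_x;
    svbool_t pg = svwhilelt_b32(x, window_end_x);  // all-true until the tail
    do
    {
        const svfloat32_t vin = svld1_f32(pg, input_ptr + x);
        svst1_f32(pg, output_ptr + x, vin);        // inactive lanes are not written
        x += svcntw();                             // advance by the hardware vector length
        pg = svwhilelt_b32(x, window_end_x);       // partial predicate on the last pass
    } while(svptest_any(svptrue_b32(), pg));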
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp b/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
new file mode 100644
index 0000000000..2c276028a0
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/NEON/SVEMath.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void fp32_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const auto const_1 = svdup_n_f32(1.f);
+ const auto const_0 = svdup_n_f32(0.f);
+ const auto const_6 = svdup_n_f32(6.f);
+ const auto const_3 = svdup_n_f32(3.f);
+ const auto const_inv_6 = svdup_n_f32(0.166666667f);
+
+ const auto va = svdup_n_f32(act_info.a());
+ const auto vb = svdup_n_f32(act_info.b());
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+
+ svfloat32_t tmp;
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b32(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_f32(pg, input_ptr + x);
+ switch(act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f32_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f32_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va), svmax_f32_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin, svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f32_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f32_z(pg, vin, svmul_f32_z(pg, const_inv_6, svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3)))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f32(pg, output_ptr + x, tmp);
+
+ x += svcntw();
+ pg = svwhilelt_b32(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b32(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif // __ARM_FEATURE_SVE
\ No newline at end of file
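The HARD_SWISH branch above evaluates hard_swish(x) = x * relu6(x + 3) / 6: the svmin/svmax chain is relu6 and const_inv_6 carries the division. A scalar reference for checking a single lane by hand:

    // Scalar reference for the HARD_SWISH branch (sketch).
    #include <algorithm>

    float hard_swish(float x)
    {
        const float relu6 = std::min(6.f, std::max(0.f, x + 3.f));
        return x * relu6 * 0.166666667f; // x * relu6(x + 3) / 6
    }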
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp b/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
new file mode 100644
index 0000000000..a49a562c84
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <arm_neon.h>
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEAsymm.h"
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto va = svdup_n_u8(quantize_qasymm8(act_info.a(), qi_in));
+ const auto vb = svdup_n_u8(quantize_qasymm8(act_info.b(), qi_in));
+ const auto const_0 = quantize_qasymm8(0.f, qi_in);
+ const auto vconst_0 = svdup_n_u8(const_0);
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+ const auto const_6_f32 = svdup_n_f32(6.f);
+ const auto const_0_f32 = svdup_n_f32(0.f);
+ const auto const_3_f32 = svdup_n_f32(3.f);
+ const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
+
+ // Initialise scale/offset for re-quantization
+ bool requant = true;
+ if(qi_in.scale == qi_out.scale && qi_in.offset == qi_out.offset)
+ {
+ requant = false;
+ }
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ auto vs = svdup_n_f32(s);
+ auto vo = svdup_n_f32(o);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+ svuint8_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b8(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_u8(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = svmax_u8_z(pg, vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_u8_z(pg, va, svmax_u8_z(pg, vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_u8_z(pg, va, svmax_u8_z(pg, vb, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 1))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 2))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 3))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 1), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 2), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 3), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 0), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_u8(pg, output_ptr + x, tmp);
+
+ x += svcntb();
+ pg = svwhilelt_b8(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b8(), pg));
+
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file
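The vs/vo constants above fold de-quantize and re-quantize into a single multiply-add: from real = (q - offset_in) * scale_in it follows that q_out = q * s + o, with s = scale_in / scale_out and o = -offset_in * s + offset_out, which is exactly how s and o are initialised in the kernel. A scalar sketch of the same mapping; the helper name and the round-to-nearest choice are assumptions, not the library's API:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Map a QASYMM8 value between two (scale, offset) spaces without a full
    // dequantize/quantize round trip. Rounding mode assumed round-to-nearest.
    uint8_t requantize(uint8_t q, float scale_in, int offset_in, float scale_out, int offset_out)
    {
        const float s = scale_in / scale_out;
        const float o = -offset_in * s + offset_out;
        const int   q_out = static_cast<int>(std::lround(q * s + o));
        return static_cast<uint8_t>(std::min(255, std::max(0, q_out))); // saturate to u8
    }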
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp b/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
new file mode 100644
index 0000000000..f34bee88fc
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEAsymm.h"
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto va = svdup_n_s8(quantize_qasymm8_signed(act_info.a(), qi_in));
+ const auto vb = svdup_n_s8(quantize_qasymm8_signed(act_info.b(), qi_in));
+ const auto const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const auto vconst_0 = svdup_n_s8(const_0);
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+ const auto const_6_f32 = svdup_n_f32(6.f);
+ const auto const_0_f32 = svdup_n_f32(0.f);
+ const auto const_3_f32 = svdup_n_f32(3.f);
+ const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
+
+ // Initialise scale/offset for re-quantization
+ bool requant = true;
+ if(qi_in.scale == qi_out.scale && qi_in.offset == qi_out.offset)
+ {
+ requant = false;
+ }
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ auto vs = svdup_n_f32(s);
+ auto vo = svdup_n_f32(o);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const int8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+
+ svint8_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b8(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_s8(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = svmax_s8_z(pg, vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vb, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 1))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 2))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 3))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 1), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 2), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 3), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 0), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_s8(pg, output_ptr + x, tmp);
+
+ x += svcntb();
+ pg = svwhilelt_b8(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b8(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
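For LOGISTIC, TANH and HARD_SWISH there is no integer shortcut, so the kernel above widens each 8-bit vector into four f32 vectors (hence svfloat32x4_t), applies the activation in float, and re-quantizes. A scalar reference for the HARD_SWISH branch, with an illustrative helper name:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Scalar reference for the quantized hard-swish path: de-quantize, apply
    // x * relu6(x + 3) / 6, then re-quantize and saturate.
    int8_t hard_swish_qasymm8_signed(int8_t q, float scale_in, int offset_in, float scale_out, int offset_out)
    {
        const float x     = (q - offset_in) * scale_in;            // de-quantize
        const float relu6 = std::min(6.f, std::max(0.f, x + 3.f)); // clamp(x + 3, 0, 6)
        const float y     = x * relu6 * 0.166666667f;              // x * relu6 / 6
        const int   q_out = static_cast<int>(std::lround(y / scale_out)) + offset_out;
        return static_cast<int8_t>(std::min(127, std::max(-128, q_out)));
    }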
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp b/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
new file mode 100644
index 0000000000..1432e3bbdf
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/SVESymm.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qsymm16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ svint16_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_s16(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_s16(pg, output_ptr + x, tmp);
+
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
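QSYMM16 is a symmetric format: there is no zero-point offset, so (de)quantization above is a plain scale, and one 16-bit vector widens to only two f32 vectors (svfloat32x2_t) rather than four. A scalar reference for the LOGISTIC branch; the function name is illustrative:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Scalar reference for the symmetric 16-bit logistic path: the zero point
    // is implicitly 0, so only the scales take part in (de)quantization.
    int16_t logistic_qsymm16(int16_t q, float scale_in, float scale_out)
    {
        const float x     = q * scale_in;               // de-quantize
        const float y     = 1.f / (1.f + std::exp(-x)); // logistic
        const int   q_out = static_cast<int>(std::lround(y / scale_out));
        return static_cast<int16_t>(std::min(32767, std::max(-32768, q_out)));
    }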
diff --git a/src/core/NEON/kernels/activation/impl/list.h b/src/core/NEON/kernels/activation/impl/list.h
index 3b48ee3e22..db6c5b21b8 100644
--- a/src/core/NEON/kernels/activation/impl/list.h
+++ b/src/core/NEON/kernels/activation/impl/list.h
@@ -32,10 +32,15 @@ namespace cpu
void func_name(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
DECLARE_ACTIVATION_KERNEL(qasymm8_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qasymm8_sve_activation);
DECLARE_ACTIVATION_KERNEL(qasymm8_signed_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qasymm8_signed_sve_activation);
DECLARE_ACTIVATION_KERNEL(qsymm16_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qsymm16_sve_activation);
DECLARE_ACTIVATION_KERNEL(fp16_neon_activation);
+DECLARE_ACTIVATION_KERNEL(fp16_sve_activation);
DECLARE_ACTIVATION_KERNEL(fp32_neon_activation);
+DECLARE_ACTIVATION_KERNEL(fp32_sve_activation);
#undef DECLARE_ACTIVATION_KERNEL
} // namespace cpu
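Each declaration above stamps out the common prototype given by the DECLARE_ACTIVATION_KERNEL macro at the top of this hunk; for example:

    // DECLARE_ACTIVATION_KERNEL(qasymm8_sve_activation) expands to:
    void qasymm8_sve_activation(const ITensor *src, ITensor *dst,
                                const ActivationLayerInfo &act_info, const Window &window);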
diff --git a/src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp b/src/core/NEON/kernels/floor/impl/NEON/fp16.cpp
index 4f56ca9daf..4f56ca9daf 100644
--- a/src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp
+++ b/src/core/NEON/kernels/floor/impl/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp b/src/core/NEON/kernels/floor/impl/NEON/fp32.cpp
index 3f4b14b3e5..3f4b14b3e5 100644
--- a/src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp
+++ b/src/core/NEON/kernels/floor/impl/NEON/fp32.cpp
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index dcea3e8d38..649fe468a3 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -24,34 +24,63 @@
#ifndef SRC_CORE_COMMON_REGISTRARS_H
#define SRC_CORE_COMMON_REGISTRARS_H
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#if defined(ENABLE_FP16_KERNELS)
+
+#if defined(__ARM_FEATURE_SVE)
+#define REGISTER_FP16_SVE(func_name) &(func_name)
+#else /* !defined(__ARM_FEATURE_SVE) */
+#define REGISTER_FP16_SVE(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_SVE) */
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#define REGISTER_FP16_NEON(func_name) &(func_name)
-#else /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+#else /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+#define REGISTER_FP16_NEON(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+
+#else /* !defined(ENABLE_FP16_KERNELS) */
#define REGISTER_FP16_NEON(func_name) nullptr
+#define REGISTER_FP16_SVE(func_name) nullptr
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+#endif /* defined(ENABLE_FP16_KERNELS) */
#if defined(ENABLE_FP32_KERNELS)
+#if defined(__ARM_FEATURE_SVE)
+#define REGISTER_FP32_SVE(func_name) &(func_name)
+#else /* !defined(__ARM_FEATURE_SVE) */
+#define REGISTER_FP32_SVE(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_SVE) */
#define REGISTER_FP32_NEON(func_name) &(func_name)
#else /* defined(ENABLE_FP32_KERNELS) */
#define REGISTER_FP32_NEON(func_name) nullptr
+#define REGISTER_FP32_SVE(func_name) nullptr
#endif /* defined(ENABLE_FP32_KERNELS) */
#if defined(ENABLE_QASYMM8_SIGNED_KERNELS)
+#if defined(__ARM_FEATURE_SVE)
+#define REGISTER_QASYMM8_SIGNED_SVE(func_name) &(func_name)
+#else /* !defined(__ARM_FEATURE_SVE) */
+#define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_SVE) */
#define REGISTER_QASYMM8_SIGNED_NEON(func_name) &(func_name)
#else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
#define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr
+#define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
#endif /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
#if defined(ENABLE_QASYMM8_KERNELS)
+#if defined(__ARM_FEATURE_SVE)
+#define REGISTER_QASYMM8_SVE(func_name) &(func_name)
+#else /* !defined(__ARM_FEATURE_SVE) */
+#define REGISTER_QASYMM8_SVE(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_SVE) */
#define REGISTER_QASYMM8_NEON(func_name) &(func_name)
#else /* defined(ENABLE_QASYMM8_KERNELS) */
#define REGISTER_QASYMM8_NEON(func_name) nullptr
+#define REGISTER_QASYMM8_SVE(func_name) nullptr
#endif /* defined(ENABLE_QASYMM8_KERNELS) */
#if defined(ENABLE_QSYMM16_KERNELS)
+#if defined(__ARM_FEATURE_SVE)
+#define REGISTER_QSYMM16_SVE(func_name) &(func_name)
+#else /* !defined(__ARM_FEATURE_SVE) */
+#define REGISTER_QSYMM16_SVE(func_name) nullptr
+#endif /* defined(__ARM_FEATURE_SVE) */
#define REGISTER_QSYMM16_NEON(func_name) &(func_name)
#else /* defined(ENABLE_QSYMM16_KERNELS) */
#define REGISTER_QSYMM16_NEON(func_name) nullptr
+#define REGISTER_QSYMM16_SVE(func_name) nullptr
#endif /* defined(ENABLE_QSYMM16_KERNELS) */
#endif /* SRC_CORE_COMMON_REGISTRARS_H */
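These registrars let a single kernel table name every implementation while entries for backends that were compiled out collapse to nullptr. A hedged sketch of how such a table is typically consumed; the real selection table lives in NEActivationLayerKernel.cpp and is not shown in this patch, so the structure and names below are illustrative:

    #include "src/core/NEON/kernels/activation/impl/list.h"
    #include "src/core/common/Registrars.h"

    namespace
    {
    using ActivationFn = void (*)(const arm_compute::ITensor *, arm_compute::ITensor *,
                                  const arm_compute::ActivationLayerInfo &, const arm_compute::Window &);

    struct ActivationKernelEntry
    {
        const char  *name;
        ActivationFn ukernel; // nullptr when the backend was compiled out
    };

    const ActivationKernelEntry available_kernels[] =
    {
        { "fp32_sve_activation", REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation) },
        { "fp32_neon_activation", REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation) },
    };
    } // namespace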
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 84ff288b2f..f8f35f0a8e 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -61,7 +61,6 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI
switch(activation)
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
case ActivationLayerInfo::ActivationFunction::ELU:
case ActivationLayerInfo::ActivationFunction::SQRT:
case ActivationLayerInfo::ActivationFunction::TANH:
@@ -69,10 +68,26 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI
switch(data_type)
{
case DataType::F16:
+#if defined(__ARM_FEATURE_SVE)
+ return RelativeTolerance<float>(0.25f);
+#else // !defined(__ARM_FEATURE_SVE)
return RelativeTolerance<float>(0.1f);
+#endif // defined(__ARM_FEATURE_SVE)
default:
return RelativeTolerance<float>(0.05f);
}
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ switch(data_type)
+ {
+ case DataType::F16:
+#if defined(__ARM_FEATURE_SVE)
+ return RelativeTolerance<float>(0.9f);
+#else // !defined(__ARM_FEATURE_SVE)
+ return RelativeTolerance<float>(0.01f);
+#endif // defined(__ARM_FEATURE_SVE)
+ default:
+ return RelativeTolerance<float>(0.00001f);
+ }
default:
return RelativeTolerance<float>(0.f);
}
@@ -90,14 +105,29 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI
switch(activation)
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
case ActivationLayerInfo::ActivationFunction::SQRT:
case ActivationLayerInfo::ActivationFunction::TANH:
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
switch(data_type)
{
case DataType::F16:
+#if defined(__ARM_FEATURE_SVE)
+ return AbsoluteTolerance<float>(0.25f);
+#else // !defined(__ARM_FEATURE_SVE)
+ return AbsoluteTolerance<float>(0.01f);
+#endif // defined(__ARM_FEATURE_SVE)
+ default:
+ return AbsoluteTolerance<float>(0.00001f);
+ }
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ switch(data_type)
+ {
+ case DataType::F16:
+#if defined(__ARM_FEATURE_SVE)
+ return AbsoluteTolerance<float>(0.9f);
+#else // !defined(__ARM_FEATURE_SVE)
return AbsoluteTolerance<float>(0.01f);
+#endif // defined(__ARM_FEATURE_SVE)
default:
return AbsoluteTolerance<float>(0.00001f);
}
@@ -107,10 +137,10 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI
}
/** Tolerance for quantized asymmetric operations */
-#if defined(__aarch64__)
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
-#else // defined(__aarch64__)
+#if !defined(__aarch64__) || defined(__ARM_FEATURE_SVE2)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
+#else // !(!defined(__aarch64__) || defined(__ARM_FEATURE_SVE2))
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
-#endif // defined(__aarch64__)
+#endif // !defined(__aarch64__) || defined(__ARM_FEATURE_SVE2)
constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1);
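The SVE builds need looser F16 bounds here, presumably reflecting the separate exp/log/tanh implementations in the SVEMath.h routines included above, which round differently from the NEON path. For reference, a relative-tolerance check of the kind these values feed into is conventionally evaluated as below; the framework's exact implementation is not shown in this patch, and the zero guard is an assumption:

    #include <cmath>

    // Hedged sketch: |target - reference| must stay within tol * |reference|,
    // with an absolute fallback when the reference is (near) zero.
    bool within_relative_tolerance(float target, float reference, float tol)
    {
        const float epsilon = 1e-6f; // illustrative guard value, not the framework's
        if(std::fabs(reference) < epsilon)
        {
            return std::fabs(target - reference) < epsilon;
        }
        return std::fabs((target - reference) / reference) <= tol;
    }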