Diffstat (limited to 'tests')
160 files changed, 9039 insertions, 1113 deletions
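The dominant pattern in this change set swaps the compile-time fp16 guard macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC for the broader ARM_COMPUTE_ENABLE_FP16 build flag and adds a runtime capability check, so fp16 test bodies compile whenever the build enables fp16 but only validate on hardware that actually supports it. A minimal sketch of that guard as it recurs throughout the patch below; SomeFp16Fixture and dataset are placeholders standing in for the per-file fixture types, dataset combinations, and tolerances, which vary from file to file:

#ifdef ARM_COMPUTE_ENABLE_FP16
FIXTURE_DATA_TEST_CASE(RunSmall, SomeFp16Fixture, framework::DatasetMode::ALL, dataset)
{
    if(CPUInfo::get().has_fp16())
    {
        // Device supports fp16 vector operations: validate against the reference
        validate(Accessor(_target), _reference);
    }
    else
    {
        // Built with fp16 support but running on hardware without it: skip at runtime
        ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED.");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}
#endif // ARM_COMPUTE_ENABLE_FP16

The runtime branch is what allows a single fat binary to run the full suite across devices with and without fp16, instead of deciding at compile time.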
diff --git a/tests/SConscript b/tests/SConscript index c7be105b1e..9f8bb54dec 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -117,9 +117,6 @@ files_validation += Glob('validation/UNIT/*.cpp') filter_pattern = test_env['test_filter'] files_validation += Glob('validation/CPP/' + filter_pattern) -# Add wrapper tests -files_validation += Glob('validation/runtime/*/' + filter_pattern) - if env['opencl']: if env['experimental_dynamic_fusion']: files_validation += Glob('validation/dynamic_fusion/gpu/' + filter_pattern) diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 17e03368ac..d3cf4ab13e 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -157,6 +157,20 @@ public: } }; +/** Dataset containing large depthwise convolution shapes that the optimised depthwise kernels can handle. */ +class LargeCpuDepthwiseConv2dDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeCpuDepthwiseConv2dDataset() + { + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + // Unlike LargeDepthwiseConvolutionLayerDataset, this does not have padding greater than kernel size. + } +}; + class LargeDepthwiseConvolutionLayerDatasetFp16Subset final : public DepthwiseConvolutionLayerDataset { public: diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index c1e61444a8..d987f4f60b 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SHAPE_DATASETS_H -#define ARM_COMPUTE_TEST_SHAPE_DATASETS_H +#ifndef ACL_TESTS_DATASETS_SHAPEDATASETS_H +#define ACL_TESTS_DATASETS_SHAPEDATASETS_H #include "arm_compute/core/TensorShape.h" #include "tests/framework/datasets/Datasets.h" @@ -199,8 +199,8 @@ public: : ShapeDataset("Shape", { // Batch size 1 - TensorShape{ 3U, 11U }, - TensorShape{ 1U, 16U }, + //TensorShape{ 3U, 11U }, + //TensorShape{ 1U, 16U }, TensorShape{ 27U, 13U, 7U }, TensorShape{ 7U, 7U, 17U, 2U }, // Batch size 4 and 2 SIMD iterations @@ -212,6 +212,44 @@ public: } }; +class SMEMulShapes final : public ShapeDataset +{ +public: + SMEMulShapes() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 6U, 1U }, + TensorShape{ 128U, 2U}, + TensorShape{ 128U, 2U, 4U, 2U, 2U} // 5D collapsible case + }) + { + } +}; + +/** Data set containing pairs of small tensor shapes that are broadcast compatible. */ +class SMEMulShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + SMEMulShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + // NOTE: This does not include x-wise broadcasting. 
+ TensorShape{ 9U, 9U }, + TensorShape{ 256U, 13U, 1U }, + TensorShape{ 128U, 1U, 5U, 1U }, + }), + ShapeDataset("Shape1", + { + TensorShape{ 9U, 1U, 2U }, + TensorShape{ 256U, 1U, 2U }, + TensorShape{ 128U, 64U, 1U, 3U }, + })) + { + } +}; + /** Data set containing small tensor shapes. */ class SmallShapesNoBatches final : public ShapeDataset { @@ -618,6 +656,21 @@ public: } }; +/** Data set containing tiny 5D tensor shapes. */ +class Tiny5dShapes final : public ShapeDataset +{ +public: + Tiny5dShapes() + : ShapeDataset("Shape", + { + TensorShape{ 2U, 2U, 3U, 4U, 2U }, + TensorShape{ 2U, 3U, 4U, 6U, 2U }, + TensorShape{ 3U, 2U, 6U, 1U, 2U }, + }) + { + } +}; + /** Data set containing small 5D tensor shapes. */ class Small5dShapes final : public ShapeDataset { @@ -1194,7 +1247,34 @@ public: } }; +/** Data set containing large 2D tensor shapes. */ +class Large2DMeanStdDevNormalizationShapes final : public ShapeDataset +{ +public: + Large2DMeanStdDevNormalizationShapes() + : ShapeDataset("Shape", + { + TensorShape{ 1245U, 652U }, + TensorShape{ 1048576U, 32U } + }) + { + } +}; + +/** Data set containing SME Stressing shapes for Logistic SME Kernel. */ +class LogisticSMEStressShapesFp32 final : public ShapeDataset +{ +public: + LogisticSMEStressShapesFp32() + : ShapeDataset("Shape", + { + TensorShape{ 130U, 2U }, + TensorShape{ 256U, 1U }, + }) + { + } +}; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SHAPE_DATASETS_H */ +#endif // ACL_TESTS_DATASETS_SHAPEDATASETS_H diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h index 5ce0842864..09e01b0b0c 100644 --- a/tests/framework/Macros.h +++ b/tests/framework/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FRAMEWORK_MACROS -#define ARM_COMPUTE_TEST_FRAMEWORK_MACROS +#ifndef ACL_TESTS_FRAMEWORK_MACROS_H +#define ACL_TESTS_FRAMEWORK_MACROS_H #include "Framework.h" #include "Registrars.h" @@ -133,12 +133,16 @@ void do_setup() override \ { \ framework::Framework::get().set_new_fixture_call(true); \ - apply(this, &FIXTURE::setup, _data); \ - configure_target(); \ - if(!framework::Framework::get().configure_only()) \ + apply(this, &FIXTURE::setup, _data); \ + \ + if(!_skip_test) \ { \ - allocate_and_run_target(); \ - compute_reference(); \ + configure_target(); \ + if(!framework::Framework::get().configure_only()) \ + { \ + allocate_and_run_target(); \ + compute_reference(); \ + } \ } \ } #define FIXTURE_RUN(FIXTURE) \ @@ -324,4 +328,4 @@ // // TEST CASE MACROS END // -#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */ +#endif // ACL_TESTS_FRAMEWORK_MACROS_H diff --git a/tests/validation/CL/CropResize.cpp b/tests/validation/CL/CropResize.cpp index f1fae3d5cc..b361cfdd91 100644 --- a/tests/validation/CL/CropResize.cpp +++ b/tests/validation/CL/CropResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -91,7 +91,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, CLCropResizeFixture<half>, @@ -104,7 +103,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/CL/LSTMLayer.cpp b/tests/validation/CL/LSTMLayer.cpp index a550613b0c..02aef1f2af 100644 --- a/tests/validation/CL/LSTMLayer.cpp +++ b/tests/validation/CL/LSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,95 +43,106 @@ RelativeTolerance<float> tolerance_f32(0.001f); RelativeTolerance<half> tolerance_f16(half(0.1)); } // namespace +using framework::dataset::make; + TEST_SUITE(CL) TEST_SUITE(LSTMLayer) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size - }), - framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - })), - framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - })), - framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(30U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), 
- TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false })), - input_info, input_weights_info, recurrent_weights_info, cell_bias_info, projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { + TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size + }), + make("InputWeightsInfo", { + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + 
TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + }), + make("RecurrentWeightsInfo", { + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + }), + make("CellBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(30U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("ProjectionBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("CellStateInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("OutputInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("ScratchInfo", { + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), + }), + make("ActivationInfo", { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + }), + make("Expected", { false, false, false, false, false, 
false, false, false })), + input_info, input_weights_info, recurrent_weights_info, cell_bias_info, projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) { LSTMParams<ITensorInfo> lstm_params_info; auto cell_bias_clone = cell_bias_info.clone(); @@ -154,11 +165,14 @@ template <typename T> using CLLSTMLayerFixture = LSTMLayerValidationFixture<CLTensor, CLAccessor, CLLSTMLayer, LSTMParams<ICLTensor>, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture<float>, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F32), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -167,11 +181,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture<float>, framework::DatasetMo TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLLSTMLayerFixture<half>, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F16), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp index cdeb622130..6f9dd2e2d6 100644 --- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ namespace { /** Tolerance for float operations */ RelativeTolerance<half> tolerance_f16(half(0.2f)); -RelativeTolerance<float> tolerance_f32(1e-8f); +RelativeTolerance<float> tolerance_f32(0.001f); } // namespace TEST_SUITE(CL) @@ -83,7 +83,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture<half>, fr // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) @@ -102,7 +102,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture<float>, f // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp index b1531eb64a..99464c7d92 100644 --- a/tests/validation/CL/ScatterLayer.cpp +++ b/tests/validation/CL/ScatterLayer.cpp @@ -31,6 +31,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" namespace arm_compute { diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt index 174dfa88d2..c2b526817e 100644 --- a/tests/validation/CMakeLists.txt +++ b/tests/validation/CMakeLists.txt @@ -143,6 +143,16 @@ if(ENABLE_NEON) NEON/UNIT/TensorAllocator.cpp NEON/UNIT/MemoryManager.cpp NEON/UNIT/RuntimeContext.cpp + runtime/experimental/low_level/CpuGemmAssemblyDispatch.cpp + runtime/experimental/operators/CpuActivation.cpp + runtime/experimental/operators/CpuAdd.cpp + runtime/experimental/operators/CpuDepthwiseConv2d.cpp + runtime/experimental/operators/CpuElementwise.cpp runtime/experimental/operators/CpuGemm.cpp - runtime/experimental/operators/CpuTranspose.cpp) + runtime/experimental/operators/CpuGemmConv2d.cpp + runtime/experimental/operators/CpuGemmDirectConv2d.cpp + runtime/experimental/operators/CpuMul.cpp + runtime/experimental/operators/CpuSub.cpp + runtime/experimental/operators/CpuTranspose.cpp + runtime/experimental/operators/CpuWinogradConv2d.cpp) endif() diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp index 9495fa738e..a128006890 100644 --- a/tests/validation/CPP/Permute.cpp +++ b/tests/validation/CPP/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -120,6 +120,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<int8_t>, framework::DatasetMo TEST_SUITE_END() // QASYMM8_SINGED +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<float16_t>, framework::DatasetMode::PRECOMMIT, + PermuteParametersSmall * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<float16_t>, framework::DatasetMode::NIGHTLY, + PermuteParametersLarge * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index 73f5de68ac..51a2cecb78 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,6 @@ #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ActivationLayerFixture.h" - #include "arm_compute/Acl.hpp" #include "support/AclRequires.h" @@ -167,15 +166,6 @@ AbsoluteTolerance<uint8_t> tolerance_qasymm8(ActivationLayerInfo::ActivationFunc constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1); -/** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", -{ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - DataType::F32, -}); - const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH })); @@ -188,9 +178,17 @@ void test_float_sqrt_boundary_value() constexpr auto vector_size = uint32_t{ 16 }; auto data_type = DataType::F32; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 data_type = std::is_same<T, half>::value ? DataType::F16 : data_type; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + + if(data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + + return; + } const auto boundary_value_vector = std::vector<T> { @@ -336,7 +334,7 @@ template <typename T> using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL) { @@ -346,11 +344,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<half>, framework::Data framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL) @@ -364,6 +370,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::Dat // Validate output validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); } +// Run only on SME Devices to stress Logistic SME kernel +#ifdef ARM_COMPUTE_ENABLE_SME2 +TEST_SUITE(SME) +const auto LogsisticDataset = combine(framework::dataset::make("InPlace", { false }), framework::dataset::make("Function", ActivationLayerInfo::ActivationFunction::LOGISTIC), framework::dataset::make("AlphaBeta", { 1.f })); +FIXTURE_DATA_TEST_CASE(RunLogistic5D, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::Tiny5dShapes(), LogsisticDataset, framework::dataset::make("DataType", + DataType::F32))) + +{ + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); +} + +FIXTURE_DATA_TEST_CASE(RunLogisticSME, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::LogisticSMEStressShapesFp32(), LogsisticDataset, framework::dataset::make("DataType", + DataType::F32))) + +{ + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_data_type, _function), 0.f, absolute_tolerance(_data_type, _function)); +} +TEST_SUITE_END() // SME +#endif // ARM_COMPUTE_ENABLE_SME2 TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/AddMulAdd.cpp b/tests/validation/NEON/AddMulAdd.cpp index 77e3d80fe6..5bb58ecb13 100644 --- a/tests/validation/NEON/AddMulAdd.cpp +++ b/tests/validation/NEON/AddMulAdd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -127,27 +127,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<float>, framework::Data TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset)) { - // Validate outputs - validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset)) { - // Validate outputs - validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp index 91b8128dea..acb127498e 100644 --- a/tests/validation/NEON/ArgMinMax.cpp +++ b/tests/validation/NEON/ArgMinMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -142,7 +142,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, TEST_SUITE_END() // S32 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArgMinMaxValidationFixture_F16_S32, @@ -153,8 +154,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, AxisDataset), OpsDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -166,11 +175,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, AxisDataset), OpsDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp index 535c3e634e..7a7aa52041 100644 --- a/tests/validation/NEON/ArithmeticAddition.cpp +++ b/tests/validation/NEON/ArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -209,17 +209,25 @@ TEST_SUITE_END() // S32 TEST_SUITE_END() // Integer TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp index 8886ca2db5..9a6032cd9e 100644 --- a/tests/validation/NEON/ArithmeticSubtraction.cpp +++ b/tests/validation/NEON/ArithmeticSubtraction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -276,18 +276,26 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framew TEST_SUITE_END() // S32 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp index 6eafe82f8a..e275a759cb 100644 --- a/tests/validation/NEON/BatchConcatenateLayer.cpp +++ b/tests/validation/NEON/BatchConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -87,25 +87,41 @@ template <typename T> using NEBatchConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 3))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 3))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index 50eaf0c667..45661ab5d3 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,9 +50,9 @@ namespace { RelativeTolerance<float> rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.015f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 const auto act_infos = framework::dataset::make("ActivationInfo", { @@ -139,7 +139,7 @@ FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture<float>, fra } TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RandomSmall, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), @@ -148,8 +148,16 @@ FIXTURE_DATA_TEST_CASE(RandomSmall, NEBatchNormalizationLayerFixture<half>, fram framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::LargeRandomBatchNormalizationLayerDataset(), @@ -159,11 +167,19 @@ FIXTURE_DATA_TEST_CASE(RandomLarge, NEBatchNormalizationLayerFixture<half>, fram framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f16, 0); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // BatchNormalizationLayer diff --git a/tests/validation/NEON/BoundingBoxTransform.cpp b/tests/validation/NEON/BoundingBoxTransform.cpp index 2ca2434150..4ef18352ee 100644 --- a/tests/validation/NEON/BoundingBoxTransform.cpp +++ b/tests/validation/NEON/BoundingBoxTransform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace { RelativeTolerance<float> relative_tolerance_f32(0.01f); AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half> relative_tolerance_f16(half(0.2)); AbsoluteTolerance<float> absolute_tolerance_f16(half(0.02f)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<uint16_t> tolerance_qasymm16(1); @@ -124,16 +124,24 @@ FIXTURE_DATA_TEST_CASE(BoundingBox, NEBoundingBoxTransformFixture<float>, framew } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(BoundingBox, NEBoundingBoxTransformFixture<half>, framework::DatasetMode::ALL, combine(combine(DeltaDataset, BboxInfoDataset), framework::dataset::make("DataType", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance_f16, 0.03f, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, relative_tolerance_f16, 0.03f, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp index b56594546b..668c60545b 100644 --- a/tests/validation/NEON/Cast.cpp +++ b/tests/validation/NEON/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -140,7 +140,15 @@ using NECastToQASYMM8_SIGNEDFixture = CastValidationFixture<Tensor, Accessor, NE FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \ datasets::ConvertPolicies())) \ { \ - validate(Accessor(_target), _reference, tolerance); \ + if((idt != DataType::F16 && odt != DataType::F16) || CPUInfo::get().has_fp16()) \ + { \ + validate(Accessor(_target), _reference, tolerance); \ + } \ + else \ + { \ + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); \ + framework::ARM_COMPUTE_PRINT_INFO(); \ + } \ } \ TEST_SUITE_END() @@ -148,14 +156,14 @@ using NECastToQASYMM8_SIGNEDFixture = CastValidationFixture<Tensor, Accessor, NE CAST_SUITE(QASYMM8_SIGNED_to_S16, DataType::QASYMM8_SIGNED, DataType::S16, NECastToS16Fixture<int8_t>, CastQASYMM8_SIGNEDtoS16Dataset, one_tolerance) CAST_SUITE(QASYMM8_SIGNED_to_S32, DataType::QASYMM8_SIGNED, DataType::S32, NECastToS32Fixture<int8_t>, CastQASYMM8_SIGNEDtoS32Dataset, one_tolerance) CAST_SUITE(QASYMM8_SIGNED_to_F32, DataType::QASYMM8_SIGNED, DataType::F32, NECastToF32Fixture<int8_t>, CastQASYMM8_SIGNEDtoF32Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_SIGNED_to_F16, DataType::QASYMM8_SIGNED, DataType::F16, NECastToF16Fixture<int8_t>, CastQASYMM8_SIGNEDtoF16Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 //QASYMM8 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_to_F16, DataType::QASYMM8, DataType::F16, NECastToF16Fixture<uint8_t>, CastQASYMM8toF16Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, NECastToF32Fixture<uint8_t>, CastQASYMM8toF32Dataset, one_tolerance) CAST_SUITE(QASYMM8_to_S32, DataType::QASYMM8, DataType::S32, NECastToS32Fixture<uint8_t>, CastQASYMM8toS32Dataset, one_tolerance) @@ -177,26 +185,26 @@ CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, NECastToS32Fixture<int16_t> // S32 CAST_SUITE(S32_to_QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<int32_t>, CastS32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(S32_to_QASYMM8, DataType::S32, DataType::QASYMM8, NECastToQASYMM8Fixture<int32_t>, CastS32toQASYMM8Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, NECastToF16Fixture<int32_t>, CastS32toF16Dataset, zero_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, NECastToF32Fixture<int32_t>, CastS32toF32Dataset, one_tolerance) CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, NECastToU8Fixture<int32_t>, CastS32toU8Dataset, one_tolerance) // F16 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F16_to_QASYMM8_SIGNED, DataType::F16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<half>, CastF16toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F16_to_QASYMM8, DataType::F16, DataType::QASYMM8, NECastToQASYMM8Fixture<half>, CastF16toQASYMM8Dataset, one_tolerance) CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, NECastToF32Fixture<half>, CastF16toF32Dataset, zero_tolerance) CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, NECastToS32Fixture<half>, CastF16toS32Dataset, one_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 // F32 CAST_SUITE(F32_to_QASYMM8_SIGNED, DataType::F32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<float>, CastF32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F32_to_QASYMM8, DataType::F32, DataType::QASYMM8, NECastToQASYMM8Fixture<float>, CastF32toQASYMM8Dataset, one_tolerance) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, 
NECastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_U8, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) diff --git a/tests/validation/NEON/ChannelShuffle.cpp b/tests/validation/NEON/ChannelShuffle.cpp index 9a2a9f24f0..c1590dc136 100644 --- a/tests/validation/NEON/ChannelShuffle.cpp +++ b/tests/validation/NEON/ChannelShuffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -92,26 +92,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelShuffleLayerFixture<uint8_t>, framewor TEST_SUITE_END() // U8 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelShuffleLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallRandomChannelShuffleLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelShuffleLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeRandomChannelShuffleLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelShuffleLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallRandomChannelShuffleLayerDataset(), diff --git a/tests/validation/NEON/Comparisons.cpp b/tests/validation/NEON/Comparisons.cpp index b77bcdd4f0..868c39d306 100644 --- a/tests/validation/NEON/Comparisons.cpp +++ b/tests/validation/NEON/Comparisons.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,26 +97,42 @@ FIXTURE_DATA_TEST_CASE(RunSmall, TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEComparisonFixture<half>, framework::DatasetMode::PRECOMMIT, combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEComparisonFixture<half>, framework::DatasetMode::NIGHTLY, combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/ConvertFullyConnectedWeights.cpp b/tests/validation/NEON/ConvertFullyConnectedWeights.cpp index 65dbbcc55b..c52b50fa5b 100644 --- a/tests/validation/NEON/ConvertFullyConnectedWeights.cpp +++ b/tests/validation/NEON/ConvertFullyConnectedWeights.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,22 +97,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvertFullyConnectedWeightsFixture<float>, f } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvertFullyConnectedWeightsFixture<half>, framework::DatasetMode::ALL, combine(datasets::Small3DShapes(), combine(params, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEConvertFullyConnectedWeightsFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large3DShapes(), combine(params, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEConvertFullyConnectedWeightsFixture<uint8_t>, framework::DatasetMode::ALL, combine(datasets::Small3DShapes(), combine(params, framework::dataset::make("DataType", diff --git a/tests/validation/NEON/Convolution3D.cpp b/tests/validation/NEON/Convolution3D.cpp index 4185488742..76046a8b85 100644 --- a/tests/validation/NEON/Convolution3D.cpp +++ b/tests/validation/NEON/Convolution3D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,11 +43,11 @@ namespace validation { namespace { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */ @@ -140,17 +140,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<float>, framework: } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NDHWC }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index d739d4e1a4..5f0ebd2542 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -74,16 +74,16 @@ const RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relat const AbsoluteTolerance<float> abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */ const AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f); /**< Absolute tolerance for FP32 types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f)); constexpr float tolerance_num_f16 = 0.15f; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ #ifdef ARM_COMPUTE_ENABLE_SME // TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode. 
@@ -96,9 +96,9 @@ constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, DataType::QASYMM8, }); @@ -147,6 +147,45 @@ const auto QuantizationData = make("QuantizationInfo", TEST_SUITE(NEON) TEST_SUITE(ConvolutionLayer) +DATA_TEST_CASE(SupportedTypes, framework::DatasetMode::ALL, zip( + make("DataType", { + DataType::F32, + DataType::QASYMM8, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED + }), + make("WeightsDataType", { + DataType::F32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QASYMM8 + }), + make("Expected", + { + true, + true, + true, + false + })), +data_type_const, weights_data_type_const, expected_const) +{ + TensorInfo input_info = TensorInfo(TensorShape(3U, 3U, 1U), 1, data_type_const); + TensorInfo weights_info = TensorInfo(TensorShape(2U, 2U, 1U, 1U), 1, weights_data_type_const); + TensorInfo output_info = TensorInfo(TensorShape(2U, 2U, 1U), 1, data_type_const); + + input_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + weights_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + output_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); + + Status status = NEConvolutionLayer::validate( + &input_info, + &weights_info, + nullptr, + &output_info, + PadStrideInfo()); + + ARM_COMPUTE_EXPECT(bool(status) == expected_const, framework::LogLevel::ERRORS); +} // *INDENT-OFF* // clang-format off @@ -257,7 +296,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -303,7 +342,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -580,7 +619,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, frame /// It's enough to run the activations for a single weight/input combination and data type because /// activation function is called on top of the winograd output as a separate operator -/// TODO: Enable after COMPMID-6573 is resolved +/// TODO(COMPMID-6573): Enable after COMPMID-6573 is resolved FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED, combine( make("Input", TensorShape(3U, 3U, 32U)), @@ -648,9 +687,9 @@ FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<f TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) -using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>; +using NEWinogradConvolutionLayerFastMathFixture16 = 
WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>; DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip( make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16), @@ -673,37 +712,61 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip( make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })), input_info, weights_info, output_info, conv_info, fast_math, expected) { - ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), - &weights_info.clone()->set_is_resizable(true), - &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math); - ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); + if(CPUInfo::get().has_fp16()) + { + ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), + &weights_info.clone()->set_is_resizable(true), + &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Conv3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, +FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), make("DataType", { DataType::F16 }), ActivationFunctionsDataset, make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), make("DataType", { DataType::F16 }), make("ActivationInfo", { ActivationLayerInfo() }), make("DataLayout", { DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Conv3x3 TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // WinogradLayer #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS @@ -990,16 +1053,24 @@ FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::A validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_FP16) FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("ACL Scalar type", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_BF16) template <typename ScalarType> @@ -1031,16 +1102,24 @@ FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::Da validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_FP16) FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("ACL Scalar type", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(ARM_COMPUTE_ENABLE_BF16) template <typename ScalarType> @@ -1119,7 +1198,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1160,7 +1239,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1179,7 +1258,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework TEST_SUITE_END() // BFLOAT16 #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -1187,11 +1266,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework: framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), @@ -1251,12 +1338,16 @@ FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, frame TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float -// TODO: COMPMID-6596 Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights) +// TODO(COMPMID-6573): Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights) +template <typename T> +using NEGEMMConvolutionLayerForUpdatedStaticQuantInfoAfterConfigureFixture = ConvolutionValidationForUpdatedStaticQuantInfoAfterConfigureFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>; template <typename T> using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>; template <typename T> using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>; +using NEGEMMConvolutionLayerQuantizedMixedSignFixture = ConvolutionValidationQuantizedMixedTypeFixture<Tensor, Accessor, NEConvolutionLayer, uint8_t, int8_t>; + template <typename T> using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>; @@ -1272,6 +1363,37 @@ TEST_SUITE(Quantized) /// separate quantization info for each input and the output tensor. 
/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged /// again, with the explicitly specified quantization info removed + +#ifdef __aarch64__ +TEST_SUITE(UpdateStaticQuantInfoAfterConfigure) +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerForUpdatedStaticQuantInfoAfterConfigureFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(0.01f, -10) })), + QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerForUpdatedStaticQuantInfoAfterConfigureFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })), + QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // UpdateStaticQuantInfoAfterConfigure +#endif // __aarch64__ + TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -1332,6 +1454,50 @@ FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixtur } TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE(QASYMM8_MIXED) +FIXTURE_DATA_TEST_CASE( + RunSmall, + NEGEMMConvolutionLayerQuantizedMixedSignFixture, + framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", {true})), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("WeightsDataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", {DataLayout::NCHW, DataLayout::NHWC})), + framework::dataset::make("QuantizationInfoIfActivationEnabled", +{QuantizationInfo(2.f / 255.f, 10)})), +framework::dataset::make("WeightQuantizationInfoIfActivationEnabled", +{QuantizationInfo(2.f / 255.f, 10)})), +QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE( + RunMixedDataLayout, + NEGEMMConvolutionLayerQuantizedMixedSignFixture, + framework::DatasetMode::ALL, + combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U)), + framework::dataset::make("Bias", TensorShape(2U)), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U)), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + framework::dataset::make("Dilation", 
Size2D(1, 1)), + framework::dataset::make("ReshapeWeights", {true}), + framework::dataset::make("DataType", DataType::QASYMM8), + framework::dataset::make("WeightsDataType", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataLayout", {DataLayout::NCHW, DataLayout::NHWC}), + framework::dataset::make("QuantizationInfoIfActivationEnabled", {QuantizationInfo(2.f / 255.f, 10)}), + framework::dataset::make("WeightQuantizationInfoIfActivationEnabled", {QuantizationInfo(2.f / 255.f, 10)}), + QuantizedActivationFunctionsDataset) + ) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8_MIXED + TEST_SUITE(QSYMM8_PER_CHANNEL) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), @@ -1436,7 +1602,7 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } @@ -1476,7 +1642,7 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) auto result_1 = run_conv(); for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) { - ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], framework::LogLevel::ERRORS); } } diff --git a/tests/validation/NEON/CropResize.cpp b/tests/validation/NEON/CropResize.cpp index df7166bfdc..53614c7e51 100644 --- a/tests/validation/NEON/CropResize.cpp +++ b/tests/validation/NEON/CropResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -92,7 +92,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NECropResizeFixture<half>, @@ -101,11 +101,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, combine(framework::dataset::make("IsOutOfBounds", { true, false }), framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp32, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index b4c049f6f9..4ec2714957 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace { constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ constexpr AbsoluteTolerance<float> tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance<half_float::half> tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_fp16 = 0.02f; /**< Tolerance number for FP16 tests -- follows a slightly stricter approach compared to ConvolutionLayer tests */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +#endif /* ARM_COMPUTE_ENABLE_FP16*/ constexpr float tolerance_num_quant = 0.07f; /**< Tolerance number for quantized types */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) @@ -276,15 +276,23 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<float>, framework::Da TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) TEST_SUITE(W4x4) FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W4x4 TEST_SUITE(W3x3) @@ -293,15 +301,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3<half>, framework data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W3x3 TEST_SUITE(W1x1) @@ -309,8 +333,16 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<half>, framework::Dat data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W1x1 TEST_SUITE(W5x1) @@ -318,12 +350,20 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<half>, framework::Dat data_layouts_dataset), add_bias_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 1c69d44a2b..11fc9d89ae 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -84,25 +84,41 @@ template <typename T> using NEDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 2))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", 2))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp index 4972708144..bd7c8faa9b 100644 --- a/tests/validation/NEON/DepthConvertLayer.cpp +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -74,9 +74,9 @@ const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shif constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); constexpr AbsoluteTolerance<int32_t> tolerance_one_int32(1); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<uint8_t> tolerance_one_uint8(1); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace TEST_SUITE(NEON) @@ -250,25 +250,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<uint8_t>, frame } TEST_SUITE_END() // U8_to_F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(U8_to_F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // U8_to_F36 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(U16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), @@ -338,7 +354,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, frame } TEST_SUITE_END() // S16_to_S32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16_to_QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toQASYMM8Dataset), @@ -346,8 +362,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture<half>, fram DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toQASYMM8Dataset), @@ -355,8 +379,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture<half>, fram DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_QASYMM8 @@ -365,15 +397,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<half>, framework framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_uint8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_uint8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_U8 @@ -382,15 +430,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<half>, framewor framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_F32 @@ -399,15 +463,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<half>, framewor framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_int32); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_one_int32); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16_to_S32 @@ -419,8 +499,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerQuantizedToF16Fixture<uint8_ DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerQASYMM8toF16Dataset), @@ -428,8 +516,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF16Fixture<uint8_ DepthConvertLayerZeroShiftDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // QASYMM8_to_F16 @@ -438,15 +534,31 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<float>, framewo framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F32_to_F16 @@ -455,19 +567,35 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<int32_t>, frame framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS32toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // S32_to_F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toS32Dataset), diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index e9609b7b72..2d948f3e32 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -50,10 +50,10 @@ namespace constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8_SIGNED */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 const auto depth_multipliers = make("DepthMultiplier", { 1, 2, 8 }); const auto large_depth_multipliers = make("DepthMultiplier", { 5, 32 }); @@ -469,7 +469,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLay TEST_SUITE_END() // Optimized TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, @@ -483,7 +483,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<ha make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), ActivationFunctionsDatasetNightly)) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Generic) @@ -494,7 +502,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), @@ -503,7 +519,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Dilation) @@ -514,7 +538,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), @@ -523,7 +555,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Dilation @@ -538,7 +578,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), @@ -548,7 +596,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(Dilation) @@ -561,7 +617,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), @@ -571,7 +635,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, f make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Dilation @@ -586,7 +658,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmallW3x3, NEDepthwiseConvolutionLayerFixture<half make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunSmallW5x5, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), @@ -596,7 +676,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmallW5x5, NEDepthwiseConvolutionLayerFixture<half make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE_NEW(RunLargeW3x3, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), @@ -606,11 +694,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLargeW3x3, NEDepthwiseConvolutionLayerFixture<half make("DataLayout", { DataLayout::NHWC })), make("ActivationInfo", { ActivationLayerInfo() }))) { - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // Optimized TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/DequantizationLayer.cpp b/tests/validation/NEON/DequantizationLayer.cpp index 82a2149152..05b1844ba2 100644 --- a/tests/validation/NEON/DequantizationLayer.cpp +++ b/tests/validation/NEON/DequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,11 +43,11 @@ namespace validation { namespace { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#else /* ARM_COMPUTE_ENABLE_FP16 */ const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto dataset_quant_f32 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()), framework::dataset::make("DataType", DataType::F32)), @@ -120,20 +120,36 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( template <typename T> using NEDequantizationLayerFixture = DequantizationValidationFixture<Tensor, Accessor, NEDequantizationLayer, T>; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f16) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, dataset_nightly_f16) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, dataset_precommit_f32) diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp index fbfe8b8a7a..08ef68ecea 100644 --- a/tests/validation/NEON/DilatedConvolutionLayer.cpp +++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp @@ -45,19 +45,19 @@ namespace validation namespace { const AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance<float> abs_tolerance_f16(0.3f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F16 */ const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, DataType::QASYMM8, }); @@ -113,7 +113,7 @@ template <typename T> using NEGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -121,8 +121,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture<half>, fra framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), @@ -130,11 +138,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture<half>, fra framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp index 0779c9d388..201240a268 100644 --- a/tests/validation/NEON/DirectConvolutionLayer.cpp +++ b/tests/validation/NEON/DirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,11 +46,11 @@ namespace validation { namespace { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ /** Direct convolution data set.for FP32 */ @@ -335,25 +335,41 @@ template <typename T> using NEDirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T, true>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset), framework::dataset::make("DataLayout", DataLayout::NCHW))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f16_nightly, framework::dataset::make("DataType", DataType::F16)), ActivationFunctionsDataset), framework::dataset::make("DataLayout", DataLayout::NCHW))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp index 0667ac73f9..3eab2a7a5f 100644 --- a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp +++ b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); @@ -64,23 +64,39 @@ template <typename T> using NEAbsLayerQuantizedFixture = AbsQuantizedValidationFixture<Tensor, Accessor, NEAbsLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp index 95db4ad5fd..6bd1e16642 100644 --- a/tests/validation/NEON/ElementwiseDivision.cpp +++ b/tests/validation/NEON/ElementwiseDivision.cpp @@ -49,11 +49,11 @@ AbsoluteTolerance<int> tolerance_zero_s32(0); // Tolerance for S32 division const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("DataType", DataType::S32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half> tolerance_fp16(static_cast<half>(0.01f)); const auto ElementwiseDivisionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -126,16 +126,24 @@ TEST_SUITE_END() // F32 TEST_SUITE_END() // DynamicShape TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp index 31cd78626f..0f1ada14c3 100644 --- a/tests/validation/NEON/ElementwiseExpLayer.cpp +++ b/tests/validation/NEON/ElementwiseExpLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); @@ -66,23 +66,39 @@ template <typename T> using NEExpLayerQuantizedFixture = ExpQuantizedValidationFixture<Tensor, Accessor, NEExpLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEExpLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseLog.cpp b/tests/validation/NEON/ElementwiseLog.cpp index 1175903dac..67f4b8c16c 100644 --- a/tests/validation/NEON/ElementwiseLog.cpp +++ b/tests/validation/NEON/ElementwiseLog.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); @@ -66,23 +66,39 @@ template <typename T> using NELogLayerQuantizedFixture = LogQuantizedValidationFixture<Tensor, Accessor, NELogLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NELogLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseMax.cpp b/tests/validation/NEON/ElementwiseMax.cpp index 61421ab3e5..97a10e0ed6 100644 --- a/tests/validation/NEON/ElementwiseMax.cpp +++ b/tests/validation/NEON/ElementwiseMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,10 +56,10 @@ const auto ElementwiseMaxS32Dataset = combine(combine(framework::dataset::make(" DataType::S32)); const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseMaxFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -188,16 +188,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseMin.cpp b/tests/validation/NEON/ElementwiseMin.cpp index a134eb354d..bf45544668 100644 --- a/tests/validation/NEON/ElementwiseMin.cpp +++ b/tests/validation/NEON/ElementwiseMin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -56,10 +56,10 @@ const auto ElementwiseMinS32Dataset = combine(combine(framework::dataset::make(" DataType::S32)); const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseMinFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -188,16 +188,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset), diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp index 5b8ae8fc64..ab7025ff22 100644 --- a/tests/validation/NEON/ElementwiseNegation.cpp +++ b/tests/validation/NEON/ElementwiseNegation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); @@ -64,25 +64,41 @@ template <typename T> using NENegLayerQuantizedFixture = NegQuantizedValidationFixture<Tensor, Accessor, NENegLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), diff --git a/tests/validation/NEON/ElementwisePower.cpp b/tests/validation/NEON/ElementwisePower.cpp index 9ac9eec280..c1fa48b95f 100644 --- a/tests/validation/NEON/ElementwisePower.cpp +++ b/tests/validation/NEON/ElementwisePower.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,11 +44,11 @@ namespace { RelativeTolerance<float> tolerance_fp32(0.001f); /** Input data sets **/ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half> tolerance_fp16(static_cast<half>(0.01f)); const auto ElementwisePowerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwisePowerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -91,16 +91,24 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp index 620618cb0b..04c14ac430 100644 --- a/tests/validation/NEON/ElementwiseRound.cpp +++ b/tests/validation/NEON/ElementwiseRound.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,23 +55,39 @@ template <typename T> using NERoundLayerQuantizedFixture = RoundQuantizedValidationFixture<Tensor, Accessor, NERoundLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp index 80788c893f..fa720d016f 100644 --- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp +++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #if defined(__aarch64__) constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); @@ -83,23 +83,39 @@ template <typename T> using NERsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<Tensor, Accessor, NERsqrtLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NERsqrtLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseSin.cpp b/tests/validation/NEON/ElementwiseSin.cpp index 9c2d7ae268..348157e4e3 100644 --- a/tests/validation/NEON/ElementwiseSin.cpp +++ b/tests/validation/NEON/ElementwiseSin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { AbsoluteTolerance<float> tolerance_fp32(0.00001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance<float> tolerance_fp16(0.0005f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); } // namespace @@ -59,23 +59,39 @@ template <typename T> using NESinLayerQuantizedFixture = SinQuantizedValidationFixture<Tensor, Accessor, NESinLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NESinLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/NEON/ElementwiseSquareDiff.cpp b/tests/validation/NEON/ElementwiseSquareDiff.cpp index 9a86b541de..3c303449ab 100644 --- a/tests/validation/NEON/ElementwiseSquareDiff.cpp +++ b/tests/validation/NEON/ElementwiseSquareDiff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.01f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Input data sets **/ const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), @@ -62,10 +62,10 @@ const auto ElementwiseSquaredDiffS32Dataset = combine(combine(framework::dataset DataType::S32)); const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ const auto ElementwiseSquaredDiffFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); @@ -185,16 +185,24 @@ TEST_SUITE_END() TEST_SUITE_END() TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset), InPlaceDataSet)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset), diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp index 4ebd897c7a..266092ea64 100644 --- a/tests/validation/NEON/Flatten.cpp +++ b/tests/validation/NEON/Flatten.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,22 +81,40 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<float>, framework::Datase } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<half>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + // Only validate if the cpu architecture supports FP16. 
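+ // (ARM_COMPUTE_ENABLE_FP16 only means the FP16 kernels were compiled in;
+ // CPUInfo::get().has_fp16() is the runtime check that the executing core
+ // actually implements FP16 vector arithmetic, so such tests are skipped
+ // rather than failed on cores without it.)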
+ if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()), framework::dataset::make("DataType", DataType::F16))) { // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // FlattenLayer diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp index 3cd1033ef9..758f8aa4b3 100644 --- a/tests/validation/NEON/Floor.cpp +++ b/tests/validation/NEON/Floor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2022-2023 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -96,20 +96,36 @@ template <typename T> using NEFloorFixture = FloorValidationFixture<Tensor, Accessor, NEFloor, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFloorFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFloorFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEFloorFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))) diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp index ee7e56227d..d3a1cbd565 100644 --- a/tests/validation/NEON/FullyConnectedLayer.cpp +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -48,11 +48,11 @@ namespace /** Tolerance for float operations */ constexpr RelativeTolerance<float> rel_tolerance_f32(0.01f); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 const AbsoluteTolerance<float> abs_tolerance_f16(0.3f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType::F16 */ const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +#endif /* ARM_COMPUTE_ENABLE_FP16*/ /** Tolerance for quantized asymmetric operations */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); @@ -61,9 +61,9 @@ constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); @@ -304,15 +304,23 @@ template <typename T> using NEFullyConnectedLayerDynamicBiasFixture = FullyConnectedWithDynamicBiasFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::F16), NoActivationFunctionDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), @@ -320,16 +328,32 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, fr make("DataType", DataType::F16), ActivationFunctionsDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::F16), NoActivationFunctionDataset)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), make("DataType", DataType::F16), @@ -338,7 +362,7 @@ FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFix { } TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, diff --git a/tests/validation/NEON/FuseBatchNormalization.cpp b/tests/validation/NEON/FuseBatchNormalization.cpp index 62265c6ac9..375e604281 100644 --- a/tests/validation/NEON/FuseBatchNormalization.cpp +++ b/tests/validation/NEON/FuseBatchNormalization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,9 +39,9 @@ namespace validation namespace { AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance<float> absolute_tolerance_f16(0.2f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace template <typename T> @@ -108,7 +108,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture<float>, fra validate(Accessor(_target_b), _reference_b, absolute_tolerance_f32); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationConvFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine( @@ -120,9 +120,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationConvFixture<half>, fram with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture<half>, framework::DatasetMode::NIGHTLY, @@ -135,12 +143,20 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationConvFixture<half>, fram with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // Convolution TEST_SUITE(DepthwiseConvolution) @@ -177,7 +193,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture<float>, fram } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationDWCFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine( @@ -189,9 +205,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFuseBatchNormalizationDWCFixture<half>, frame with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture<half>, framework::DatasetMode::NIGHTLY, @@ -204,13 +228,21 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFuseBatchNormalizationDWCFixture<half>, frame with_gamma_values), with_beta_values)) { - // Validate outputs - validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); - validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target_w), _reference_w, absolute_tolerance_f16); + validate(Accessor(_target_b), _reference_b, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE_END() // DepthwiseConvolution TEST_SUITE_END() // FuseBatchNormalization diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index 5f6a402204..bf74873385 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -56,17 +56,17 @@ using framework::dataset::make; namespace { constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Relative tolerance value for comparing reference's output against implementation's output for FP16 data types */ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance value for comparing reference's output against implementation's output for FP16 data types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); @@ -398,21 +398,38 @@ DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), make("ReshapeWeights", { true, false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } + } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), make("ReshapeWeights", { true, false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(BATCHED_MATMUL) @@ -420,13 +437,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::Datase make("ReshapeWeights", { false })), make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // BATCHED_MATMUL TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index d25f43a330..01a16ebccb 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -141,20 +141,23 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), + TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)), // Invalid types }), make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), }), make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 11U), 1, DataType::S32), TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), + TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), }), - make("Expected", { true, false, false, false, true })), + make("Expected", { true, false, false, false, true, false })), a_info, b_info, output_info, expected) { // Lock tensors @@ -357,12 +360,85 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantization TEST_SUITE_END() // DynamicQuantization 
#ifdef __aarch64__
+TEST_SUITE(UpdateStaticQuantInfoAfterConfigure)
+TEST_SUITE(QASYMM8_SIGNED)
+using NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureInt8Fixture =
+ GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureInt8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("reshape_b_only_on_first_run", { false }),
+ make("updated_sq_info_after_config", { true })))
+{
+ validate(Accessor(_target), _reference, tolerance_batched);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureInt8Fixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8_SIGNED }),
+ make("reshape_b_only_on_first_run", { false }),
+ make("updated_sq_info_after_config", { true })))
+{
+ validate(Accessor(_target), _reference, tolerance_batched);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QASYMM8)
+using NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureUInt8Fixture =
+ GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureUInt8Fixture, framework::DatasetMode::ALL,
+ combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false }),
+ make("updated_sq_info_after_config", { true })))
+{
+ validate(Accessor(_target), _reference, tolerance_batched);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreForUpdatedStaticQuantInfoAfterConfigureUInt8Fixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+ make("DataType", { DataType::QASYMM8 }),
+ make("reshape_b_only_on_first_run", { false }),
+ make("updated_sq_info_after_config", { true })))
+{
+ validate(Accessor(_target), _reference, tolerance_batched);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // UpdateStaticQuantInfoAfterConfigure
+
 // Dequant tests involve returning F32 from the MatrixMultiplyCore kernels and are only implemented on aarch64
 TEST_SUITE(Dequant)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputAInfo", {
+ TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
+ TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)),
+ TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/255, 10)), // Invalid types
+ }),
+ make("InputBInfo",{
+ TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/256, 10)),
+ TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/256, 10)),
+ TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+ }),
+ make("OutputInfo",{
+ TensorInfo(TensorShape(64U, 32U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 32U), 1, DataType::F32),
+ TensorInfo(TensorShape(64U, 32U), 1, DataType::F32),
+ }),
+ make("Expected", { true, true, false })),
+ a_info, b_info, output_info, expected)
+{
+ 
// Lock tensors + Status status = NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false), + &b_info.clone()->set_is_resizable(false), + nullptr, + &output_info.clone()->set_is_resizable(false)); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, combine( datasets::SmallGEMMLowpDataset(), + make("DataTypeA", {DataType::QASYMM8_SIGNED, DataType::QASYMM8}), + make("DataTypeB", DataType::QASYMM8_SIGNED), make("accumulate", {true, false}) )) { @@ -373,6 +449,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFi FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, combine( datasets::LargeGEMMLowpDataset(), + make("DataTypeA", {DataType::QASYMM8_SIGNED, DataType::QASYMM8}), + make("DataTypeB", DataType::QASYMM8_SIGNED), make("accumulate", {false}) )) { diff --git a/tests/validation/NEON/GenerateProposalsLayer.cpp b/tests/validation/NEON/GenerateProposalsLayer.cpp index 960c2054e7..7ad40abeaf 100644 --- a/tests/validation/NEON/GenerateProposalsLayer.cpp +++ b/tests/validation/NEON/GenerateProposalsLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -385,16 +385,24 @@ FIXTURE_DATA_TEST_CASE(ComputeAllAnchors, NEComputeAllAnchorsFixture<float>, fra validate(Accessor(_target), _reference); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(ComputeAllAnchors, NEComputeAllAnchorsFixture<half>, framework::DatasetMode::ALL, combine(combine(framework::dataset::make("NumAnchors", { 2, 4, 8 }), ComputeAllInfoDataset), framework::dataset::make("DataType", { DataType::F16 }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp index ef5e75c5db..ccd001ec01 100644 --- a/tests/validation/NEON/Im2Col.cpp +++ b/tests/validation/NEON/Im2Col.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -104,25 +104,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<float>, framework::DatasetMode } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), conv_args_small)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", DataType::F16)), conv_args)) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/InstanceNormalizationLayer.cpp b/tests/validation/NEON/InstanceNormalizationLayer.cpp index 593ef0ad99..b349717673 100644 --- a/tests/validation/NEON/InstanceNormalizationLayer.cpp +++ b/tests/validation/NEON/InstanceNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,13 +44,13 @@ namespace { /** Tolerance for float operations */ AbsoluteTolerance<float> tolerance_f32(0.0015f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 // This precision is chosen based on the precision float16_t can provide // for the decimal numbers between 16 and 32 and decided based on multiple // times of execution of tests. Although, with randomly generated numbers // there is no gaurantee that this tolerance will be always large enough. AbsoluteTolerance<half> tolerance_f16(static_cast<half>(0.015625f)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace TEST_SUITE(NEON) @@ -108,7 +108,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture<float>, fra TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), @@ -116,11 +116,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEInstanceNormalizationLayerFixture<half>, fram framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InPlace", { false, true }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // InstanceNormalizationLayer TEST_SUITE_END() // Neon diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp index 56223c44d5..dfa1544894 100644 --- a/tests/validation/NEON/L2NormalizeLayer.cpp +++ b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,9 +44,9 @@ namespace { /** Tolerance for float operations */ RelativeTolerance<float> tolerance_f32(0.00001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_f16(0.2f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace TEST_SUITE(NEON) @@ -116,15 +116,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<float>, framework::Da } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("Axis", { -1, 0, 1, 2 })), framework::dataset::make("Epsilon", { 1e-6 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<half>, framework::DatasetMode::NIGHTLY, @@ -132,11 +140,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<half>, framework::Dat framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-6 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // L2NormalizeLayer TEST_SUITE_END() // Neon diff --git a/tests/validation/NEON/LSTMLayer.cpp b/tests/validation/NEON/LSTMLayer.cpp index c4645563bf..037307b6f5 100644 --- a/tests/validation/NEON/LSTMLayer.cpp +++ b/tests/validation/NEON/LSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
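// --- [Editor's sketch: illustrative, not part of the patch] ---
// L2NormalizeLayer compares fp16 results with RelativeTolerance<float>(0.2f),
// i.e. the allowed error scales with the reference magnitude instead of
// being a fixed gap. A rough standalone sketch of the idea (hypothetical
// helper; not the framework's actual comparison code):
#include <cmath>

bool within_relative_tolerance(float target, float reference, float rel_tol)
{
    // Near zero a relative check is meaningless, so fall back to a tiny
    // absolute margin (epsilon chosen here purely for illustration).
    const float epsilon = 1e-6f;
    if(std::fabs(reference) < epsilon)
    {
        return std::fabs(target - reference) < epsilon;
    }
    return std::fabs(target - reference) <= rel_tol * std::fabs(reference);
}
// Example: with rel_tol = 0.2f a reference of 1.0f accepts [0.8f, 1.2f] --
// loose, but realistic for half-precision accumulation across a reduction.
// --- [End of editor's sketch] ---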
* * SPDX-License-Identifier: MIT * @@ -43,95 +43,107 @@ RelativeTolerance<float> tolerance_f32(0.00001f); RelativeTolerance<half> tolerance_f16(half(0.1)); } // namespace +using framework::dataset::make; + TEST_SUITE(NEON) TEST_SUITE(LSTMLayer) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size - TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size - }), - framework::dataset::make("InputWeightsInfo", { TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), - })), - framework::dataset::make("RecurrentWeightsInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), - })), - framework::dataset::make("CellBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(30U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("ProjectionBiasInfo", { TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - })), - framework::dataset::make("CellStateInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, 
DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), - })), - framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false })), - input_info, input_weights_info, recurrent_weights_info, cell_bias_info, projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { + TensorInfo(TensorShape(8U, 2U), 1, DataType::U8), // Wrong data type + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Wrong input size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong input weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong recurrent weights size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell bias size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong cell state size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong output size + TensorInfo(TensorShape(8U, 2U), 1, DataType::F32), // Wrong scratch size + }), + make("InputWeightsInfo", { + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 16U), 1, DataType::F32), + }), + make("RecurrentWeightsInfo", { + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), + }), + make("CellBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + 
TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(30U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("ProjectionBiasInfo", { + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("CellStateInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("OutputInfo", { + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 2U), 1, DataType::F32), + }), + make("ScratchInfo", { + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(12U, 2U), 1, DataType::F32), + }), + make("ActivationInfo", { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + }), + make("Expected", { false, false, false, false, false, false, false, false })), + input_info, input_weights_info, recurrent_weights_info, cell_bias_info, + projection_bias_info, cell_state_info, output_info, scratch_info, info, expected) { LSTMParams<ITensorInfo> lstm_params_info; auto cell_bias_clone = cell_bias_info.clone(); @@ -154,11 +166,14 @@ template <typename T> using NELSTMLayerFixture = LSTMLayerValidationFixture<Tensor, Accessor, NELSTMLayer, LSTMParams<ITensor>, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { 
true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture<float>, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F32), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -166,20 +181,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture<float>, framework::DatasetMo } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallLSTMLayerDataset(), framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("ProjectionOpt", { true, false })), - framework::dataset::make("PeepholeOpt", { true, false })), - framework::dataset::make("UseLayerNorm", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NELSTMLayerFixture<half>, framework::DatasetMode::ALL, + combine( + datasets::SmallLSTMLayerDataset(), + make("DataType", DataType::F16), + make("ProjectionOpt", { true, false }), + make("PeepholeOpt", { true, false }), + make("UseLayerNorm", { true, false }), + make("UseMemoryManager", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); - validate(Accessor(_target_scratch), _reference_scratch, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + validate(Accessor(_target_scratch), _reference_scratch, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() // LSTMLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp index 6b98ee2b67..7e5e6fbedf 100644 --- a/tests/validation/NEON/LSTMLayerQuantized.cpp +++ b/tests/validation/NEON/LSTMLayerQuantized.cpp @@ -21,15 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
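// --- [Editor's sketch: illustrative, not part of the patch] ---
// The LSTMLayer hunks above flatten deeply nested zip(zip(...)) and
// combine(combine(...)) chains into single variadic calls, helped by the
// new `using framework::dataset::make;` alias. The generated datasets are
// identical; the flat form is simply easier to read and to extend, which
// is what makes room for the new "UseMemoryManager" axis. Schematically:
//
//   // Before: each extra axis adds one nesting level.
//   combine(combine(combine(datasets::SmallLSTMLayerDataset(),
//                           framework::dataset::make("DataType", DataType::F32)),
//                   framework::dataset::make("ProjectionOpt", { true, false })),
//           framework::dataset::make("PeepholeOpt", { true, false }))
//
//   // After: axes are listed once, in order.
//   combine(datasets::SmallLSTMLayerDataset(),
//           make("DataType", DataType::F32),
//           make("ProjectionOpt", { true, false }),
//           make("PeepholeOpt", { true, false }),
//           make("UseMemoryManager", { true, false }))
// --- [End of editor's sketch] ---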
*/ +#include "arm_compute/runtime/Allocator.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/Utils.h" -#include "tests/datasets/LSTMLayerDataset.h" #include "tests/framework/Asserts.h" +#include "tests/framework/DatasetModes.h" #include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include <vector> @@ -214,6 +216,154 @@ TEST_CASE(RunSmall, framework::DatasetMode::PRECOMMIT) validate(Accessor(output_state), expected_output, tolerance_qsymm16); } +TEST_CASE(RunSmallWithMemoryManager, framework::DatasetMode::PRECOMMIT) +{ + const int batch_size = 2; + const int input_size = 2; + const int output_size = 4; + + + QuantizationInfo qasymm(1.f / 128.f, 128); + QuantizationInfo qweights(1.f / 128.f, 128); + QuantizationInfo qsymm_3(8.f / 32768.f, 0); + QuantizationInfo qsymm_4(16.f / 32768.f, 0); + + TensorShape input_shape{ input_size, batch_size }; + TensorShape input_weights_shape{ input_size, output_size }; + TensorShape recurrent_weights_shape{ output_size, output_size }; + TensorShape output_shape{ output_size, batch_size}; + TensorShape bias_shape{ output_size }; + + auto input_to_input_weights = create_tensor<Tensor>(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_forget_weights = create_tensor<Tensor>(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_cell_weights = create_tensor<Tensor>(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_to_output_weights = create_tensor<Tensor>(input_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_input_weights = create_tensor<Tensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_forget_weights = create_tensor<Tensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_cell_weights = create_tensor<Tensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto recurrent_to_output_weights = create_tensor<Tensor>(recurrent_weights_shape, DataType::QASYMM8, 1, qweights); + auto input_gate_bias = create_tensor<Tensor>(bias_shape, DataType::S32); + auto forget_gate_bias = create_tensor<Tensor>(bias_shape, DataType::S32); + auto cell_gate_bias = create_tensor<Tensor>(bias_shape, DataType::S32); + auto output_gate_bias = create_tensor<Tensor>(bias_shape, DataType::S32); + + // LSTM input + auto input = create_tensor<Tensor>(input_shape, DataType::QASYMM8, 1, qasymm); + + // LSTM output state + auto output_state = create_tensor<Tensor>(output_shape, DataType::QASYMM8, 1, qasymm); + + // LSTM cell state + auto cell_state = create_tensor<Tensor>(output_shape, DataType::QSYMM16, 1, qsymm_4); + + auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); + auto pool_mgr = std::make_shared<PoolManager>(); + auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); + + NELSTMLayerQuantized lstmq(mm); + + lstmq.configure(&input, &input_to_input_weights, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights, + &recurrent_to_input_weights, &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights, + &input_gate_bias, &forget_gate_bias, &cell_gate_bias, &output_gate_bias, &cell_state, &output_state, &cell_state, &output_state); + + 
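// --- [Editor's sketch: illustrative, not part of the patch] ---
// Ordering note for the new RunSmallWithMemoryManager test: the function is
// constructed with a MemoryManagerOnDemand so that configure() can register
// the lifetimes of its internal workspace tensors with the
// BlobLifetimeManager; actual memory is only attached later via populate().
// Condensed lifecycle (assuming, as the call further below suggests, that
// the second populate() argument is the number of pools to create):
//
//   auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
//   auto pool_mgr     = std::make_shared<PoolManager>();
//   auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
//
//   NELSTMLayerQuantized fn(mm);
//   fn.configure(/* ...tensors... */);  // workspace lifetimes recorded
//   // allocate all user-visible inputs/outputs
//   Allocator alloc{};
//   mm->populate(alloc, 1);             // back the pool with real memory
//   fn.run();                           // may be called repeatedly
//   mm->clear();                        // release pooled memory when done
// --- [End of editor's sketch] ---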
input.allocator()->allocate(); + input_to_input_weights.allocator()->allocate(); + input_to_forget_weights.allocator()->allocate(); + input_to_cell_weights.allocator()->allocate(); + input_to_output_weights.allocator()->allocate(); + recurrent_to_input_weights.allocator()->allocate(); + recurrent_to_forget_weights.allocator()->allocate(); + recurrent_to_cell_weights.allocator()->allocate(); + recurrent_to_output_weights.allocator()->allocate(); + input_gate_bias.allocator()->allocate(); + forget_gate_bias.allocator()->allocate(); + cell_gate_bias.allocator()->allocate(); + output_gate_bias.allocator()->allocate(); + cell_state.allocator()->allocate(); + output_state.allocator()->allocate(); + + // Fill weights and biases + fill_tensor(input_to_input_weights, std::vector<uint8_t>{ 47, 168, + 66, 239, + 6, 42, + 237, 236 }); + + fill_tensor(input_to_forget_weights, std::vector<uint8_t> { 204, 193, + 148, 59, + 113, 17, + 66, 197 }); + + fill_tensor(input_to_cell_weights, std::vector<uint8_t> { 172, 101, + 184, 209, + 165, 82, + 108, 209 }); + + fill_tensor(input_to_output_weights, std::vector<uint8_t> { 203, 244, + 219, 114, + 130, 16, + 163, 222 }); + + fill_tensor(recurrent_to_input_weights, std::vector<uint8_t> { 162, 168, 7, 95, + 91, 155, 108, 216, + 255, 100, 48, 188, + 58, 37, 186, 147 }); + + fill_tensor(recurrent_to_forget_weights, std::vector<uint8_t> { 46, 58, 47, 170, + 246, 96, 12, 99, + 68, 23, 186, 161, + 237, 164, 89, 6 }); + + fill_tensor(recurrent_to_cell_weights, std::vector<uint8_t> { 234, 99, 71, 206, + 205, 159, 64, 253, + 191, 148, 116, 8, + 209, 136, 59, 138 }); + + fill_tensor(recurrent_to_output_weights, std::vector<uint8_t> { 23, 241, 137, 36, + 206, 5, 227, 56, + 254, 176, 231, 47, + 18, 201, 161, 11 }); + + fill_tensor(input_gate_bias, std::vector<int> {-103038, 30525, 115255, -38154 }); + fill_tensor(forget_gate_bias, std::vector<int> { -23428, 126970, 116806, 46307 }); + fill_tensor(cell_gate_bias, std::vector<int> { 128006, 69949, -42808, 42568 }); + fill_tensor(output_gate_bias, std::vector<int> { -67066, -53607, 47233, 7300 }); + + SimpleTensor<uint8_t> expected_output(output_shape, DataType::QASYMM8, 1, qasymm); + + // Initialize state + fill_tensor(output_state, std::vector<uint8_t> { 128, 128, 128, 128, + 128, 128, 128, 128 }); + fill_tensor(cell_state, std::vector<int16_t> { 0, 0, 0, 0, + 0, 0, 0, 0 }); + + // First input + fill_tensor(input, std::vector<uint8_t> { 106, 193, + 155, 150 }); + + fill_tensor(expected_output, std::vector<uint8_t> { 128, 130, 36, 134, + 128, 131, 35, 133 }); + + Allocator alloc{}; + mm->populate(alloc, 1); + + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + // Second input + fill_tensor(expected_output, std::vector<uint8_t> { 128, 129, 12, 137, + 128, 131, 10, 136 }); + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + // Third input + fill_tensor(expected_output, std::vector<uint8_t> { 128, 129, 8, 140, + 128, 130, 6, 138 }); + lstmq.run(); + validate(Accessor(output_state), expected_output, tolerance_qsymm16); + + mm->clear(); +} + TEST_CASE(RunLarge, framework::DatasetMode::PRECOMMIT) { const int batch_size = 16; diff --git a/tests/validation/NEON/LogSoftmaxLayer.cpp b/tests/validation/NEON/LogSoftmaxLayer.cpp index a7ab033359..6718597c6b 100644 --- a/tests/validation/NEON/LogSoftmaxLayer.cpp +++ b/tests/validation/NEON/LogSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. 
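// --- [Editor's sketch: illustrative, not part of the patch] ---
// The raw uint8 values filled above are only meaningful through each
// tensor's QuantizationInfo. With scale = 1/128 and zero-point = 128, the
// QASYMM8 mapping real = (q - zero_point) * scale covers [-1.0, ~0.992].
// A tiny standalone check of that mapping on the test's first input:
#include <iostream>

int main()
{
    const float scale      = 1.0f / 128.0f;
    const int   zero_point = 128;

    // First LSTM input from the test above: { 106, 193, 155, 150 }
    for(int q : { 106, 193, 155, 150 })
    {
        const float real = (q - zero_point) * scale;
        std::cout << q << " -> " << real << '\n'; // 106 -> -0.171875, ...
    }
    return 0;
}
// --- [End of editor's sketch] ---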
* * SPDX-License-Identifier: MIT * @@ -52,9 +52,9 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); } // namespace @@ -66,34 +66,58 @@ template <typename T> using NELogSoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NELogSoftmaxLayer, T, true>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NELogSoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0, -3, 2 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), framework::dataset::make("Axis", { 0 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() //FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NELogSoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp index f22bd9e86a..ef79faba51 100644 --- a/tests/validation/NEON/MatMul.cpp +++ b/tests/validation/NEON/MatMul.cpp @@ -264,7 +264,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, TEST_SUITE_END() // BF16 #endif /* ARM_COMPUTE_ENABLE_BF16 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFixture<half>, @@ -279,8 +279,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEMatMulFixture<half>, @@ -295,8 +303,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, NEMatMulDynamicTensorsFixture<half>, @@ -312,11 +328,19 @@ FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, }), make("NumberOfRuns", 5))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/MaxUnpoolingLayer.cpp b/tests/validation/NEON/MaxUnpoolingLayer.cpp index 0eb021fe71..ac1fde28c9 100644 --- a/tests/validation/NEON/MaxUnpoolingLayer.cpp +++ b/tests/validation/NEON/MaxUnpoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023 Arm Limited. + * Copyright (c) 2020-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,7 +62,7 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framewor validate(Accessor(_target), _reference); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), @@ -70,11 +70,19 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework )) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp index 085f3608a0..02ce06a843 100644 --- a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,10 +44,10 @@ namespace validation namespace { /** Tolerance for float operations */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half> tolerance_f16(half(0.2f)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -RelativeTolerance<float> tolerance_f32(1e-4f); +#endif /* ARM_COMPUTE_ENABLE_FP16 */ +RelativeTolerance<float> tolerance_f32(0.001f); RelativeTolerance<uint8_t> tolerance_qasymm8(1); } // namespace @@ -77,26 +77,42 @@ template <typename T> using NEMeanStdDevNormalizationLayerFixture = MeanStdDevNormalizationLayerValidationFixture<Tensor, Accessor, NEMeanStdDevNormalizationLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-3 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), @@ -107,7 +123,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<float>, f // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DMeanStdDevNormalizationShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), framework::dataset::make("Epsilon", { 1e-8 }))) diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp index 7260d1044e..793c2a1104 100644 --- a/tests/validation/NEON/NormalizationLayer.cpp +++ b/tests/validation/NEON/NormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,9 +44,9 @@ namespace validation namespace { /** Tolerance for float operations */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<float> tolerance_f16(0.1f); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); /** Input data set. */ @@ -95,17 +95,25 @@ template <typename T> using NENormalizationLayerFixture = NormalizationValidationFixture<Tensor, Accessor, NENormalizationLayer, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), NormalizationDatasetFP32), diff --git a/tests/validation/NEON/PReluLayer.cpp b/tests/validation/NEON/PReluLayer.cpp index 69a05175ad..de07524a15 100644 --- a/tests/validation/NEON/PReluLayer.cpp +++ b/tests/validation/NEON/PReluLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,13 +54,13 @@ const auto PReluLayerQASYMM8SignedDataset = combine(combine(framework::dataset:: const auto PReluLayerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> tolerance_fp16(0.001f); const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 } // namespace @@ -157,21 +157,37 @@ TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPReluLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), PReluLayerFP16Dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPReluLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), PReluLayerFP16Dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, 0.01); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEPReluLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), PReluLayerFP32Dataset)) diff --git a/tests/validation/NEON/PadLayer.cpp b/tests/validation/NEON/PadLayer.cpp index 4947af3423..f175cdb1e9 100644 --- a/tests/validation/NEON/PadLayer.cpp +++ b/tests/validation/NEON/PadLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -144,26 +144,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<float>, framework::DatasetMode } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/Permute.cpp b/tests/validation/NEON/Permute.cpp index d897bbbe07..e9939105cd 100644 --- a/tests/validation/NEON/Permute.cpp +++ b/tests/validation/NEON/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -174,6 +174,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture<uint32_t>, framework::DatasetM } TEST_SUITE_END() +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEPermuteFixture<float16_t>, framework::DatasetMode::PRECOMMIT, + PermuteParametersSmall * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture<float16_t>, framework::DatasetMode::NIGHTLY, + PermuteParametersLarge * framework::dataset::make("DataType", DataType::F16)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp index 964d1c5deb..f93bafcff6 100644 --- a/tests/validation/NEON/PixelWiseMultiplication.cpp +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. 
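// --- [Editor's sketch: illustrative, not part of the patch] ---
// The PadLayer runs above cover three modes: CONSTANT writes a fill value
// into the border, while REFLECT and SYMMETRIC mirror the source, differing
// only in whether the edge sample is repeated. A 1-D index-mapping sketch,
// assuming the pad is smaller than the dimension size n (which the dataset
// shapes above satisfy):

// REFLECT: edge sample not repeated -> ... 2 1 | 0 1 2 | 1 0 ...
int reflect_index(int i, int n)
{
    if(i < 0)   return -i;
    if(i >= n)  return 2 * n - i - 2;
    return i;
}

// SYMMETRIC: edge sample repeated -> ... 1 0 | 0 1 2 | 2 1 ...
int symmetric_index(int i, int n)
{
    if(i < 0)   return -i - 1;
    if(i >= n)  return 2 * n - i - 1;
    return i;
}
// --- [End of editor's sketch] ---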
* * SPDX-License-Identifier: MIT * @@ -101,7 +101,17 @@ const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)), \ (INPLACE_DATASET))) \ { \ - VALIDATE \ + if((DataType::DT1 != DataType::F16 && \ + DataType::DT2 != DataType::F16 && \ + DataType::DT3 != DataType::F16) || CPUInfo::get().has_fp16()) \ + { \ + VALIDATE \ + } \ + else \ + { \ + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); \ + framework::ARM_COMPUTE_PRINT_INFO(); \ + } \ } // *INDENT-ON* @@ -286,6 +296,20 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture, // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +#ifdef ARM_COMPUTE_ENABLE_SME2 +FIXTURE_DATA_TEST_CASE(RunSMEMul, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(datasets::SMEMulShapes(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataTypeOut", DataType::QASYMM8_SIGNED), + framework::dataset::make("Scale", { scale_unity }), + PixelWiseMultiplicationPolicySTZDataset, + PixelWiseMultiplicationQASYMM8QuantDataset, + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +#endif // ARM_COMPUTE_ENABLE_SME2 FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED)), @@ -298,6 +322,22 @@ FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEPixelWiseMultiplicationQASYMM8SignedFi // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +TEST_SUITE(Broadcast) +#ifdef ARM_COMPUTE_ENABLE_SME2 +FIXTURE_DATA_TEST_CASE(RunSMEMul, NEPixelWiseMultiplicationBroadcastQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(datasets::SMEMulShapesBroadcast(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED), + framework::dataset::make("DataTypeOut", DataType::QASYMM8_SIGNED), + framework::dataset::make("Scale", { scale_unity }), + PixelWiseMultiplicationPolicySTZDataset, + PixelWiseMultiplicationQASYMM8QuantDataset, + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +#endif // ARM_COMPUTE_ENABLE_SME2 +TEST_SUITE_END() // Broadcast TEST_SUITE_END() // ScaleUnity TEST_SUITE_END() // QASYMM8_SIGNED @@ -531,7 +571,7 @@ TEST_SUITE_END() // Broadcast TEST_SUITE_END() // S32toS32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16toF16) TEST_SUITE(Scale255) @@ -539,7 +579,7 @@ PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture<half_flo TEST_SUITE_END() // Scale255 TEST_SUITE_END() // F16toF16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(F32toF32) diff --git a/tests/validation/NEON/Pooling3dLayer.cpp b/tests/validation/NEON/Pooling3dLayer.cpp index 07054462f5..1b30023ca5 100644 --- a/tests/validation/NEON/Pooling3dLayer.cpp +++ b/tests/validation/NEON/Pooling3dLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. 
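// --- [Editor's sketch: illustrative, not part of the patch] ---
// The SME2-gated runs above draw shape pairs from SMEMulShapesBroadcast,
// where the two multiplicands differ but are broadcast compatible. The rule
// (a sketch assuming ACL-style alignment, where dimension 0 of one shape
// lines up with dimension 0 of the other and missing higher dimensions act
// as size 1):
#include <algorithm>
#include <cstddef>
#include <vector>

bool broadcast_compatible(std::vector<int> a, std::vector<int> b)
{
    const std::size_t rank = std::max(a.size(), b.size());
    a.resize(rank, 1);
    b.resize(rank, 1);
    for(std::size_t d = 0; d < rank; ++d)
    {
        // Extents must match, or one side must be 1 (and gets repeated).
        if(a[d] != b[d] && a[d] != 1 && b[d] != 1)
        {
            return false;
        }
    }
    return true;
}
// e.g. broadcast_compatible({8, 1, 5}, {8, 4, 1}) -> true; the output
// shape takes the larger extent in every dimension: {8, 4, 5}.
// --- [End of editor's sketch] ---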
+ * Copyright (c) 2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -70,9 +70,9 @@ const auto Pooling3dLayerDatasetQASYMM8Large = combine(combine(combine(combine(f using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>; constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ @@ -239,14 +239,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<float>, framework::Data TEST_SUITE_END() // GlobalPooling TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } @@ -254,8 +262,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::Datas framework::dataset::make("DataType", DataType::F16)))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(GlobalPooling) @@ -273,8 +289,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::Datas framework::dataset::make("ExcludePadding", {false, true})), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } @@ -286,8 +310,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallGlobal, NEPooling3dLayerGlobalFixture<half>, fram framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::DatasetMode::NIGHTLY, @@ -302,15 +334,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::Datas framework::dataset::make("ExcludePadding", false)), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } // clang-format on // *INDENT-ON* TEST_SUITE_END() // GlobalPooling TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp index 161fe627cc..f635a63bbe 100644 --- a/tests/validation/NEON/PoolingLayer.cpp +++ b/tests/validation/NEON/PoolingLayer.cpp @@ -58,9 +58,9 @@ const auto PoolingLayerDatasetQASYMM8Small = combine(combine(combine(framework:: framework::dataset::make("ExcludePadding", { true })); constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ const auto pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); @@ -226,7 +226,7 @@ FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<f TEST_SUITE_END() // CornerCases TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, @@ -235,23 +235,47 @@ FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), 
framework::dataset::make("UseKernelIndices", { false }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); - validate(Accessor(_target_indices), _ref_indices); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + validate(Accessor(_target_indices), _ref_indices); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(CornerCases) FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset, @@ -259,12 +283,20 @@ FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<h DataType::F16)), pool_data_layout_dataset)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // CornerCases TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp index bab7490762..fac5d73abd 100644 --- a/tests/validation/NEON/QuantizationLayer.cpp +++ b/tests/validation/NEON/QuantizationLayer.cpp @@ -125,50 +125,90 @@ FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<floa validate(Accessor(_target), _reference, tolerance_u16); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8Signed, NEQuantizationLayerQASYMM8SignedFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_s8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLargeQASYMM8, NEQuantizationLayerQASYMM8Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u8); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(QuantizationLargeShapes, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_u16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_u16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float TEST_SUITE(Quantized) diff --git a/tests/validation/NEON/RNNLayer.cpp b/tests/validation/NEON/RNNLayer.cpp index 979aa0f2c5..d6e4b7ac0e 100644 --- a/tests/validation/NEON/RNNLayer.cpp +++ b/tests/validation/NEON/RNNLayer.cpp @@ -40,10 +40,10 @@ namespace validation namespace { RelativeTolerance<float> tolerance_f32(0.001f); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType:F32 */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<half> tolerance_f16(half(0.1)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType:F16 */ constexpr float abs_tolerance_f16(0.02f); /**< Absolute tolerance value for comparing reference's output against implementation's output for DataType:F16 */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace TEST_SUITE(NEON) @@ -134,15 +134,23 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture<float>, framework::DatasetMod } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallRNNLayerDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // RNNLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ROIAlignLayer.cpp b/tests/validation/NEON/ROIAlignLayer.cpp index 98c92a0b20..1f3db04ffd 100644 --- a/tests/validation/NEON/ROIAlignLayer.cpp +++ b/tests/validation/NEON/ROIAlignLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,10 +47,10 @@ namespace RelativeTolerance<float> relative_tolerance_f32(0.01f); AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 RelativeTolerance<float> relative_tolerance_f16(0.01f); AbsoluteTolerance<float> absolute_tolerance_f16(0.001f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); @@ -115,17 +115,25 @@ FIXTURE_DATA_TEST_CASE(SmallROIAlignLayerFloat, NEROIAlignLayerFloatFixture, fra // Validate output validate(Accessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32); } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 using NEROIAlignLayerHalfFixture = ROIAlignLayerFixture<Tensor, Accessor, NEROIAlignLayer, half, half>; FIXTURE_DATA_TEST_CASE(SmallROIAlignLayerHalf, NEROIAlignLayerHalfFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), framework::dataset::make("DataType", { DataType::F16 })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { - // Validate output - validate(Accessor(_target), _reference, relative_tolerance_f16, .02f, absolute_tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, relative_tolerance_f16, .02f, absolute_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/NEON/Range.cpp b/tests/validation/NEON/Range.cpp index fda7b2c448..0df5e86186 100644 --- a/tests/validation/NEON/Range.cpp +++ b/tests/validation/NEON/Range.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -144,7 +144,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture<int16_t>, framework::DatasetMode TEST_SUITE_END() // S16 TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( framework::dataset::make("DataType", DataType::F16), @@ -152,11 +152,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture<half>, framework::DatasetMode::P float_step_dataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo() }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance, 0.f, abs_tolerance); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance, 0.f, abs_tolerance); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NERangeFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp index 8ca0bb53a7..e5692693bd 100644 --- a/tests/validation/NEON/ReduceMean.cpp +++ b/tests/validation/NEON/ReduceMean.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ namespace validation namespace { constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr AbsoluteTolerance<float> tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 #ifdef __aarch64__ constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */ constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */ @@ -93,15 +93,23 @@ using NEReduceMeanFixture = ReduceMeanFixture<Tensor, Accessor, NEReduceMean, T> TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReduceMeanFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), concat(axis_keep, axis_drop))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -109,11 +117,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), concat(axis_keep, axis_drop))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEReduceMeanFixture<float>, diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp index 48c3a1a788..727e880d28 100644 --- a/tests/validation/NEON/ReductionOperation.cpp +++ b/tests/validation/NEON/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,10 +45,10 @@ namespace /** Tolerance for float operations */ AbsoluteTolerance<float> tolerance_f32(0.0001f); RelativeTolerance<float> rel_tolerance_f32(0.0001f); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 AbsoluteTolerance<float> tolerance_f16(0.2f); RelativeTolerance<float> rel_tolerance_f16(0.1f); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 /** Tolerance for quantized operations */ RelativeTolerance<float> tolerance_quantized(1.f); @@ -149,22 +149,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture<float>, framework:: } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) { - // Validate output - validate(Accessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 template <typename T> using NEReductionOperationQuantizedFixture = ReductionOperationQuantizedFixture<Tensor, Accessor, NEReductionOperation, T>; diff --git a/tests/validation/NEON/Reverse.cpp b/tests/validation/NEON/Reverse.cpp index 7b5337f14b..7d99bd614d 100644 --- a/tests/validation/NEON/Reverse.cpp +++ b/tests/validation/NEON/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -94,7 +94,7 @@ using NEReverseFixture = ReverseValidationFixture<Tensor, Accessor, NEReverse, T TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReverseFixture<half>, @@ -105,8 +105,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, make("use_negative_axis", { true, false }), make("use_inverted_axis", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -118,11 +126,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, make("use_negative_axis", { true, false }), make("use_inverted_axis", { true, false }))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index f1209a21ac..55de2d6281 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -79,10 +79,10 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); constexpr AbsoluteTolerance<int16_t> tolerance_s16(1); RelativeTolerance<float> tolerance_f32(0.05); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 constexpr float abs_tolerance_f16(0.01f); RelativeTolerance<half> tolerance_f16(half(0.1)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ constexpr float tolerance_num_s16 = 0.01f; constexpr float tolerance_num_f32 = 0.01f; @@ -153,9 +153,9 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL) { DataType::U64, false }, { DataType::S64, false }, { DataType::BFLOAT16, false }, -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 { DataType::F16, true }, -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // ARM_COMPUTE_ENABLE_FP16 { DataType::F32, true }, { DataType::F64, false }, { DataType::SIZET, false }, @@ -381,57 +381,97 @@ FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<float>, framewo validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); } TEST_SUITE_END() // FP32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16)); const auto f16_shape_nhwc = combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F16)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture<half>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleAlignCornersSamplingPolicySet)) { - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + if(CPUInfo::get().has_fp16()) + { + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); - // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE_END() // Float TEST_SUITE(Integer) diff --git a/tests/validation/NEON/Scatter.cpp b/tests/validation/NEON/Scatter.cpp new file mode 100644 index 0000000000..1100929874 --- /dev/null +++ b/tests/validation/NEON/Scatter.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/NEON/functions/NEScatter.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ScatterDataset.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ScatterLayerFixture.h" +#include "tests/framework/datasets/Datasets.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename T> +using NEScatterLayerFixture = ScatterValidationFixture<Tensor, Accessor, NEScatter, T>; +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */ +} // namespace + +using framework::dataset::make; + +TEST_SUITE(NEON) +TEST_SUITE(Scatter) +DATA_TEST_CASE(Validate, framework::DatasetMode::DISABLED, zip( + make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32), // Mismatching data types + TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(15U), 1, DataType::U8), // Valid + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. + TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. + TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1) + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5 + }), + make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), + TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + }), + make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32), + TensorInfo(TensorShape(1U, 4U), 1, DataType::S32), + TensorInfo(TensorShape(3U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(6U, 2U), 1, DataType::S32), + }), + make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(271U), 1, DataType::F32), + TensorInfo(TensorShape(12U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), + 
TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), + }), + make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false), + ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Min, false), + ScatterInfo(ScatterFunction::Add, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Update, false), + }), + make("Expected", { false, true, true, true, false, false, false, false, false, false })), + input_info, updates_info, indices_info, output_info, scatter_info, expected) +{ + const Status status = NEScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + +const auto allScatterFunctions = make("ScatterFunction", + {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max }); + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} + +// With this test, src should be passed as nullptr. +FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Add}), + make("ZeroInit", {true}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} + +// Updates/src/dst have same no. dims. 
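// Before the multi-dimensional cases below, a toy, self-contained reference for
// the five ScatterFunction modes over a flat destination. This is a hypothetical
// illustration of the semantics these fixtures validate against, not an Arm
// Compute Library API; real tensors are multi-dimensional and indices may be batched.
//
//   #include <algorithm>
//   #include <cstddef>
//   #include <vector>
//
//   enum class ScatterFn { Update, Add, Sub, Min, Max }; // mirrors ScatterFunction
//
//   void scatter_reference(std::vector<float> &dst, const std::vector<float> &updates,
//                          const std::vector<int> &indices, ScatterFn fn)
//   {
//       for(std::size_t i = 0; i < indices.size(); ++i)
//       {
//           float      &d = dst[static_cast<std::size_t>(indices[i])];
//           const float u = updates[i];
//           switch(fn)
//           {
//               case ScatterFn::Update: d = u;              break;
//               case ScatterFn::Add:    d += u;             break;
//               case ScatterFn::Sub:    d -= u;             break;
//               case ScatterFn::Min:    d = std::min(d, u); break;
//               case ScatterFn::Max:    d = std::max(d, u); break;
//           }
//       }
//   }
//
//   // e.g. dst = {1, 2, 3}, updates = {10}, indices = {1}, ScatterFn::Add -> dst = {1, 12, 3}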
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED,
+    combine(datasets::SmallScatterMultiDimDataset(),
+        make("DataType", {DataType::F32}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+// m+1-D to m+n-D cases
+FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED,
+    combine(datasets::SmallScatterMultiIndicesDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }),
+        make("ZeroInit", {false}),
+        make("Inplace", {false, true}),
+        make("Padding", {true})))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+// m+k, k-1-D m+n-D case
+FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED,
+    combine(datasets::SmallScatterBatchedDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+// m+k, k-1-D m+n-D case
+FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, NEScatterLayerFixture<float>, framework::DatasetMode::DISABLED,
+    combine(datasets::SmallScatterScalarDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false}))) // NOTE: Padding not supported in this dataset
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+
+
+// NOTE: Padding is disabled for the SmallScatterMixedDataset due to certain shapes not supporting padding.
+// Padding is well tested in F32 Datatype test cases.
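// A minimal usage sketch of the NEScatter function these suites exercise, kept as
// a comment because the configure() signature is inferred from the
// NEScatter::validate() call earlier in this file and from the "Valid" row of the
// Validate dataset; treat it as an assumption, not authoritative API documentation.
//
//   Tensor src, updates, indices, dst;
//   src.allocator()->init(TensorInfo(TensorShape(15U), 1, DataType::F32));
//   updates.allocator()->init(TensorInfo(TensorShape(15U), 1, DataType::F32));
//   indices.allocator()->init(TensorInfo(TensorShape(1U, 15U), 1, DataType::S32));
//   dst.allocator()->init(TensorInfo(TensorShape(15U), 1, DataType::F32));
//
//   NEScatter scatter;
//   scatter.configure(&src, &updates, &indices, &dst, ScatterInfo(ScatterFunction::Max, false));
//
//   src.allocator()->allocate();
//   updates.allocator()->allocate();
//   indices.allocator()->allocate();
//   dst.allocator()->allocate();
//   // ... fill src/updates/indices ...
//   scatter.run();
//
// The FP16 and integer suites below drive the same fixture over the
// SmallScatterMixedDataset, with padding disabled as noted above.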
+ +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<half>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::F16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE(Integer) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<int32_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<int16_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<int8_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S8 + +TEST_SUITE(U32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<uint32_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U32 + +TEST_SUITE(U16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<uint16_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, NEScatterLayerFixture<uint8_t>, framework::DatasetMode::DISABLED, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(Accessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U8 +TEST_SUITE_END() // Integer + +TEST_SUITE_END() // Scatter +TEST_SUITE_END() // NEON + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/Select.cpp b/tests/validation/NEON/Select.cpp index 40744581b0..25d510aa64 100644 --- a/tests/validation/NEON/Select.cpp +++ b/tests/validation/NEON/Select.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -96,15 +96,22 @@ using NESelectFixture = SelectValidationFixture<Tensor, Accessor, NESelect, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NESelectFixture<half>, framework::DatasetMode::PRECOMMIT, combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -112,11 +119,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Slice.cpp b/tests/validation/NEON/Slice.cpp index d5549c8cdb..2ec6d09134 100644 --- a/tests/validation/NEON/Slice.cpp +++ b/tests/validation/NEON/Slice.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,15 +67,22 @@ template <typename T> using NESliceFixture = SliceFixture<Tensor, Accessor, NESlice, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESliceFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -83,11 +90,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 94d0866c38..e428d7958b 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -54,9 +54,9 @@ constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /** CNN data types */ const auto CNNDataTypes = make("DataType", { -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 DataType::F16, -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ DataType::F32, }); } // namespace @@ -157,7 +157,7 @@ DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, } TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine( @@ -166,8 +166,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::Datas make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine( @@ -176,8 +183,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::Dataset make("Beta", { 1.0f, 2.0f }), make("Axis", { 0, 1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine( @@ -186,8 +201,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::Datas make("Beta", { 1.0f }), make("Axis", { 0, 2, -1 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine( @@ -196,11 +219,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::Dataset make("Beta", { 1.0f, 2.0f }), make("Axis", { 0 }))) { - // Validate output - validate(Accessor(_target), _reference, tolerance_f16); + if(CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() //FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, diff --git a/tests/validation/NEON/Split.cpp b/tests/validation/NEON/Split.cpp index 72df2ad663..d7aa2e532c 100644 --- a/tests/validation/NEON/Split.cpp +++ b/tests/validation/NEON/Split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -98,17 +98,25 @@ template <typename T> using NESplitShapesFixture = SplitShapesFixture<Tensor, ITensor, Accessor, NESplit, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESplitFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallSplitDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate outputs - for(unsigned int i = 0; i < _target.size(); ++i) + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[i]), _reference[i]); + // Validate outputs + for(unsigned int i = 0; i < _target.size(); ++i) + { + validate(Accessor(_target[i]), _reference[i]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } @@ -117,14 +125,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeSplitDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate outputs - for(unsigned int i = 0; i < _target.size(); ++i) + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[i]), _reference[i]); + // Validate outputs + for(unsigned int i = 0; i < _target.size(); ++i) + { + validate(Accessor(_target[i]), _reference[i]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/StridedSlice.cpp b/tests/validation/NEON/StridedSlice.cpp index a1b3cef801..7c76800d1f 100644 --- a/tests/validation/NEON/StridedSlice.cpp +++ b/tests/validation/NEON/StridedSlice.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,15 +69,22 @@ template <typename T> using NEStridedSliceFixture = StridedSliceFixture<Tensor, Accessor, NEStridedSlice, T>; TEST_SUITE(Float) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStridedSliceFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallStridedSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -85,11 +92,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, framework::DatasetMode::NIGHTLY, combine(datasets::LargeStridedSliceDataset(), framework::dataset::make("DataType", DataType::F16))) { - // Validate output - validate(Accessor(_target), _reference); + if(CPUInfo::get().has_fp16()) + { + validate(Accessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE_END() // FP16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, diff --git a/tests/validation/NEON/Unstack.cpp b/tests/validation/NEON/Unstack.cpp index 3e8f1ff324..18e778b9fd 100644 --- a/tests/validation/NEON/Unstack.cpp +++ b/tests/validation/NEON/Unstack.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -95,19 +95,28 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture<float>, framework::DatasetMode } TEST_SUITE_END() // F32 -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef ARM_COMPUTE_ENABLE_FP16 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture<half>, framework::DatasetMode::PRECOMMIT, unstack_dataset_small * framework::dataset::make("DataType", { DataType::F16 })) { ARM_COMPUTE_ERROR_ON(_target.size() != _reference.size()); - // Validate output - for(size_t k = 0; k < _target.size(); ++k) + + if(CPUInfo::get().has_fp16()) { - validate(Accessor(_target[k]), _reference[k]); + // Validate output + for(size_t k = 0; k < _target.size(); ++k) + { + validate(Accessor(_target[k]), _reference[k]); + } + } + else + { + ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_INFO(); } } TEST_SUITE_END() // F16 -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(Quantized) FIXTURE_DATA_TEST_CASE(RunSmall, NEUnstackFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, unstack_dataset_small * framework::dataset::make("DataType", { DataType::QASYMM8 })) diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h index a24ba8913e..d3e8bf09f2 100644 --- a/tests/validation/fixtures/ActivationLayerFixture.h +++ b/tests/validation/fixtures/ActivationLayerFixture.h @@ -50,6 +50,12 @@ public: void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ActivationLayerInfo info(function, alpha_beta, alpha_beta); _in_place = in_place; diff --git a/tests/validation/fixtures/AddMulAddFixture.h b/tests/validation/fixtures/AddMulAddFixture.h index d13fef2f02..788e1c974f 100644 --- a/tests/validation/fixtures/AddMulAddFixture.h +++ b/tests/validation/fixtures/AddMulAddFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -144,8 +144,15 @@ public: void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info) { - Parent::setup(shape, data_type, act_info, interm_out); - compute_reference(shape, data_type, act_info); + const bool is_not_cpu = !std::is_same<TensorType, Tensor>::value; + const bool is_not_fp16 = data_type != DataType::F16; + const bool device_has_fp16 = CPUInfo::get().has_fp16(); + + if(is_not_cpu || is_not_fp16 || device_has_fp16) + { + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } } // Compute Reference is moved outside of the generic fixture because with the quantized data types, @@ -202,6 +209,12 @@ public: QuantizationInfo input1_qinfo, QuantizationInfo input2_qinfo, QuantizationInfo bn_mul_qinfo, QuantizationInfo bn_add_qinfo, QuantizationInfo add_output_qinfo, QuantizationInfo final_output_qinfo) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // Quantization arguments moved to class attributes to prevent long function declerations Parent::_input1_qinfo = input1_qinfo; Parent::_input2_qinfo = input2_qinfo; diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h index 7a823568a8..884b19260a 100644 --- a/tests/validation/fixtures/ArgMinMaxFixture.h +++ b/tests/validation/fixtures/ArgMinMaxFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ARG_MIN_MAX_FIXTURE -#define ARM_COMPUTE_TEST_ARG_MIN_MAX_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ARGMINMAXFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ARGMINMAXFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class ArgMinMaxValidationBaseFixture : public framework::Fixture public: void setup(TensorShape shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + input_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, input_type, output_type, axis, op, q_info); _reference = compute_reference(shape, input_type, output_type, axis, op, q_info); } @@ -168,4 +174,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ARG_MIN_MAX_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ARGMINMAXFIXTURE_H diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h index 0785af1151..112d908a81 100644 --- a/tests/validation/fixtures/ArithmeticOperationsFixture.h +++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ARITHMETICOPERATIONSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ARITHMETICOPERATIONSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ public: void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _act_info = act_info; _is_inplace = is_inplace; @@ -284,4 +290,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ARITHMETICOPERATIONSFIXTURE_H diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h index 54a0ed9e09..2374ecf64a 100644 --- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h +++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,10 +46,15 @@ class BatchNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + dt == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = dt; _use_beta = use_beta; _use_gamma = use_gamma; - _target = compute_target(shape0, shape1, epsilon, act_info, dt, data_layout); _reference = compute_reference(shape0, shape1, epsilon, act_info, dt); } @@ -165,4 +170,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BATCH_NORMALIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_BATCHNORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/BoundingBoxTransformFixture.h b/tests/validation/fixtures/BoundingBoxTransformFixture.h index 03edaeab16..84576335b0 100644 --- a/tests/validation/fixtures/BoundingBoxTransformFixture.h +++ b/tests/validation/fixtures/BoundingBoxTransformFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE -#define ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -104,6 +104,12 @@ public: void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const bool is_qasymm16 = data_type == DataType::QASYMM16; _data_type_deltas = (is_qasymm16) ? DataType::QASYMM8 : data_type; _boxes_qinfo = (is_qasymm16) ? QuantizationInfo(.125f, 0) : QuantizationInfo(); @@ -234,4 +240,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BOUNDINGBOXTRANSFORM_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_BOUNDINGBOXTRANSFORMFIXTURE_H diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h index e9d624e6f3..8297ec81dc 100644 --- a/tests/validation/fixtures/CastFixture.h +++ b/tests/validation/fixtures/CastFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_CAST_FIXTURE -#define ARM_COMPUTE_TEST_CAST_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H #include "tests/validation/fixtures/DepthConvertLayerFixture.h" @@ -38,6 +38,12 @@ class CastValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (dt_in == DataType::F16 || dt_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, dt_in, dt_out, policy); _reference = compute_reference(shape, dt_in, dt_out, policy); } @@ -151,4 +157,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CAST_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CASTFIXTURE_H diff --git a/tests/validation/fixtures/ChannelShuffleLayerFixture.h b/tests/validation/fixtures/ChannelShuffleLayerFixture.h index 530dba3893..63dfd62751 100644 --- a/tests/validation/fixtures/ChannelShuffleLayerFixture.h +++ b/tests/validation/fixtures/ChannelShuffleLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE -#define ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -47,6 +47,12 @@ class ChannelShuffleLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int num_groups, DataType data_type, DataLayout data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, num_groups, data_layout); _reference = compute_reference(shape, data_type, num_groups); } @@ -110,4 +116,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CHANNEL_SHUFFLE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CHANNELSHUFFLELAYERFIXTURE_H diff --git a/tests/validation/fixtures/ComparisonFixture.h b/tests/validation/fixtures/ComparisonFixture.h index f25d5abb73..b7c94e1c8a 100644 --- a/tests/validation/fixtures/ComparisonFixture.h +++ b/tests/validation/fixtures/ComparisonFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_COMPARISON_FIXTURE -#define ARM_COMPUTE_TEST_COMPARISON_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class ComparisonValidationGenericFixture : public framework::Fixture public: void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(op, shape0, shape1, data_type, qinfo0, qinfo1); _reference = compute_reference(op, shape0, shape1, data_type, qinfo0, qinfo1); } @@ -155,4 +161,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_COMPARISON_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_COMPARISONFIXTURE_H diff --git a/tests/validation/fixtures/ComputeAllAnchorsFixture.h b/tests/validation/fixtures/ComputeAllAnchorsFixture.h index 620f1b53fa..a0e712e567 100644 --- a/tests/validation/fixtures/ComputeAllAnchorsFixture.h +++ b/tests/validation/fixtures/ComputeAllAnchorsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE -#define ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class ComputeAllAnchorsGenericFixture : public framework::Fixture public: void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(num_anchors, data_type, info, qinfo); _reference = compute_reference(num_anchors, data_type, info, qinfo); } @@ -124,4 +130,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_COMPUTEALLANCHORS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_COMPUTEALLANCHORSFIXTURE_H diff --git a/tests/validation/fixtures/ConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h index 3a021661ac..dab055b7b9 100644 --- a/tests/validation/fixtures/ConcatenateLayerFixture.h +++ b/tests/validation/fixtures/ConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -52,6 +52,12 @@ private: public: void setup(TensorShape shape, DataType data_type, unsigned int axis) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // Create input shapes std::mt19937 gen(library->seed()); std::uniform_int_distribution<> num_dis(2, 8); @@ -170,4 +176,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CONCATENATELAYERFIXTURE_H diff --git a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h index 7ad14e1b40..5e2f9a9c3d 100644 --- a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h +++ b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE -#define ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -45,6 +45,12 @@ class ConvertFullyConnectedWeightsValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, unsigned int weights_w, DataLayout training_data_layout, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const unsigned int height = input_shape.x() * input_shape.y() * input_shape.z(); const TensorShape weights_shape(weights_w, height); @@ -128,4 +134,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONVERT_FULLY_CONNECTED_WEIGHTS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CONVERTFULLYCONNECTEDWEIGHTSFIXTURE_H diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 2a317e9b9b..780ae7a522 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -32,6 +32,7 @@ #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" #endif // ARM_COMPUTE_OPENCL_ENABLED #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" #include "src/core/NEON/kernels/arm_gemm/utils.hpp" #include "src/graph/mutators/MutatorUtils.h" #include "tests/AssetsLibrary.h" @@ -123,8 +124,14 @@ public: public: void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info, - bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false) + bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false, bool updated_sq_info_after_config = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. 
There may be hash collisions but // this is intentional as it's a very easy way to make the current // random generation process almost different for many test configurations, @@ -151,7 +158,15 @@ public: _use_dynamic_output_quant = true; } - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer, padded_weights); + if (updated_sq_info_after_config) + { + _target = compute_gemmlowp_target_for_updated_sq_info_after_config(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer, padded_weights); + } + else + { + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer, padded_weights); + } + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info, pre_pad_layer); } @@ -204,7 +219,10 @@ protected: { if(_use_dynamic_output_quant) { - std::uniform_int_distribution<int32_t> distribution(-128, 127); + // Using -127 as the lower bound because of possible overflow. + // This is a known issue and reported in the errata. + // See COMPMID-7109 for more details + std::uniform_int_distribution<int32_t> distribution(-127, 127); library->fill(tensor, distribution, i); } else @@ -298,7 +316,6 @@ protected: WeightsInfo weights_info(!reshape_weights, weights_shape[idx_width], weights_shape[idx_height], weights_shape[3]); TensorShape reshaped_weights_shape(weights_shape); - // Create tensors TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info, _data_layout); TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _weights_data_type, 1, _weight_quantization_info, _data_layout); TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, QuantizationInfo() /*bias is not a quantized type*/, _data_layout); @@ -372,6 +389,124 @@ protected: return dst; } + // Compute the target when updating static quantization information after configuration.
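+ // A minimal sketch of the call pattern exercised below (illustrative only: the tensor
+ // names and quantization values in this sketch are hypothetical; the real ones come
+ // from the fixture's test configuration):
+ //   NEGEMMConvolutionLayer conv;
+ //   conv.configure(&src, &weights, &bias, &dst, info); // tensors still carry placeholder qinfo
+ //   src.info()->set_quantization_info(QuantizationInfo(0.25f, 10, true));
+ //   conv.update_quantization_parameters();             // re-derive the internal quantized multipliers
+ //   conv.run();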
+ TensorType compute_gemmlowp_target_for_updated_sq_info_after_config(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info, + bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false) + { + ARM_COMPUTE_ASSERT((std::is_same<FunctionType, NEGEMMConvolutionLayer>::value == true)); + ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); + + const unsigned int num_groups = input_shape[2] / weights_shape[2]; + + if(_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + if(pre_pad_layer.size() > 0) + { + // make sure paddings exist for each of the c,h,w dimensions + for(unsigned int i = 0; i < 3 - pre_pad_layer.size(); ++i) + { + pre_pad_layer.push_back({ 0, 0 }); + } + + // rotate padding info from nchw to nhwc + std::rotate(pre_pad_layer.begin(), pre_pad_layer.begin() + 2, pre_pad_layer.begin() + 3); + } + } + + const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + + WeightsInfo weights_info(!reshape_weights, weights_shape[idx_width], weights_shape[idx_height], weights_shape[3]); + TensorShape reshaped_weights_shape(weights_shape); + + // Create tensors with placeholder quantization info and defer setting the correct values to a later stage. + auto qi = QuantizationInfo(0.550721, 37, true); + TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, qi, _data_layout); + TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _weights_data_type, 1, qi, _data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, _output_data_type, 1, qi, _data_layout); + TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, QuantizationInfo() /*bias is not a quantized type*/, _data_layout); + + // Create and configure function + FunctionType conv; + + const unsigned int height_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::HEIGHT); + const unsigned int width_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::WIDTH); + + const PaddingInfo pad_w = width_index < pre_pad_layer.size() ? pre_pad_layer[width_index] : PaddingInfo(0, 0); + const PaddingInfo pad_h = height_index < pre_pad_layer.size() ? pre_pad_layer[height_index] : PaddingInfo(0, 0); + + if(pre_pad_layer.size() > 0 && arm_compute::graph::is_padding_in_height_or_width(_data_layout, pre_pad_layer)) + { + // this is the logic implemented in NodeFusionMutator -> fuse_pad_with_convolution + const PadStrideInfo new_conv_info( + info.stride().first, + info.stride().second, + info.pad_left() + pad_w.first, + info.pad_right() + pad_w.second, + info.pad_top() + pad_h.first, + info.pad_bottom() + pad_h.second, + info.round()); + detail::configure_conv_function(conv, &src, &weights, &bias, &dst, new_conv_info, weights_info, dilation, act_info, num_groups); + } + else + { + detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups); + } + + // After calling configure, we appropriately set the correct quantization info and update ACL.
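+ // Note: the third QuantizationInfo constructor argument (true) is assumed to mark the
+ // info as dynamic, i.e. allowed to change after configure(); this is what makes the
+ // placeholder-then-update flow legal.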
+ src.info()->set_quantization_info(QuantizationInfo(_quantization_info.scale(), _quantization_info.offset(), true)); + weights.info()->set_quantization_info(QuantizationInfo(_weight_quantization_info.scale(), _weight_quantization_info.offset(), true)); + dst.info()->set_quantization_info(QuantizationInfo(_dst_q_info.scale(), _dst_q_info.offset(), true)); + + // Propagate the correct quantization info through ACL + NEGEMMConvolutionLayer *lp = reinterpret_cast<NEGEMMConvolutionLayer *>(&conv); + lp->update_quantization_parameters(); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + // Test "add padding after configure" behavior. This behavior should not affect the correctness + add_padding_x({ &src, &bias, &dst }, _data_layout); + // Padding weights may affect code path in some backends + if (padded_weights) + { + add_padding_x({ &weights }, _data_layout); + } + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(bias), 2 + _hash); + + if(_mixed_layout) + { + mix_layout(conv, src, dst); + } + else + { + // Compute Convolution function + conv.run(); + } + + return dst; + } + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({})) { @@ -479,6 +614,44 @@ public: } }; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class ConvolutionValidationForUpdatedStaticQuantInfoAfterConfigureFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, + DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info) + { + ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, + data_type, data_type, data_layout, quantization_info, quantization_info, act_info, mixed_layout, + PaddingList({}), false, true); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> +class ConvolutionValidationQuantizedMixedTypeFixture + : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation, + bool reshape_weights, + DataType data_type, + DataType weights_data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info, + ActivationLayerInfo
act_info) + { + ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup( + input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, + weights_data_type, data_layout, quantization_info, weight_quantization_info, act_info); + } +}; + template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> class ConvolutionValidationQuantizedPerChannelFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> { @@ -597,6 +770,12 @@ public: void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataLayout data_layout, const DataType data_type) { + if(std::is_same<TensorClass, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + conv = std::make_unique<ConvolutionFunction>(); // prepare data _data_layout = data_layout; @@ -783,6 +962,12 @@ class HasOptImplFixture : public framework::Fixture public: void setup(DataType data_type, arm_compute::WeightFormat query_weight_format) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + auto conv = std::make_unique<ConvolutionClass>(); const auto src_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); const auto weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC); diff --git a/tests/validation/fixtures/CpuActivationFixture.h b/tests/validation/fixtures/CpuActivationFixture.h new file mode 100644 index 0000000000..9e05db969a --- /dev/null +++ b/tests/validation/fixtures/CpuActivationFixture.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" + +#include <random> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuActivationValidationGenericFixture : public framework::Fixture +{ +public: + + void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) + { + ActivationLayerInfo info(function, alpha_beta, alpha_beta); + + _in_place = in_place; + _data_type = data_type; + // We are only testing the fp32 data type for the CpuActivation wrapper. Hence, + // we can ignore quantization_info here and just use the default one. + _output_quantization_info = quantization_info; + _input_quantization_info = quantization_info; + + _function = function; + _target = compute_target(shape, info); + _reference = compute_reference(shape, info); + } + +protected: + std::vector<T> get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalence partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ?
T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector<T> boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracting/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values) + { + for(auto &v : new_values) + { + if(v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({ min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), static_cast<T>(center_value - delta) }); // lower partition + insert_values({ static_cast<T>(center_value), static_cast<T>(center_value + delta), static_cast<T>(upper_quarter), static_cast<T>(max - delta), max }); // upper partition + + return boundary_values; + } + + template <typename U> + void fill(U &&tensor) + { + if(is_data_type_float(_data_type)) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(_function, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound))); + } + else + { + PixelValue min{}; + PixelValue max{}; + std::tie(min, max) = get_min_max(tensor.data_type()); + library->fill_static_values(tensor, get_boundary_values(min.get<T>(), max.get<T>())); + } + } + + TensorType compute_target(const TensorShape &shape, ActivationLayerInfo info) + { + // Create tensors + TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW); + TensorType dst = create_tensor<TensorType>(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW); + + // Create and configure function + FunctionType act_layer; + + TensorType *dst_ptr = _in_place ?
&src : &dst; + + if(!_in_place) + { + act_layer.configure(src.info(), dst.info(), info); + } + else + { + act_layer.configure(src.info(), nullptr, info); + } + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + + if(!_in_place) + { + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC, &src }, { arm_compute::TensorType::ACL_DST, dst_ptr } }; + act_layer.run(run_pack); + + if(_in_place) + { + return src; + } + else + { + return dst; + } + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo info) + { + // Create reference + SimpleTensor<T> src{ shape, _data_type, 1, _input_quantization_info }; + + // Fill reference + fill(src); + + return reference::activation_layer<T>(src, info, _output_quantization_info); + } + +protected: + TensorType _target{}; + SimpleTensor<T> _reference{}; + bool _in_place{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; + DataType _data_type{}; + ActivationLayerInfo::ActivationFunction _function{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuActivationValidationFixture : public CpuActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type) + { + CpuActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, QuantizationInfo()); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUACTIVATIONFIXTURE_H diff --git a/tests/validation/fixtures/CpuArithmeticOperationsFixture.h b/tests/validation/fixtures/CpuArithmeticOperationsFixture.h new file mode 100644 index 0000000000..6abfe5803e --- /dev/null +++ b/tests/validation/fixtures/CpuArithmeticOperationsFixture.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUARITHMETICOPERATIONSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUARITHMETICOPERATIONSFIXTURE_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorShape.h" + +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ArithmeticOperations.h" + +#include <cstdint> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuArithmeticOperationGenericFixture : public framework::Fixture +{ +public: + void setup(reference::ArithmeticOperation op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + ConvertPolicy convert_policy, + QuantizationInfo qinfo0, + QuantizationInfo qinfo1, + QuantizationInfo qinfo_out, + ActivationLayerInfo act_info, + bool is_inplace) + { + if (std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + + _op = op; + _act_info = act_info; + _is_inplace = is_inplace; + _target = compute_target(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out); + _reference = compute_reference(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + ConvertPolicy convert_policy, + QuantizationInfo qinfo0, + QuantizationInfo qinfo1, + QuantizationInfo qinfo_out) + { + // Create tensors + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type, 1, qinfo0); + TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, qinfo_out); + + // Check whether to do in-place computation and whether the inputs are broadcast compatible + TensorType *actual_dst = &dst; + if (_is_inplace) + { + bool src1_is_inplace = + !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out); + bool src2_is_inplace = + !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if (src1_is_inplace) + { + actual_dst = &ref_src1; + } + else + { + actual_dst = &ref_src2; + } + } + + // Create and configure function + FunctionType arith_op; + arith_op.configure(ref_src1.info(), ref_src2.info(), actual_dst->info(), convert_policy, _act_info); + + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); + + // Allocate tensors + ref_src1.allocator()->allocate(); + ref_src2.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); + + // If not computing in-place, we still need to allocate the original dst + if (!_is_inplace) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); +
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + // Fill tensors + fill(AccessorType(ref_src1), 0); + fill(AccessorType(ref_src2), 1); + + // Compute function + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &ref_src1}, + {arm_compute::TensorType::ACL_SRC_1, &ref_src2}, + {arm_compute::TensorType::ACL_DST, &dst}}; + arith_op.run(run_pack); + + return std::move(*actual_dst); + } + + SimpleTensor<uint8_t> compute_reference(const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + ConvertPolicy convert_policy, + QuantizationInfo qinfo0, + QuantizationInfo qinfo1, + QuantizationInfo qinfo_out) + { + // Create reference + SimpleTensor<uint8_t> ref_src1{shape0, data_type, 1, qinfo0}; + SimpleTensor<uint8_t> ref_src2{shape1, data_type, 1, qinfo1}; + SimpleTensor<uint8_t> ref_dst{TensorShape::broadcast_shape(shape0, shape1), data_type, 1, qinfo_out}; + + // Fill reference + fill(ref_src1, 0); + fill(ref_src2, 1); + + auto result = reference::arithmetic_operation<uint8_t>(_op, ref_src1, ref_src2, ref_dst, convert_policy); + return _act_info.enabled() ? reference::activation_layer(result, _act_info, qinfo_out) : result; + } + + TensorType _target{}; + SimpleTensor<uint8_t> _reference{}; + reference::ArithmeticOperation _op{reference::ArithmeticOperation::ADD}; + ActivationLayerInfo _act_info{}; + bool _is_inplace{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuArithmeticAdditionValidationFixture + : public CpuArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType> +{ +public: + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) + { + CpuArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType>::setup( + reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuArithmeticSubtractionValidationFixture + : public CpuArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType> +{ +public: + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) + { + CpuArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType>::setup( + reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUARITHMETICOPERATIONSFIXTURE_H diff --git a/tests/validation/fixtures/CpuDepthwiseConv2dFixture.h b/tests/validation/fixtures/CpuDepthwiseConv2dFixture.h new file mode 100644 index 0000000000..1197687358 --- /dev/null +++ b/tests/validation/fixtures/CpuDepthwiseConv2dFixture.h @@ -0,0 +1,905 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUDEPTHWISECONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUDEPTHWISECONV2DFIXTURE_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "utils/Utils.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include <cstdint> +#include <random> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::misc::shape_calculator; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> +class CpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = + typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type; + + void setup_quantization(TensorShape input_shape, + TensorShape weights_shape, + QuantizationInfo &input_q_info, + QuantizationInfo &weights_q_info, + DataType data_type) + { + ARM_COMPUTE_UNUSED(input_shape); + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + _input_quantization_info = QuantizationInfo(scale_lhs, offset_lhs); + _weights_quantization_info = QuantizationInfo(scale_rhs, offset_rhs); + + QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias( + input_q_info, weights_q_info, weights_shape.y() /* heights */, 
weights_shape.x() /* width */, + 1 /* channels */, data_type, 0.5f /* bias_fraction */); + + _output_quantization_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + } + +public: + void setup(TensorShape in_shape, + Size2D kernel_size, + PadStrideInfo pad_stride_info, + Size2D dilation, + unsigned int depth_multiplier, + DataType input_data_type, + DataType weights_data_type, + QuantizationInfo input_quantization_info, + QuantizationInfo weights_quantization_info, + QuantizationInfo output_quantization_info, + DataLayout data_layout, + ActivationLayerInfo act_info, + bool mixed_layout = false, + bool in_place = false, + bool run_twice = false) + { + ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); + + _skip_test = false; + if (std::is_same<TensorType, Tensor>::value && // Cpu + (input_data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + _skip_test = true; + return; + } + + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = in_shape[0] + in_shape[1] + in_shape[2] + in_shape[3] + kernel_size.width + kernel_size.height + + dilation.x() + dilation.y() + pad_stride_info.pad_bottom() + pad_stride_info.pad_left() + + pad_stride_info.pad_right() + pad_stride_info.pad_top(); + + _mixed_layout = mixed_layout; + _input_shape = in_shape; + _input_data_type = input_data_type; + _weights_data_type = weights_data_type; + _data_layout = data_layout; + _pad_stride_info = pad_stride_info; + _act_info = act_info; + _depth_multiplier = depth_multiplier; + _dilation = dilation; + _in_place = in_place; + _run_twice = run_twice; + + _bias_data_type = is_data_type_quantized(_input_data_type) ?
DataType::S32 : _input_data_type; + + _weights_shape = TensorShape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(_input_shape, 1, _input_data_type); + const TensorInfo we_info(_weights_shape, 1, _weights_data_type); + const ConvolutionInfo info{_pad_stride_info, _depth_multiplier, _act_info, _dilation}; + _output_shape = compute_depthwise_convolution_shape(in_info, we_info, info); + + _weights_shape.set(2, _output_shape.z()); + _biases_shape = TensorShape(_weights_shape[2]); + + _input_quantization_info = input_quantization_info; + _weights_quantization_info = weights_quantization_info; + _output_quantization_info = output_quantization_info; + + if (is_data_type_quantized(_input_data_type) && !is_data_type_quantized_symmetric(weights_data_type) && + (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(in_shape, _weights_shape, _input_quantization_info, _weights_quantization_info, + _input_data_type); + _use_dynamic_output_quant = true; + } + } + + void configure_target() + { + TensorShape input_shape = _input_shape; + TensorShape weights_shape = _weights_shape; + TensorShape output_shape = _output_shape; + + if (_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + } + + // Create tensors + _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); + _weights = + create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); + if (_run_twice) + { + _weights.info()->set_are_values_constant(false); + } + _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); + TensorType *target_to_use = nullptr; + if (!_in_place) + { + _target = + create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout); + target_to_use = &_target; + } + + add_padding_x({&_src, &_biases}, _data_layout); + add_padding_x({&_weights}, _data_layout, true); + if (!_in_place) + { + add_padding_x({&_target}, _data_layout); + } + + // Create Depthwise Convolution configure function + _dwc.configure(_src.info(), _weights.info(), _biases.info(), target_to_use->info(), _pad_stride_info, + _depth_multiplier, _act_info, _dilation); + + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); + } + + void allocate_and_run_target() + { + // Allocate tensors + _src.allocator()->allocate(); + _weights.allocator()->allocate(); + _biases.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + + ITensorPack pack; + pack.add_tensor(arm_compute::TensorType::ACL_SRC_0, &_src); + pack.add_tensor(arm_compute::TensorType::ACL_SRC_1, &_weights); + pack.add_tensor(arm_compute::TensorType::ACL_SRC_2, &_biases); + pack.add_tensor(arm_compute::TensorType::ACL_DST, &_target); + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(_dwc.workspace(), mg, pack, pack); + + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(_src), 0 + _hash); + fill(AccessorType(_weights), 
1 + _hash); + fill(AccessorType(_biases), 2 + _hash); + + // Run with variable input + if (_run_twice) + { + _dwc.run(pack); + + // Fill tensors with a new seed + fill(AccessorType(_src), 3 + _hash); + fill(AccessorType(_weights), 4 + _hash); + fill(AccessorType(_biases), 5 + _hash); + } + + if (_mixed_layout) + { + mix_layout(_dwc, _src, _target); + } + else + { + // Compute function + _dwc.run(pack); + } + } + + void compute_reference() + { + SimpleTensor<T> src{_input_shape, _input_data_type, 1, _input_quantization_info}; + SimpleTensor<TW> weights{_weights_shape, _weights_data_type, 1, _weights_quantization_info}; + SimpleTensor<TBias> biases{_biases_shape, _bias_data_type, 1, _input_quantization_info}; + + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(biases, 2 + _hash); + + if (_run_twice) + { + SimpleTensor<T> depth_out = + reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, + _depth_multiplier, _dilation, _output_quantization_info); + if (_act_info.enabled()) + { + reference::activation_layer<T>(depth_out, _act_info); + } + + fill(src, 3 + _hash); + fill(weights, 4 + _hash); + fill(biases, 5 + _hash); + } + + SimpleTensor<T> depth_out = + reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, + _dilation, _output_quantization_info); + _reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out; + } + +protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + ARM_COMPUTE_ERROR_ON(_in_place); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? 
DataLayout::NHWC : DataLayout::NCHW); + + ITensorPack pack; + pack.add_tensor(arm_compute::TensorType::ACL_SRC_0, &_src); + pack.add_tensor(arm_compute::TensorType::ACL_SRC_1, &_weights); + pack.add_tensor(arm_compute::TensorType::ACL_SRC_2, &_biases); + pack.add_tensor(arm_compute::TensorType::ACL_DST, &_target); + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(_dwc.workspace(), mg, pack, pack); + + // Compute Convolution function + layer.run(pack); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(_data_layout); + dst.info()->set_data_layout(_data_layout); + } + + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::QASYMM8: + { + if (_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(0, 255); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } + break; + } + case DataType::QASYMM8_SIGNED: + { + if (_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(-128, 127); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = + get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } + break; + } + case DataType::QSYMM8_PER_CHANNEL: + { + int min_bound = 128; + int max_bound = -127; + for (size_t i = 0; i < _weights_quantization_info.scale().size(); i++) + { + std::pair<int, int> bounds = + get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i); + if (bounds.first < min_bound) + { + min_bound = bounds.first; + } + if (bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound); + library->fill(tensor, distribution, i); + break; + } + case DataType::S32: + { + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); + library->fill(tensor, distribution, i); + break; + } + case DataType::BFLOAT16: + { + arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + + TensorType _src{}; + TensorType _weights{}; + TensorType _biases{}; + FunctionType _dwc{}; + + TensorShape _input_shape{}; + TensorShape _weights_shape{}; + TensorShape _biases_shape{}; + TensorShape _output_shape{}; + DataType _input_data_type{}; + DataType _weights_data_type{}; + DataType _bias_data_type{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _weights_quantization_info{}; + 
QuantizationInfo _output_quantization_info{}; + DataLayout _data_layout{}; + PadStrideInfo _pad_stride_info{}; + ActivationLayerInfo _act_info{}; + unsigned int _depth_multiplier{}; + Size2D _dilation{}; + bool _mixed_layout{false}; + bool _in_place{false}; + bool _run_twice{false}; + bool _use_dynamic_output_quant{false}; + bool _skip_test{false}; + + int32_t _hash{0}; + // Random initialization limits + // Default values are previously handcrafted limits + // that should be used when we don't use dynamic quantization + int32_t _min_bias{-100}; + int32_t _max_bias{100}; + int32_t _min_u8{0}; + int32_t _max_u8{50}; + int32_t _min_s8{-25}; + int32_t _max_s8{25}; +}; + +template <typename TensorType, + typename AccessorType, + typename FunctionType, + typename T, + bool mixed_layout = false, + bool in_place = false, + bool run_twice = false> +class CpuDepthwiseConv2dValidationFixture + : public CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(TensorShape in_shape, + Size2D kernel_size, + PadStrideInfo pad_stride_info, + Size2D dilation, + unsigned int depth_multiplier, + DataType data_type, + DataLayout data_layout, + ActivationLayerInfo act_info) + { + CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup( + in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), data_layout, act_info, mixed_layout, in_place, + run_twice); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuDepthwiseConv2dNativeValidationFixture + : public CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(size_t width, + size_t height, + size_t channel, + size_t batch, + Size2D kernel_size, + size_t depth_multiplier, + Size2D dilation, + Size2D stride, + bool padding_valid, + DataType data_type, + DataLayout data_layout) + { + _dilation = dilation; + _depth_multiplier = depth_multiplier; + _data_type = data_type; + _data_layout = data_layout; + + _input_shape = TensorShape(width, height, channel, batch); + _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier); + _biases_shape = TensorShape(_weights_shape.z()); + + if (padding_valid) + { + _conv_info = PadStrideInfo(stride.width, stride.height); + } + else + { + _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), + DataLayout::NCHW, _dilation); + } + } + + void configure_target() + { + TensorShape input_shape = _input_shape; + TensorShape weights_shape = _weights_shape; + + if (_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + } + + // Create tensors + _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + + add_padding_x({&_src, &_biases, &_target}, _data_layout); + add_padding_x({&_weights}, _data_layout, true); + + // Create Depthwise Convolution configure function + const ConvolutionInfo info{_conv_info,
_depth_multiplier, ActivationLayerInfo(), _dilation}; + _dwc.configure(_src.info(), _weights.info(), _biases.info(), _target.info(), info); + + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); + } + + void allocate_and_run_target() + { + // Allocate tensors + _src.allocator()->allocate(); + _weights.allocator()->allocate(); + _biases.allocator()->allocate(); + _target.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(_src), 0); + fill(AccessorType(_weights), 1); + fill(AccessorType(_biases), 2); + + arm_compute::ITensorPack pack; + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_0, &_src); + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_1, &_weights); + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_2, &_biases); + pack.add_tensor(arm_compute::TensorType::ACL_DST, &_target); + + // Compute function + _dwc.run(pack); + } + + void compute_reference() + { + SimpleTensor<T> src{_input_shape, _data_type}; + SimpleTensor<T> weights{_weights_shape, _data_type}; + SimpleTensor<T> biases{_biases_shape, _data_type}; + + fill(src, 0); + fill(weights, 1); + fill(biases, 2); + + const ConvolutionInfo info{_conv_info, _depth_multiplier, ActivationLayerInfo(), _dilation}; + const TensorShape dst_shape = compute_depthwise_convolution_shape( + TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info); + _reference = + reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + + TensorType _src{}; + TensorType _weights{}; + TensorType _biases{}; + FunctionType _dwc{}; + + TensorShape _input_shape{}; + TensorShape _weights_shape{}; + TensorShape _biases_shape{}; + DataType _data_type{}; + DataLayout _data_layout{}; + PadStrideInfo _conv_info{}; + Size2D _dilation{}; + unsigned int _depth_multiplier{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool in_place = false> +class CpuDepthwiseConv2dNativeConfigurableValidationFixture + : public CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(size_t width, + size_t height, + size_t channel, + size_t batch, + Size2D kernel_size, + size_t depth_multiplier, + Size2D dilation, + Size2D stride, + bool padding_valid, + DataType data_type, + DataLayout data_layout, + const ActivationLayerInfo &act_info, + unsigned int n0, + bool export_to_cl_image) + { + _dilation = dilation; + _depth_multiplier = depth_multiplier; + _data_type = data_type; + _data_layout = data_layout; + _act_info = act_info; + _n0 = n0; + _export_to_cl_image = export_to_cl_image; + _in_place = in_place; + + _input_shape = TensorShape(width, height, channel, batch); + _weights_shape = TensorShape(kernel_size.width, 
kernel_size.height, channel * _depth_multiplier); + _biases_shape = TensorShape(_weights_shape.z()); + + if (padding_valid) + { + _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), + DataLayout::NCHW, _dilation); + } + else + { + _conv_info = PadStrideInfo(stride.width, stride.height); + } + } + + void configure_target() + { +#if defined(ARM_COMPUTE_OPENCL_ENABLED) + if (_export_to_cl_image) + { + _validate_output &= image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + _validate_output &= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0); + } +#endif // ARM_COMPUTE_OPENCL_ENABLED + + if (!_validate_output) + { + return; + } + + TensorShape input_shape = _input_shape; + TensorShape weights_shape = _weights_shape; + + if (_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + } + + // Create tensors + _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType *target_to_use = nullptr; + if (!_in_place) + { + _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + target_to_use = &_target; + } + + DWCComputeKernelInfo dwc_info; + dwc_info.n0 = _n0; + dwc_info.m0 = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1; + dwc_info.export_input_to_cl_image = false; + dwc_info.export_weights_to_cl_image = _export_to_cl_image; + + const ConvolutionInfo conv_kernel_info{_conv_info, _depth_multiplier, _act_info, _dilation}; + + add_padding_x({&_src, &_biases, &_target}, _data_layout); + add_padding_x({&_weights}, _data_layout, + _export_to_cl_image); // Don't add left padding if cl image will be used + + // Create Depthwise Convolution configure function + _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info); + + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); + } + + void allocate_and_run_target() + { + if (!_validate_output) + { + return; + } + + // Allocate tensors + _src.allocator()->allocate(); + _weights.allocator()->allocate(); + _biases.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + if (!_in_place) + { + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + } + + // Fill tensors + fill(AccessorType(_src), 0); + fill(AccessorType(_weights), 1); + fill(AccessorType(_biases), 2); + + // Test Multi DataLayout graph cases, when the data layout changes after configure + _src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + if (!_in_place) + { + _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? 
DataLayout::NHWC : DataLayout::NCHW); + } + + // Compute function + _dwc.run(); + + // Reinstating original data layout for the test suite to properly check the values + if (!_in_place) + { + _target.info()->set_data_layout(_data_layout); + } + } + + void compute_reference() + { + if (!_validate_output) + { + return; + } + + SimpleTensor<T> src{_input_shape, _data_type}; + SimpleTensor<T> weights{_weights_shape, _data_type}; + SimpleTensor<T> biases{_biases_shape, _data_type}; + + fill(src, 0); + fill(weights, 1); + fill(biases, 2); + + const ConvolutionInfo info{_conv_info, _depth_multiplier, _act_info, _dilation}; + const TensorShape dst_shape = compute_depthwise_convolution_shape( + TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info); + _reference = reference::activation_layer( + reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), + _act_info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + + TensorType _src{}; + TensorType _weights{}; + TensorType _biases{}; + FunctionType _dwc{}; + + TensorShape _input_shape{}; + TensorShape _weights_shape{}; + TensorShape _biases_shape{}; + DataType _data_type{}; + DataLayout _data_layout{}; + PadStrideInfo _conv_info{}; + ActivationLayerInfo _act_info{}; + Size2D _dilation{}; + unsigned int _depth_multiplier{}; + unsigned int _n0{}; + bool _export_to_cl_image{}; + bool _validate_output{true}; + bool _in_place{false}; +}; + +template <typename TensorType, + typename AccessorType, + typename FunctionType, + typename T, + bool mixed_layout = false, + bool in_place = false> +class CpuDepthwiseConv2dValidationQuantizedFixture + : public CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(TensorShape in_shape, + Size2D kernel_size, + PadStrideInfo pad_stride_info, + Size2D dilation, + unsigned int depth_multiplier, + DataType data_type, + QuantizationInfo input_quantization_info, + QuantizationInfo output_quantization_info, + DataLayout data_layout, + ActivationLayerInfo act_info) + { + CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup( + in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, + input_quantization_info, input_quantization_info, output_quantization_info, data_layout, act_info, + mixed_layout, in_place); + } +}; + +template <typename TensorType, + typename AccessorType, + typename FunctionType, + typename T, + typename TW, + bool in_place = false> +class CpuDepthwiseConv2dValidationQuantizedPerChannelFixture + : public CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> +{ +public: + void setup(TensorShape in_shape, + Size2D kernel_size, + PadStrideInfo pad_stride_info, + Size2D dilation, + unsigned int depth_multiplier, + DataType input_data_type, + DataType weights_data_type, + QuantizationInfo input_quantization_info, + QuantizationInfo output_quantization_info, + DataLayout 
data_layout, + ActivationLayerInfo act_info) + { + const float out_scale = output_quantization_info.uniform().scale; + const float in_scale = input_quantization_info.uniform().scale; + + std::vector<float> weights_scales{}; + std::mt19937 gen(library->seed()); + std::uniform_real_distribution<float> dis(0.01f, out_scale / in_scale); + for (size_t i = 0; i < in_shape.z() * depth_multiplier; ++i) + { + weights_scales.push_back(dis(gen)); + } + + CpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup( + in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, input_data_type, weights_data_type, + input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info, data_layout, act_info, + false, in_place); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUDEPTHWISECONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CpuElementwiseFixture.h b/tests/validation/fixtures/CpuElementwiseFixture.h new file mode 100644 index 0000000000..29757325ff --- /dev/null +++ b/tests/validation/fixtures/CpuElementwiseFixture.h @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUELEMENTWISEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUELEMENTWISEFIXTURE_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" + +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuElementwiseOperationsGenericFixture : public framework::Fixture +{ +public: + void setup(ArithmeticOperation op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type0, + DataType data_type1, + DataType output_data_type, + bool is_inplace = false) + { + if (std::is_same<TensorType, Tensor>::value && // Cpu + (data_type0 == DataType::F16 || data_type1 == DataType::F16 || output_data_type == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + + _op = op; + _is_inplace = is_inplace; + + _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type); + _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + if (is_data_type_float(tensor.data_type())) + { + switch (_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)}); + break; + case ArithmeticOperation::POWER: + library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type0, + DataType data_type1, + DataType output_data_type) + { + // Create tensors + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, QuantizationInfo()); + TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, QuantizationInfo()); + TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, QuantizationInfo()); + + // Check whether to do in-place computation and whether the inputs are broadcast compatible + TensorType *actual_dst = &dst; + if (_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && + (data_type0 == output_data_type); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && + (data_type1 == output_data_type); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if (src1_is_inplace) + { + actual_dst = &ref_src1; + } + else + { + actual_dst = &ref_src2; + } + } + + // Create and configure function + FunctionType elem_op; + elem_op.configure(ref_src1.info(), ref_src2.info(), actual_dst->info()); + + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); + + // Allocate tensors + ref_src1.allocator()->allocate(); + ref_src2.allocator()->allocate(); + + // If not doing 
in-place computation, we still need to allocate the original dst + if (!_is_inplace) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(ref_src1), 0); + fill(AccessorType(ref_src2), 1); + + // Compute function + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &ref_src1}, + {arm_compute::TensorType::ACL_SRC_1, &ref_src2}, + {arm_compute::TensorType::ACL_DST, actual_dst}}; + + elem_op.run(run_pack); + + return std::move(*actual_dst); + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type0, + DataType data_type1, + DataType output_data_type) + { + // Create reference + SimpleTensor<T> ref_src1{shape0, data_type0, 1, QuantizationInfo()}; + SimpleTensor<T> ref_src2{shape1, data_type1, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst{TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_src1, 0); + fill(ref_src2, 1); + + return reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + ArithmeticOperation _op{ArithmeticOperation::ADD}; + bool _is_inplace{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuElementwiseDivisionValidationFixture + : public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup( + const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) + { + CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuElementwiseMaxValidationFixture + : public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup( + const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) + { + CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuElementwiseMinValidationFixture + : public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup( + const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) + { + CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, is_inplace); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUELEMENTWISEFIXTURE_H diff --git a/tests/validation/fixtures/CpuGemmAssemblyDispatchFixture.h b/tests/validation/fixtures/CpuGemmAssemblyDispatchFixture.h new file mode 100644 index 0000000000..fc070eb7a0 --- /dev/null +++ b/tests/validation/fixtures/CpuGemmAssemblyDispatchFixture.h @@ 
-0,0 +1,238 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMASSEMBLYDISPATCHFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMASSEMBLYDISPATCHFIXTURE_H + +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/GEMM.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class CpuGemmAssemblyDispatchGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape shape_c, + TensorShape output_shape, + float alpha, + float beta, + DataType data_type, + bool accumulate) + { + ARM_COMPUTE_UNUSED(alpha); + ARM_COMPUTE_UNUSED(beta); + _target = compute_target(shape_a, shape_b, shape_c, output_shape, data_type, accumulate); + _reference = compute_reference(shape_a, shape_b, output_shape, data_type, accumulate); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{float(lo), float(hi)}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(lo, hi); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &shape_c, + const TensorShape &output_shape, + DataType data_type, + bool accumulate) + { + ARM_COMPUTE_UNUSED(shape_c); + // Create tensors + TensorType a = create_tensor<TensorType>(shape_a, data_type, 1); + TensorType b = create_tensor<TensorType>(shape_b, data_type, 1); + TensorType *c = nullptr; + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); + + // Create and configure function + FunctionType gemm; + + add_padding_x({&a, &b, &dst}); + + GEMMInfo gemm_info; + gemm_info.set_accumulate(accumulate); + + ARM_COMPUTE_ASSERT(gemm.validate(a.info(), b.info(), nullptr, dst.info(), gemm_info)); + + // The GEMMinfo includes the values of the depth in case of 
a reinterpreted 3D output. + // If the output shape has the same number of dimensions as the input, a plain 2D matrix multiplication is performed (depth_output_reinterpreted_as_3D = 0); + // otherwise the reinterpreted version of GEMM is used (depth_output_reinterpreted_as_3D = depth of the 3D output). + gemm.configure(a.info(), b.info(), nullptr, dst.info(), gemm_info); + + ARM_COMPUTE_ASSERT(gemm.is_configured()); + + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(a), 0, -1.f, 1.f); + fill(AccessorType(b), 1, -1.f, 1.f); + if (accumulate) + { + fill(AccessorType(dst), 6); + } + + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &a}, + {arm_compute::TensorType::ACL_SRC_1, &b}, + {arm_compute::TensorType::ACL_SRC_2, c}, + {arm_compute::TensorType::ACL_DST_0, &dst}}; + + // Prepare memory + ITensorPack prep_pack{{arm_compute::TensorType::ACL_SRC_1, &b}, {arm_compute::TensorType::ACL_SRC_2, c}}; + + experimental::MemoryRequirements aux_mem_req = gemm.workspace(); + MemoryGroup memory_group{}; + + WorkspaceData<Tensor> workspace = manage_workspace<Tensor>(aux_mem_req, memory_group, run_pack, prep_pack); + + gemm.prepare(prep_pack); + MemoryGroupResourceScope scope_mg(memory_group); + + auto has_reshape = std::find_if(aux_mem_req.begin(), aux_mem_req.end(), + [](const arm_compute::experimental::MemoryInfo &m) -> bool { + return m.lifetime == arm_compute::experimental::MemoryLifetime::Persistent; + }); + + if (has_reshape != std::end(aux_mem_req)) + { + b.mark_as_unused(); + } + else + { + run_pack.add_const_tensor(ACL_SRC_1, &b); + } + + // Release temporary tensors that are only used in prepare stage + release_temporaries<Tensor>(aux_mem_req, workspace); + // End of prepare stage + + // Compute GEMM function + gemm.run(run_pack); + + a.allocator()->free(); + b.allocator()->free(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + DataType data_type, + bool accumulate) + { + // Create reference + SimpleTensor<T> a{shape_a, data_type, 1}; + SimpleTensor<T> b{shape_b, data_type, 1}; + SimpleTensor<T> c{output_shape, data_type, 1}; + SimpleTensor<T> dst{output_shape, data_type, 1}; + + // Fill reference + fill(a, 0, -1.f, 1.f); + fill(b, 1, -1.f, 1.f); + fill(c, 2); + + // Pre-fill dst when testing in-place accumulation + if (accumulate) + { + fill(dst, 6); + } + + // Setting beta to 0 will effectively disable C for the + // computation of the reference: A * B + 0 * C + if (accumulate) + { + reference::gemm_accumulate<T>(a, b, c, 1.0f, 0.f, dst); + return dst; + } + else + { + return reference::gemm<T>(a, b, c, 1.f, 0.f); + } + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool accumulate> +class CpuGemmAssemblyDispatchValidationFixture + : protected CpuGemmAssemblyDispatchGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape 
shape_c, + TensorShape output_shape, + float alpha, + float beta, + DataType data_type) + { + CpuGemmAssemblyDispatchGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMASSEMBLYDISPATCHFIXTURE_H diff --git a/tests/validation/fixtures/CpuGemmConv2dFixture.h b/tests/validation/fixtures/CpuGemmConv2dFixture.h new file mode 100644 index 0000000000..c8e82fb8a0 --- /dev/null +++ b/tests/validation/fixtures/CpuGemmConv2dFixture.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/graph/Utils.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Utils.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuGemmConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation) + { + _dilation = dilation; + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + weights_shape[0] + + weights_shape[1] + weights_shape[2] + weights_shape[3]; + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + // We need to permute to the same layout that the reference impl needs. 
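+ // The conv2d dataset shapes are NCHW-ordered ([W, H, C] in ACL dimension numbering); + // PermutationVector(2U, 0U, 1U) re-orders them to NHWC ([C, W, H]) to match the fixture's + // NHWC _data_layout, while compute_reference() below keeps the unpermuted shapes.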
+ permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + const auto src_info = TensorInfo(input_shape, 1, DataType::F32, _data_layout); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, _data_layout); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, _data_layout); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, _data_layout); + + auto conv = std::make_unique<FunctionType>(); + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, info); + ARM_COMPUTE_ASSERT(conv->validate(&src_info, &weights_info, &biases_info, &dst_info, info)); + + // Create tensors + auto src = create_tensor<Tensor>(src_info); + auto weights = create_tensor<Tensor>(weights_info); + auto biases = create_tensor<Tensor>(biases_info); + auto dst = create_tensor<Tensor>(dst_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src}, + {arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}, + {arm_compute::TensorType::ACL_DST, &dst}}; + ITensorPack prep_pack{{arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(aux_mem_req, mg, run_pack, prep_pack); + + // Fill tensors + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(biases), 2 + _hash); + + conv->prepare(prep_pack); + conv->run(run_pack); + + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + + return dst; + } + + SimpleTensor<float> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info) + { + // Create reference + SimpleTensor<float> src{input_shape, DataType::F32}; + SimpleTensor<float> weights{weights_shape, DataType::F32}; + SimpleTensor<float> bias{bias_shape, DataType::F32}; + + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); + + return reference::convolution_layer<float>(src, weights, bias, output_shape, info, _dilation); + } + + TensorType _target{}; + SimpleTensor<float> _reference{}; + Size2D _dilation{}; + int32_t _hash{0}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h b/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h new file mode 100644 index 0000000000..2e4000117f --- /dev/null +++ b/tests/validation/fixtures/CpuGemmDirectConv2dFixture.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/graph/Utils.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Utils.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuGemmDirectConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation) + { + _dilation = dilation; + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + weights_shape[0] + + weights_shape[1] + weights_shape[2] + weights_shape[3]; + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + // We need to permute to the same layout that the reference impl needs. 
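+ // As in CpuGemmConv2dValidationFixture, PermutationVector(2U, 0U, 1U) converts the + // NCHW-ordered dataset shapes ([W, H, C]) to NHWC ([C, W, H]) for the target tensors; + // the reference computation uses the original shapes.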
+ permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + const auto src_info = TensorInfo(input_shape, 1, DataType::F32, _data_layout); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, _data_layout); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, _data_layout); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, _data_layout); + const auto conv_info = Conv2dInfo{info, _dilation, ActivationLayerInfo(), false, 1}; + + auto conv = std::make_unique<FunctionType>(); + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, conv_info); + ARM_COMPUTE_ASSERT(conv->validate(&src_info, &weights_info, &biases_info, &dst_info, conv_info)); + + // Create tensors + auto src = create_tensor<Tensor>(src_info); + auto weights = create_tensor<Tensor>(weights_info); + auto biases = create_tensor<Tensor>(biases_info); + auto dst = create_tensor<Tensor>(dst_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src}, + {arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}, + {arm_compute::TensorType::ACL_DST, &dst}}; + ITensorPack prep_pack{{arm_compute::TensorType::ACL_SRC_1, &weights}, + {arm_compute::TensorType::ACL_SRC_2, &biases}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(aux_mem_req, mg, run_pack, prep_pack); + + // Fill tensors + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(biases), 2 + _hash); + + conv->prepare(prep_pack); + conv->run(run_pack); + + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + + return dst; + } + + SimpleTensor<float> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info) + { + // Create reference + SimpleTensor<float> src{input_shape, DataType::F32}; + SimpleTensor<float> weights{weights_shape, DataType::F32}; + SimpleTensor<float> bias{bias_shape, DataType::F32}; + + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); + + return reference::convolution_layer<float>(src, weights, bias, output_shape, info, _dilation); + } + + TensorType _target{}; + int32_t _hash{0}; + SimpleTensor<float> _reference{}; + Size2D _dilation{}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUGEMMDIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CpuMulFixture.h b/tests/validation/fixtures/CpuMulFixture.h new file mode 100644 index 0000000000..ec16c9fa1f --- /dev/null +++ b/tests/validation/fixtures/CpuMulFixture.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUMULFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUMULFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2, typename T3 = T2> +class CpuMulGenericValidationFixture : public framework::Fixture +{ +public: + void setup(const TensorShape &shape0, + const TensorShape &shape1, + DataType dt_in1, + DataType dt_in2, + DataType dt_out, + float scale, + ConvertPolicy convert_policy, + RoundingPolicy rounding_policy, + bool is_inplace) + { + if (std::is_same<TensorType, Tensor>::value && // Cpu + (dt_in1 == DataType::F16 || dt_in2 == DataType::F16 || dt_out == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + + _is_inplace = is_inplace; + _target = compute_target(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo()); + _reference = + compute_reference(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo()); + } + +protected: + template <typename U> + void fill(U &&tensor, unsigned int seed_offset) + { + library->fill_tensor_uniform(tensor, seed_offset); + } + + TensorType compute_target(const TensorShape &shape0, + const TensorShape &shape1, + DataType dt_in1, + DataType dt_in2, + DataType dt_out, + float scale, + ConvertPolicy convert_policy, + RoundingPolicy rounding_policy, + QuantizationInfo qinfo0, + QuantizationInfo qinfo1, + QuantizationInfo qinfo_out, + ActivationLayerInfo act_info) + { + // Create tensors + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0); + TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, dt_out, 1, 
qinfo_out); + + // Check whether to do in-place computation and whether the inputs are broadcast compatible + TensorType *actual_dst = &dst; + if (_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && + (qinfo0 == qinfo_out) && (dt_in1 == dt_out); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && + (qinfo1 == qinfo_out) && (dt_in2 == dt_out); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if (src1_is_inplace) + { + actual_dst = &src1; + } + else + { + actual_dst = &src2; + } + } + + auto allocate_tensor = [](TensorType &t) + { + ARM_COMPUTE_ASSERT(t.info()->is_resizable()); + t.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!t.info()->is_resizable()); + }; + + // Create and configure function + FunctionType multiply; + multiply.configure(src1.info(), src2.info(), actual_dst->info(), scale, convert_policy, rounding_policy, + act_info); + + allocate_tensor(src1); + allocate_tensor(src2); + + // If not doing in-place computation, we still need to allocate the original dst + if (!_is_inplace) + { + allocate_tensor(dst); + } + + // Fill tensors + fill(AccessorType(src1), 0); + fill(AccessorType(src2), 1); + + // Compute function + ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src1}, + {arm_compute::TensorType::ACL_SRC_1, &src2}, + {arm_compute::TensorType::ACL_DST, actual_dst}}; + multiply.run(run_pack); + + return std::move(*actual_dst); + } + + SimpleTensor<T3> compute_reference(const TensorShape &shape0, + const TensorShape &shape1, + DataType dt_in1, + DataType dt_in2, + DataType dt_out, + float scale, + ConvertPolicy convert_policy, + RoundingPolicy rounding_policy, + QuantizationInfo qinfo0, + QuantizationInfo qinfo1, + QuantizationInfo qinfo_out, + ActivationLayerInfo act_info) + { + // Create reference + SimpleTensor<T1> src1{shape0, dt_in1, 1, qinfo0}; + SimpleTensor<T2> src2{shape1, dt_in2, 1, qinfo1}; + + // Fill reference + fill(src1, 0); + fill(src2, 1); + + auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, + rounding_policy, dt_out, qinfo_out); + return act_info.enabled() ? reference::activation_layer(result, act_info, qinfo_out) : result; + } + + TensorType _target{}; + SimpleTensor<T3> _reference{}; + bool _is_inplace{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2, typename T3 = T2> +class CpuMulValidationFixture + : public CpuMulGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3> +{ +public: + void setup(const TensorShape &shape, + DataType dt_in1, + DataType dt_in2, + DataType dt_out, + float scale, + ConvertPolicy convert_policy, + RoundingPolicy rounding_policy, + bool is_inplace) + { + CpuMulGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>::setup( + shape, shape, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, is_inplace); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUMULFIXTURE_H diff --git a/tests/validation/fixtures/CpuWinogradConv2dFixture.h b/tests/validation/fixtures/CpuWinogradConv2dFixture.h new file mode 100644 index 0000000000..d390aded28 --- /dev/null +++ b/tests/validation/fixtures/CpuWinogradConv2dFixture.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H + +#include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h" + +#include <memory> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +template <typename TensorType, typename AccessorType, typename FunctionType> +class CpuWinogradConv2dValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + PadStrideInfo info, + Size2D dilation, + ActivationLayerInfo act_info) + { + ARM_COMPUTE_UNUSED(dilation); + _act_info = act_info; + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float min, float max) + { + std::uniform_real_distribution<float> distribution(min, max); + library->fill(tensor, distribution, i); + } + + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType bias = create_tensor<TensorType>(bias_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, QuantizationInfo(), _data_layout); + + // Create and configure function + auto conv = std::make_unique<FunctionType>(); + ARM_COMPUTE_EXPECT(static_cast<bool>(conv->validate(src.info(), weights.info(), bias.info(), dst.info(), info, + _act_info, true)), + framework::LogLevel::ERRORS); + conv->configure(src.info(), weights.info(), bias.info(), dst.info(), info, _act_info, true); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + 
ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({&src, &weights, &bias, &dst}, _data_layout); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + bias.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src), 0, -0.5f, 0.5f); + fill(AccessorType(weights), 1, -0.5f, 0.5f); + fill(AccessorType(bias), 2, -0.5f, 0.5f); + + // Compute function + ITensorPack run_pack = {{ACL_SRC_0, &src}, {ACL_SRC_1, &weights}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}; + ITensorPack prep_pack = {{ACL_SRC_1, &weights}, {ACL_SRC_2, &bias}}; + + auto const aux_mem_req = conv->workspace(); + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(aux_mem_req, mg, run_pack, prep_pack); + + conv->prepare(prep_pack); + conv->run(run_pack); + + src.allocator()->free(); + weights.allocator()->free(); + bias.allocator()->free(); + + return dst; + } + + SimpleTensor<float> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const PadStrideInfo &info) + { + // Create reference + SimpleTensor<float> src_t{input_shape, _data_type, 1}; + SimpleTensor<float> weights_t{weights_shape, _data_type, 1}; + SimpleTensor<float> bias_t{bias_shape, _data_type, 1}; + + // Fill reference + fill(src_t, 0, -0.5f, 0.5f); + SimpleTensor<float> src_t1(copy_tensor<float, float>(src_t)); + + fill(weights_t, 1, -0.5f, 0.5f); + SimpleTensor<float> weights_t1(copy_tensor<float, float>(weights_t)); + fill(bias_t, 2, -0.5f, 0.5f); + SimpleTensor<float> bias_t1(copy_tensor<float, float>(bias_t)); + + // Set output tile + Size2D output_tile(4U, 4U); + if (weights_shape[0] == 7 && weights_shape[1] == 1) + { + output_tile.width = 2; + output_tile.height = 1; + } + else if (weights_shape[0] == 1 && weights_shape[1] == 7) + { + output_tile.width = 1; + output_tile.height = 2; + } + else if (weights_shape[0] == 1) + { + output_tile.width = 1; + } + else if (weights_shape[1] == 1) + { + output_tile.height = 1; + } + + WinogradInfo winograd_info(output_tile, Size2D(weights_shape[0], weights_shape[1]), + Size2D(input_shape[0], input_shape[1]), info, src_t1.data_layout()); + + // Compute tensor shapes for input, filter and output transforms + TensorShape input_transform_shape = + compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, _data_type), winograd_info); + TensorShape filter_transform_shape = + compute_winograd_filter_transform_shape(TensorInfo(weights_shape, 1, _data_type), winograd_info); + TensorShape batched_gemm_shape = input_transform_shape; + batched_gemm_shape[0] = filter_transform_shape[0]; + TensorShape output_transform_shape = + compute_winograd_output_transform_shape(TensorInfo(batched_gemm_shape, 1, _data_type), winograd_info); + + // Dummy matrix C to perform matrix multiplication + SimpleTensor<float> dummy_c{batched_gemm_shape, _data_type, 1}; + + // Compute Winograd-based convolution + SimpleTensor<float> input_transform_out = + reference::winograd_input_transform<float>(src_t1, input_transform_shape, winograd_info); + + SimpleTensor<float> filter_transform_out = + reference::winograd_filter_transform<float>(weights_t1, filter_transform_shape, winograd_info); + SimpleTensor<float> batched_gemm = + reference::gemm<float>(input_transform_out, 
filter_transform_out, dummy_c, 1.0f, 0.0f); + SimpleTensor<float> conv_out = + reference::winograd_output_transform<float>(batched_gemm, bias_t1, output_transform_shape, winograd_info); + SimpleTensor<float> conv_out_t(copy_tensor<float, float>(conv_out)); + return (_act_info.enabled()) ? reference::activation_layer<float>(conv_out_t, _act_info) : conv_out_t; + } + + TensorType _target{}; + SimpleTensor<float> _reference{}; + ActivationLayerInfo _act_info{}; + DataType _data_type{DataType::F32}; + DataLayout _data_layout{DataLayout::NHWC}; +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CPUWINOGRADCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h index 30a3fd8569..51db5e0947 100644 --- a/tests/validation/fixtures/CropResizeFixture.h +++ b/tests/validation/fixtures/CropResizeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ public: void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value, bool is_outside_bounds, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type); _reference = compute_reference(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type); } @@ -131,4 +137,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_CROPRESIZEFIXTURE_H diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index 83170c413c..30443cc742 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H + #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -53,6 +57,12 @@ public: DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, QuantizationInfo weights_quantization_info, bool add_bias) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = data_type; _weights_data_type = weights_data_type; _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; @@ -248,6 +258,12 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -267,6 +283,12 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::CEIL); @@ -286,6 +308,12 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -307,6 +335,12 @@ public: unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias, DataType weights_data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -331,3 +365,5 @@ public: } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DECONVOLUTIONLAYERFIXTURE_H diff --git 
a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h index f55d20bf3e..7a60ca8bf5 100644 --- a/tests/validation/fixtures/DepthConvertLayerFixture.h +++ b/tests/validation/fixtures/DepthConvertLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE -#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class DepthConvertLayerValidationBaseFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (dt_in == DataType::F16 || dt_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _shift = shift; _quantization_info = quantization_info; _target = compute_target(shape, dt_in, dt_out, policy, shift); @@ -149,4 +155,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DEPTHCONVERTLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 6e2e3a3846..055e74de89 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -91,6 +91,15 @@ public: DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false) { ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); + + _skip_test = false; + if(std::is_same<TensorType, Tensor>::value && // Cpu + (input_data_type == DataType::F16 || weights_data_type == DataType::F16) && !CPUInfo::get().has_fp16()) + { + _skip_test = true; + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the current // random generation process almost different for many test configurations, @@ -374,6 +383,7 @@ protected: bool _in_place{ false }; bool _run_twice{ false }; bool _use_dynamic_output_quant{false}; + bool _skip_test{false}; int32_t _hash{0}; // Random initialization limits diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h index 4eb25a5bc5..165cd423df 100644 --- a/tests/validation/fixtures/DequantizationLayerFixture.h +++ b/tests/validation/fixtures/DequantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class DequantizationValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType src_data_type, DataType dst_datatype, DataLayout data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (src_data_type == DataType::F16 || dst_datatype == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _quantization_info = generate_quantization_info(src_data_type, shape.z()); _target = compute_target(shape, src_data_type, dst_datatype, data_layout); _reference = compute_reference(shape, src_data_type); @@ -164,4 +170,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DEQUANTIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h index e27a41a23b..fb8db15a47 100644 --- a/tests/validation/fixtures/DirectConvolution3DFixture.h +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -54,6 +54,12 @@ public: { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NDHWC); + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape weights_shape(num_kernels, input_shape[0], kernel_width, kernel_height, kernel_depth); const TensorShape bias_shape(num_kernels); const DataType bias_data_type = is_data_type_quantized(data_type) ? DataType::S32 : data_type; diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h index 6f204642ca..debfce9142 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -93,6 +93,12 @@ public: void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout, bool mixed_layout = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. There may be hash collisions but // this is intentional as it's a very easy way to make the current // random generation process almost different for many test configurations, @@ -133,6 +139,12 @@ public: ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN); ARM_COMPUTE_UNUSED(dilation); + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // This hash is used by random generators. 
There may be hash collisions but // this is intentional as it's a very easy way to make the current // random generation process almost different for many test configurations, diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index f36a1f75b7..0c809b001b 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,13 @@ public: DataType data_type0, DataType data_type1, DataType output_data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace = false, bool use_dynamic_shape = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (data_type0 == DataType::F16 || data_type1 == DataType::F16 || output_data_type == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _use_dynamic_shape = use_dynamic_shape; _is_inplace = is_inplace; diff --git a/tests/validation/fixtures/ElementwiseUnaryFixture.h b/tests/validation/fixtures/ElementwiseUnaryFixture.h index 15344288db..70f6ea9172 100644 --- a/tests/validation/fixtures/ElementwiseUnaryFixture.h +++ b/tests/validation/fixtures/ElementwiseUnaryFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE -#define ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H #include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/TensorShape.h" @@ -53,6 +53,12 @@ public: void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op, bool use_dynamic_shape = false, QuantizationInfo qinfo = QuantizationInfo(), QuantizationInfo qinfo_out = QuantizationInfo()) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + input_data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _op = op; _target = compute_target(input_shape, input_data_type, in_place, qinfo, qinfo_out); _reference = compute_reference(input_shape, input_data_type, qinfo, qinfo_out); @@ -444,4 +450,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEUNARYFIXTURE_H diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h index e72487c7cf..ee48e1d6cf 100644 --- a/tests/validation/fixtures/FlattenLayerFixture.h +++ b/tests/validation/fixtures/FlattenLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -52,6 +52,12 @@ class FlattenLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape shape_flatten; TensorInfo input_info(shape, 1, data_type); shape_flatten = compute_flatten_shape(&input_info); @@ -118,4 +124,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FLATTENLAYERFIXTURE_H diff --git a/tests/validation/fixtures/FloorFixture.h b/tests/validation/fixtures/FloorFixture.h index 7d38666f47..5cbf2b8e9c 100644 --- a/tests/validation/fixtures/FloorFixture.h +++ b/tests/validation/fixtures/FloorFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FLOOR_FIXTURE -#define ARM_COMPUTE_TEST_FLOOR_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class FloorValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type); _reference = compute_reference(shape, data_type); } @@ -103,4 +109,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FLOOR_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h index 344187868f..481a3b7659 100644 --- a/tests/validation/fixtures/FullyConnectedLayerFixture.h +++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h @@ -92,6 +92,12 @@ public: void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type==DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(weights_shape); ARM_COMPUTE_UNUSED(bias_shape); @@ -459,6 +465,12 @@ public: void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, DataType data_type, ActivationLayerInfo activation_info, bool constant_weights, bool constant_bias, bool weights_reshaped, bool remove_bias = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type==DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _data_type = data_type; const bool is_quantized = is_data_type_quantized(data_type); diff 
--git a/tests/validation/fixtures/FuseBatchNormalizationFixture.h b/tests/validation/fixtures/FuseBatchNormalizationFixture.h index a05e4169a7..61affff6ba 100644 --- a/tests/validation/fixtures/FuseBatchNormalizationFixture.h +++ b/tests/validation/fixtures/FuseBatchNormalizationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE -#define ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class FuseBatchNormalizationFixture : public framework::Fixture public: void setup(TensorShape shape_w, DataType data_type, DataLayout data_layout, bool in_place, bool with_bias, bool with_gamma, bool with_beta) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + std::tie(_target_w, _target_b) = compute_target(shape_w, data_type, data_layout, in_place, with_bias, with_gamma, with_beta); std::tie(_reference_w, _reference_b) = compute_reference(shape_w, data_type, with_bias, with_gamma, with_beta); } @@ -202,4 +208,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FUSEBATCHNORMALIZATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FUSEBATCHNORMALIZATIONFIXTURE_H diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index 94bedc83e1..34c0574412 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -51,6 +51,12 @@ class GEMMGenericValidationFixture : public framework::Fixture public: void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(pretranspose); _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate); diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index aa4eedb75d..854442b174 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -25,6 +25,7 @@ #define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "src/core/utils/quantization/AsymmHelpers.h" #include "tests/validation/Helpers.h" #include "tests/framework/Fixture.h" @@ -91,18 +92,113 @@ struct TensorFillInfo }; template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false> -TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape 
&shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, +TensorType compute_gemmlowp_target_for_updated_sq_info_after_config(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(), - bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN) + bool accumulate = false, DataType data_type_output = DataType::UNKNOWN) { + ARM_COMPUTE_ASSERT((std::is_same<FunctionType, NEGEMMLowpMatrixMultiplyCore>::value == true)); ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); ARM_COMPUTE_ASSERT(data_type_a == data_type_b); + // If unknown, set to sensible defaults if (data_type_output == DataType::UNKNOWN) { data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; } + // Create tensors with fake quantization info and defer passing the correct ones to a later stage. + auto qi = QuantizationInfo(0.550721, -37, true); + TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, qi); + TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, qi); + TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, qi); + + TensorType bias; + if(is_fused) + { + TensorShape bias_shape(shape_b[0]); + bias = create_tensor<TensorType>(bias_shape, data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1); + } + + // Create and configure function + // The GEMMInfo includes the values of the depth in case of reinterpreted 3d input/output + FunctionType gemmlowp; + + gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, + output_stage, false /*fp_mixed_precision*/, false /*fast_math*/, false /*broadcast_bias*/, + arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, + false /* pretranspose_B */, accumulate)); + + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); + + add_padding_x({ &a, &b, &output }); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + output.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); + + // Fill tensors + fill_quantized(AccessorType(a), 0 + finfo.hash); + fill_quantized(AccessorType(b), 1 + finfo.hash); + + if (accumulate) + { + ARM_COMPUTE_ASSERT(accumulate != run_twice); + fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output); + } + + if(is_fused) + { + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + bias.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias); + } + + // Run with variable inputs.
+ if(run_twice) + { + gemmlowp.run(); + fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run + fill_quantized(AccessorType(b), 4 + finfo.hash); + if(is_fused) + { + fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias); + } + } + + // Now properly set the correct quantization info and update ACL + a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true)); + b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true)); + output.info()->set_quantization_info(QuantizationInfo(output_qinfo.scale(), output_qinfo.offset(), true)); + + // Propagate the correct quantization info through ACL + NEGEMMLowpMatrixMultiplyCore *lp = reinterpret_cast<NEGEMMLowpMatrixMultiplyCore *>(&gemmlowp); + lp->update_quantization_parameters(); + + // Compute GEMM function + gemmlowp.run(); + return output; +} + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false> +TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, + const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, + GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(), + bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN) +{ + ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); + // If unknown, set to sensible defaults + if (data_type_output == DataType::UNKNOWN) { + data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; + } + // Create tensors TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo); TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if I pass data_layout_output here.
to be investigated @@ -185,7 +281,6 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo()) { ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); - ARM_COMPUTE_ASSERT(data_type_a == data_type_b); TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) { @@ -412,7 +507,7 @@ public: * */ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, - bool reshape_b_only_on_first_run) + bool reshape_b_only_on_first_run, bool updated_sq_info_after_config = false) { ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE); ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type)); @@ -429,15 +524,23 @@ public: init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage); _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo); - _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo); + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo, updated_sq_info_after_config); } protected: TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo, - DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo()) + DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(), bool updated_sq_info_after_config = false) { - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo, - b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo); + if (updated_sq_info_after_config) + { + return compute_gemmlowp_target_for_updated_sq_info_after_config<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo, + b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo); + } + else + { + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo, + b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo); + } } SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, @@ -472,29 +575,59 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture { public: - void setup(TensorShape shape_a, TensorShape shape_b, 
TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type_a, DataType data_type_b, bool accumulate) { const bool dynamic_qinfo = false; const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset); TensorFillInfo finfo; - _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); - _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo, + accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, + finfo, accumulate, dynamic_qinfo); } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, DataType data_type_a, DataType data_type_b, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) { const auto output_qinfo = QuantizationInfo(); - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type_a, data_type_b, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); } - SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo) + SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, DataType data_type_a, DataType data_type_b, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo) { QuantizationInfo s32_ref_output_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0, dynamic_qinfo); - SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, - DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo); + SimpleTensor<int32_t> s32_ref_output; + if (data_type_a == DataType::QASYMM8) + { + if (data_type_b == DataType::QASYMM8) + { + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + 
else + { + ARM_COMPUTE_ERROR_ON(data_type_b != DataType::QASYMM8_SIGNED); + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, int8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + } + else + { + ARM_COMPUTE_ERROR_ON(data_type_a != DataType::QASYMM8_SIGNED); + if (data_type_b == DataType::QASYMM8) + { + ARM_COMPUTE_ERROR("QASYMM8_SIGNED input with QASYMM8 weights not supported"); + } + else + { + ARM_COMPUTE_ERROR_ON(data_type_b != DataType::QASYMM8_SIGNED); + s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>( + shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); + } + } + s32_ref_output.quantization_info(s32_ref_output_quant_info); SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32); diff --git a/tests/validation/fixtures/Im2ColFixture.h b/tests/validation/fixtures/Im2ColFixture.h index 5c7978f4ab..8fb53c35b4 100644 --- a/tests/validation/fixtures/Im2ColFixture.h +++ b/tests/validation/fixtures/Im2ColFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_IM2COL_FIXTURE -#define ARM_COMPUTE_TEST_IM2COL_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -51,6 +51,12 @@ public: void setup(TensorShape input_shape, DataType data_type, const Size2D &kernel_dims, const PadStrideInfo &conv_info, const QuantizationInfo &quant_info, const DataLayout &data_layout, unsigned int num_groups) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _kernel_dims = kernel_dims; _conv_info = conv_info; _quant_info = quant_info; @@ -136,4 +142,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_IM2COL_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_IM2COLFIXTURE_H diff --git a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h index c26dd99f02..b78b742e09 100644 --- a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h +++ b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
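The new `compute_gemmlowp_target_for_updated_sq_info_after_config()` helper above covers a Neon-only scenario: the function is configured with deliberately wrong, dynamic placeholder quantization, the tensors' real scales and offsets are set only after configuration, and `update_quantization_parameters()` re-derives the function's internal parameters before the run. A condensed sketch of just that call sequence, with illustrative shapes and quantization values and with fills and validation omitted:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void configure_then_update_quantization()
{
    // Dynamic placeholder qinfo: the values are wrong on purpose.
    const QuantizationInfo placeholder(0.550721f, -37, true);

    Tensor a, b, dst;
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8_SIGNED, placeholder));
    b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8_SIGNED, placeholder));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::S32));

    NEGEMMLowpMatrixMultiplyCore gemmlowp;
    gemmlowp.configure(&a, &b, nullptr, &dst, GEMMInfo());

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();

    // Only now provide the real scales/offsets (still flagged as dynamic)...
    a.info()->set_quantization_info(QuantizationInfo(1.f / 255, 10, true));
    b.info()->set_quantization_info(QuantizationInfo(5.f / 255, -3, true));

    // ...and ask the function to refresh its cached parameters before running.
    gemmlowp.update_quantization_parameters();
    gemmlowp.run();
}
```

The fixture gates this path behind the new `updated_sq_info_after_config` flag, so the existing static-quantization flow is left untouched.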
*/ -#ifndef ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE -#define ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class InstanceNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, DataLayout data_layout, bool in_place) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, data_layout, in_place); _reference = compute_reference(shape, data_type); } @@ -146,4 +152,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_INSTANCENORMALIZATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_INSTANCENORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h index b8f4b1eaf3..9e65f1eaa5 100644 --- a/tests/validation/fixtures/L2NormalizeLayerFixture.h +++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE -#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -50,6 +50,12 @@ class L2NormalizeLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, data_layout, axis, epsilon); _reference = compute_reference(shape, data_type, data_layout, axis, epsilon); } @@ -134,4 +140,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_L2NORMALIZELAYERFIXTURE_H diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h index a32e9adfe5..fa7c7d1d90 100644 --- a/tests/validation/fixtures/LSTMLayerFixture.h +++ b/tests/validation/fixtures/LSTMLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023 Arm Limited. + * Copyright (c) 2018-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
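A few hunks back, `GEMMLowpDequantizedMatrixMultiplyValidationFixture` also gained explicit `data_type_a`/`data_type_b` parameters, and its reference path now dispatches over the signed/unsigned combinations instead of hard-coding QASYMM8_SIGNED for both operands. Collapsed into a single predicate (a hypothetical helper; the fixture expresses the same logic as the nested if/else above), the accepted combinations are:

```cpp
#include "arm_compute/core/Types.h"

using arm_compute::DataType;

// u8 x u8, u8 x s8 and s8 x s8 are exercised; s8 LHS with u8 RHS is rejected.
bool dequantized_matmul_combination_supported(DataType data_type_a, DataType data_type_b)
{
    if (data_type_a == DataType::QASYMM8)
    {
        return data_type_b == DataType::QASYMM8 || data_type_b == DataType::QASYMM8_SIGNED;
    }
    return data_type_a == DataType::QASYMM8_SIGNED && data_type_b == DataType::QASYMM8_SIGNED;
}
```

This widening is also why the `data_type_a == data_type_b` assertion was dropped from `compute_gemmlowp_reference()` above.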
*/ -#ifndef ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H + +#include "arm_compute/runtime/Allocator.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/CL/CLBufferAllocator.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" @@ -48,10 +54,16 @@ class LSTMLayerValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, TensorShape input_weights_shape, TensorShape recurrent_weights_shape, TensorShape cell_bias_shape, TensorShape output_cell_shape, TensorShape output_shape, TensorShape scratch_shape, ActivationLayerInfo info, float cell_threshold, float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, - bool use_layer_norm) + bool use_layer_norm, bool use_memory_manager) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, input_weights_shape, recurrent_weights_shape, cell_bias_shape, output_cell_shape, output_shape, scratch_shape, info, cell_threshold, projection_threshold, - data_type, projection_opt, peephole_opt, use_layer_norm); + data_type, projection_opt, peephole_opt, use_layer_norm, use_memory_manager); _reference = compute_reference(input_shape, input_weights_shape, recurrent_weights_shape, cell_bias_shape, output_cell_shape, output_shape, scratch_shape, info, cell_threshold, projection_threshold, data_type, projection_opt, peephole_opt, use_layer_norm); } @@ -77,7 +89,7 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &input_weights_shape, const TensorShape &recurrent_weights_shape, const TensorShape &cell_bias_shape, const TensorShape &output_cell_shape, const TensorShape &output_shape, const TensorShape &scratch_shape, ActivationLayerInfo info, float cell_threshold, - float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, bool use_layer_norm) + float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, bool use_layer_norm, bool use_memory_manager) { const unsigned int num_cells = input_weights_shape.y(); const unsigned int num_outputs = recurrent_weights_shape.x(); @@ -159,7 +171,17 @@ protected: } // Create and configure function - FunctionType lstm; + std::shared_ptr<MemoryManagerOnDemand> mm = nullptr; + + if(use_memory_manager) + { + auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); + auto pool_mgr = std::make_shared<PoolManager>(); + mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); + } + + FunctionType lstm(mm); + lstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w, &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w, &forget_gate_bias, &cell_bias, &output_gate_bias, &output_state_in, &cell_state_in, @@ -314,8 +336,27 @@ protected: } // Compute function + if(use_memory_manager) + { + if(std::is_same<TensorType, Tensor>::value) + { + Allocator alloc{}; + mm->populate(alloc, 1); + } + else + { + CLBufferAllocator alloc{}; + mm->populate(alloc, 1); + } + } + lstm.run(); + if(use_memory_manager) + { + mm->clear(); + } + _target_scratch = std::move(scratch); return output; } @@ -535,4 +576,4 @@ protected: } // namespace 
validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LSTM_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_LSTMLAYERFIXTURE_H diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h index ffd12e56d0..4ccd2b8266 100644 --- a/tests/validation/fixtures/MatMulFixture.h +++ b/tests/validation/fixtures/MatMulFixture.h @@ -65,6 +65,12 @@ public: QuantizationInfo b_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + // For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. if (transpose_a) { diff --git a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h index 808e3ffabd..1fd2049272 100644 --- a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h +++ b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, 2023 Arm Limited. + * Copyright (c) 2020-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class MaxUnpoolingLayerValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + std::mt19937 gen(library->seed()); std::uniform_int_distribution<> offset_dis(0, 20); const float scale = data_type == DataType::QASYMM8_SIGNED ? 1.f / 127.f : 1.f / 255.f; @@ -159,4 +165,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_MAXUNPOOLINGLAYERFIXTURE_H diff --git a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h index bf5d20790c..f8176e82ae 100644 --- a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h +++ b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
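The `LSTMLayerFixture` changes above add a `use_memory_manager` dimension to the test grid: when enabled, the fixture builds a `MemoryManagerOnDemand` from a `BlobLifetimeManager` and a `PoolManager`, hands it to the function's constructor, populates it with the backend-appropriate allocator (`Allocator` for Cpu, `CLBufferAllocator` for OpenCL), and clears it after the run. A condensed sketch of the Cpu side of that lifecycle; the `configure()` call with its many tensor arguments is elided, so this shows the shape of the flow rather than a runnable test:

```cpp
#include <memory>

#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/NEON/functions/NELSTMLayer.h"
#include "arm_compute/runtime/PoolManager.h"

using namespace arm_compute;

void lstm_with_memory_manager()
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    NELSTMLayer lstm(mm); // internal tensors are managed by mm once configure() runs
    // lstm.configure(/* input, weights, biases, state and output tensors */);

    Allocator alloc{};
    mm->populate(alloc, 1 /* num_pools */); // back the managed blobs with real memory

    // lstm.run();  // executes using the pooled allocations

    mm->clear();     // release the pools once the function is done
}
```

Populating after configuration and clearing after the run mirrors the ordering the fixture enforces, and exercises the memory-manager code path that default-constructed functions never touch.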
*/ -#ifndef ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class MeanStdDevNormalizationLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + dt == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + QuantizationInfo qi = QuantizationInfo(0.5f, 10); _data_type = dt; _target = compute_target(shape, dt, in_place, epsilon, qi); @@ -128,4 +134,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MEAN_STDDEV_NORMALIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_MEANSTDDEVNORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h index ddaa3533f5..06ec88b5af 100644 --- a/tests/validation/fixtures/NormalizationLayerFixture.h +++ b/tests/validation/fixtures/NormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class NormalizationValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + NormalizationLayerInfo info(norm_type, norm_size, 5, beta, 1.f, is_scaled); _target = compute_target(shape, info, data_type, data_layout); @@ -126,4 +132,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_NORMALIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_NORMALIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PadLayerFixture.h b/tests/validation/fixtures/PadLayerFixture.h index 93b43616ff..10c7cdab82 100644 --- a/tests/validation/fixtures/PadLayerFixture.h +++ b/tests/validation/fixtures/PadLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
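Threaded through all of these files is the same housekeeping change: include guards move from the legacy `ARM_COMPUTE_TEST_*` names to guards derived from the file path, and the closing `#endif` comment switches from a block comment to a line comment. Using one of the files above as the example, the before/after pattern is:

```cpp
// Before
#ifndef ARM_COMPUTE_TEST_FLOOR_FIXTURE
#define ARM_COMPUTE_TEST_FLOOR_FIXTURE
// ...
#endif /* ARM_COMPUTE_TEST_FLOOR_FIXTURE */

// After: the guard spells out tests/validation/fixtures/FloorFixture.h
#ifndef ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H
#define ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H
// ...
#endif // ACL_TESTS_VALIDATION_FIXTURES_FLOORFIXTURE_H
```

Deriving the guard from the path keeps it unique by construction and makes mismatched `#endif` comments easy to spot in review.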
*/ -#ifndef ARM_COMPUTE_TEST_PADLAYER_FIXTURE -#define ARM_COMPUTE_TEST_PADLAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -46,6 +46,12 @@ class PaddingFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, const PaddingList &padding, const PaddingMode mode) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + PaddingList clamped_padding = padding; if(mode != PaddingMode::CONSTANT) { @@ -132,4 +138,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_PADLAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_PADLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h index 4345d8a13f..213e7355a5 100644 --- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h +++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE -#define ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -58,6 +58,13 @@ public: ActivationLayerInfo act_info, bool is_inplace) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (dt_in1 == DataType::F16 || dt_in2 == DataType::F16 || dt_out == DataType::F16) && + !CPUInfo::get().has_fp16()) + { + return; + } + _is_inplace = is_inplace; _target = compute_target(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, qinfo0, qinfo1, qinfo_out, act_info); _reference = compute_reference(shape0, shape1, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, qinfo0, qinfo1, qinfo_out, act_info); @@ -233,4 +240,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_PIXELWISEMULTIPLICATIONFIXTURE_H diff --git a/tests/validation/fixtures/Pooling3dLayerFixture.h b/tests/validation/fixtures/Pooling3dLayerFixture.h index 1bdf615fb1..1d9ee58df9 100644 --- a/tests/validation/fixtures/Pooling3dLayerFixture.h +++ b/tests/validation/fixtures/Pooling3dLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class Pooling3dLayerValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, pool_info, data_type, input_qinfo, output_qinfo); _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo); } @@ -161,4 +167,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_POOLING3DLAYERFIXTURE_H diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h index 59c920868b..c7265a0e8a 100644 --- a/tests/validation/fixtures/PoolingLayerFixture.h +++ b/tests/validation/fixtures/PoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ public: void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, bool indices = false, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo(), bool mixed_layout = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _mixed_layout = mixed_layout; _pool_info = pool_info; _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); @@ -225,4 +231,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_POOLING_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_POOLINGLAYERFIXTURE_H diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h index 1b21967bda..1cc0a56399 100644 --- a/tests/validation/fixtures/QuantizationLayerFixture.h +++ b/tests/validation/fixtures/QuantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class QuantizationValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo, QuantizationInfo qinfo_in) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + (data_type_in == DataType::F16 || data_type_out == DataType::F16) && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type_in, data_type_out, qinfo, qinfo_in); _reference = compute_reference(shape, data_type_in, data_type_out, qinfo, qinfo_in); } @@ -116,4 +122,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_QUANTIZATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/RNNLayerFixture.h b/tests/validation/fixtures/RNNLayerFixture.h index e9a05e7838..8741ef4fae 100644 --- a/tests/validation/fixtures/RNNLayerFixture.h +++ b/tests/validation/fixtures/RNNLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H #include "tests/Globals.h" #include "tests/framework/Asserts.h" @@ -45,6 +45,12 @@ public: void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape recurrent_weights_shape, TensorShape bias_shape, TensorShape output_shape, ActivationLayerInfo info, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, weights_shape, recurrent_weights_shape, bias_shape, output_shape, info, data_type); _reference = compute_reference(input_shape, weights_shape, recurrent_weights_shape, bias_shape, output_shape, info, data_type); } @@ -144,4 +150,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RNN_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_RNNLAYERFIXTURE_H diff --git a/tests/validation/fixtures/ROIAlignLayerFixture.h b/tests/validation/fixtures/ROIAlignLayerFixture.h index ad76dcbbd9..fd076862dd 100644 --- a/tests/validation/fixtures/ROIAlignLayerFixture.h +++ b/tests/validation/fixtures/ROIAlignLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_ROIALIGNLAYER_FIXTURE -#define ARM_COMPUTE_TEST_ROIALIGNLAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ROIALIGNLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ROIALIGNLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class ROIAlignLayerGenericFixture : public framework::Fixture public: void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _rois_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::QASYMM16 : data_type; _target = compute_target(input_shape, data_type, data_layout, pool_info, rois_shape, qinfo, output_qinfo); _reference = compute_reference(input_shape, data_type, pool_info, rois_shape, qinfo, output_qinfo); @@ -209,4 +215,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ROIALIGNLAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ROIALIGNLAYERFIXTURE_H diff --git a/tests/validation/fixtures/RangeFixture.h b/tests/validation/fixtures/RangeFixture.h index 166613a318..50682e979e 100644 --- a/tests/validation/fixtures/RangeFixture.h +++ b/tests/validation/fixtures/RangeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_RANGE_FIXTURE -#define ARM_COMPUTE_TEST_RANGE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_RANGEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_RANGEFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -57,6 +57,12 @@ class RangeFixture : public framework::Fixture public: void setup(const DataType data_type0, float start, float step, const QuantizationInfo qinfo0 = QuantizationInfo()) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type0 == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(data_type0, qinfo0, start, step); _reference = compute_reference(data_type0, qinfo0, start, step); } @@ -138,4 +144,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RANGE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_RANGEFIXTURE_H diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h index e61941435c..9f18497095 100644 --- a/tests/validation/fixtures/ReduceMeanFixture.h +++ b/tests/validation/fixtures/ReduceMeanFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_REDUCE_MEAN_FIXTURE -#define ARM_COMPUTE_TEST_REDUCE_MEAN_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REDUCEMEANFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REDUCEMEANFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class ReduceMeanValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); _reference = compute_reference(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); } @@ -172,4 +178,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REDUCE_MEAN_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_REDUCEMEANFIXTURE_H diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h index b44f299486..fc9801d9fd 100644 --- a/tests/validation/fixtures/ReductionOperationFixture.h +++ b/tests/validation/fixtures/ReductionOperationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_REDUCTION_OPERATION_FIXTURE -#define ARM_COMPUTE_TEST_REDUCTION_OPERATION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REDUCTIONOPERATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REDUCTIONOPERATIONFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class ReductionOperationValidationFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info, bool keep_dims = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); _keep_dims = keep_dims && !is_arg_min_max; @@ -166,4 +172,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REDUCTION_OPERATION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_REDUCTIONOPERATIONFIXTURE_H diff --git a/tests/validation/fixtures/ReverseFixture.h b/tests/validation/fixtures/ReverseFixture.h index 856bff7b12..5bb8f876d2 100644 --- a/tests/validation/fixtures/ReverseFixture.h +++ b/tests/validation/fixtures/ReverseFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -47,6 +47,12 @@ class ReverseValidationFixture : public framework::Fixture public: void setup(TensorShape shape, TensorShape axis_shape, DataType data_type, bool use_negative_axis = false, bool use_inverted_axis = false) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _num_dims = shape.num_dimensions(); _target = compute_target(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); _reference = compute_reference(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h index 86d89d71f7..03a7ca6ab3 100644 --- a/tests/validation/fixtures/ScaleFixture.h +++ b/tests/validation/fixtures/ScaleFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,12 @@ public: void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners, bool mixed_layout, QuantizationInfo output_quantization_info) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _shape = shape; _policy = policy; _border_mode = border_mode; diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h index af161ef98b..34c6023b09 100644 --- a/tests/validation/fixtures/ScatterLayerFixture.h +++ b/tests/validation/fixtures/ScatterLayerFixture.h @@ -25,7 +25,6 @@ #define ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" diff --git a/tests/validation/fixtures/SelectFixture.h b/tests/validation/fixtures/SelectFixture.h index 8cb6f062f9..eef86b808e 100644 --- a/tests/validation/fixtures/SelectFixture.h +++ b/tests/validation/fixtures/SelectFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SELECT_FIXTURE -#define ARM_COMPUTE_TEST_SELECT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -65,6 +65,12 @@ class SelectValidationFixture : public framework::Fixture public: void setup(TensorShape shape, bool has_same_same_rank, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape condition_shape = detail::select_condition_shape(shape, has_same_same_rank); _target = compute_target(shape, condition_shape, data_type); @@ -144,4 +150,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SELECT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SELECTFIXTURE_H diff --git a/tests/validation/fixtures/SliceOperationsFixtures.h b/tests/validation/fixtures/SliceOperationsFixtures.h index b1f91ea2e0..65b8fb88d2 100644 --- a/tests/validation/fixtures/SliceOperationsFixtures.h +++ b/tests/validation/fixtures/SliceOperationsFixtures.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H +#define ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,6 +47,12 @@ class SliceFixture : public framework::Fixture public: void setup(TensorShape shape, Coordinates starts, Coordinates ends, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, starts, ends, data_type); _reference = compute_reference(shape, starts, ends, data_type); } @@ -112,6 +118,12 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_mask, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, starts, ends, strides, begin_mask, end_mask, shrink_mask, data_type); _reference = compute_reference(shape, starts, ends, strides, begin_mask, end_mask, shrink_mask, data_type); } @@ -176,4 +188,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SLICEOPERATIONSFIXTURES_H diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h index f4bf8df9c0..399a8b70c4 100644 --- a/tests/validation/fixtures/SoftmaxLayerFixture.h +++ b/tests/validation/fixtures/SoftmaxLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -48,6 +48,12 @@ class SoftmaxValidationGenericFixture : public framework::Fixture public: void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _quantization_info = quantization_info; _reference = compute_reference(shape, data_type, quantization_info, beta, axis); @@ -157,4 +163,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SOFTMAX_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SOFTMAXLAYERFIXTURE_H diff --git a/tests/validation/fixtures/SplitFixture.h b/tests/validation/fixtures/SplitFixture.h index 203925329c..79ce152671 100644 --- a/tests/validation/fixtures/SplitFixture.h +++ b/tests/validation/fixtures/SplitFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SPLIT_FIXTURE -#define ARM_COMPUTE_TEST_SPLIT_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class SplitFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int axis, unsigned int splits, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, axis, splits, data_type); _reference = compute_reference(shape, axis, splits, data_type); } @@ -150,6 +156,12 @@ class SplitShapesFixture : public framework::Fixture public: void setup(TensorShape shape, unsigned int axis, std::vector<TensorShape> split_shapes, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(shape, axis, split_shapes, data_type); _reference = compute_reference(shape, axis, split_shapes, data_type); } @@ -254,4 +266,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SPLIT_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SPLITFIXTURE_H diff --git a/tests/validation/fixtures/UnstackFixture.h b/tests/validation/fixtures/UnstackFixture.h index 30b7dd5539..b543ea263c 100644 --- a/tests/validation/fixtures/UnstackFixture.h +++ b/tests/validation/fixtures/UnstackFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, 2023 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_UNSTACK_FIXTURE -#define ARM_COMPUTE_TEST_UNSTACK_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -49,6 +49,12 @@ class UnstackValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, int axis, int num, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, axis, num, data_type); _reference = compute_reference(input_shape, axis, num, data_type); } @@ -114,4 +120,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_UNSTACK_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_UNSTACKFIXTURE_H diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h index 20b678b36c..4d165a6563 100644 --- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h +++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h @@ -59,6 +59,12 @@ public: DataType data_type, ActivationLayerInfo act_info, const DataLayout &data_layout) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + ARM_COMPUTE_UNUSED(dilation); _mixed_layout = mixed_layout; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info, data_layout); @@ -244,6 +250,12 @@ class WinogradInputTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, WinogradInfo winograd_info, DataLayout data_layout, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); _mixed_layout = mixed_layout; _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); @@ -355,6 +367,12 @@ class WinogradFilterTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, Size2D output_tile, DataLayout data_layout, DataType data_type) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + WinogradInfo winograd_info(output_tile, Size2D(input_shape[0], input_shape[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); TensorShape output_shape = compute_winograd_filter_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); @@ -469,6 +487,12 @@ class WinogradOutputTransformValidationFixture : public framework::Fixture public: void setup(TensorShape input_shape, WinogradInfo winograd_info, DataType data_type, ActivationLayerInfo act_info = ActivationLayerInfo()) { + if(std::is_same<TensorType, Tensor>::value && // Cpu + data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + _target = compute_target(input_shape, winograd_info, data_type, act_info); _reference = compute_reference(input_shape, winograd_info, data_type, act_info); } diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h 
b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h index 4c1cc94d3d..0ab90c675f 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -255,7 +255,6 @@ protected: // We reshape the gemm output back if the tensor is high dimensional if (output_shape_collapsed != output_shape) { - // std::cout << "called reshape: \n"; result = reference::reshape_layer(result, output_shape); } diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp index a7c8a784d9..97a7adaf54 100644 --- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2022 Arm Limited. + * Copyright (c) 2019, 2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,29 +36,27 @@ namespace reference template <typename T> SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float epsilon) { - // Create reference - SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 }; - - const int cols = src.shape()[0]; - const int batch_size = src.shape()[1]; - - for(int i = 0; i < batch_size; ++i) - { - T sum = static_cast<T>(0.f); - T sum_sq = static_cast<T>(0.f); - for(int j = 0; j < cols; ++j) - { - const T value = src[j + i * cols]; - sum += value; - sum_sq += value * value; - } - const T mean = sum / static_cast<T>(cols); - const T var = ((sum_sq / static_cast<T>(cols)) - (mean * mean)) + static_cast<T>(epsilon); - const T stddev_inv = static_cast<T>(1.f) / static_cast<T>(std::sqrt(var)); - for(int j = 0; j < cols; ++j) - { - dst[j + i * cols] = (src[j + i * cols] - mean) * stddev_inv; - } + SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 }; + const int cols = src.shape()[0]; + const int batch_size = src.shape()[1]; + for(int i = 0; i < batch_size; ++i) + { + // Accumulate in float regardless of T to avoid precision loss for half. + float sum = 0.f; + float sum_sq = 0.f; + for(int j = 0; j < cols; ++j) + { + const T value = src[j + i * cols]; + sum += value; + sum_sq += value * value; + } + const float mean = sum / cols; + const float var = (((sum_sq / cols) - (mean * mean)) + epsilon); + const float stddev_inv = 1.f / std::sqrt(var); + for(int j = 0; j < cols; ++j) + { + const float res = (src[j + i * cols] - mean) * stddev_inv; + dst[j + i * cols] = static_cast<T>(res); + } } return dst; } diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp index 7aa3011d8f..c06bc752cb 100644 --- a/tests/validation/reference/Permute.cpp +++ b/tests/validation/reference/Permute.cpp @@ -67,6 +67,9 @@ template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, Permu template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm); template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm); template SimpleTensor<bfloat16> permute(const SimpleTensor<bfloat16> &src, PermutationVector perm); +#ifdef ARM_COMPUTE_ENABLE_FP16 +template SimpleTensor<float16_t> permute(const SimpleTensor<float16_t> &src, PermutationVector perm); +#endif /* ARM_COMPUTE_ENABLE_FP16 */ } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/runtime/experimental/low_level/CpuGemmAssemblyDispatch.cpp b/tests/validation/runtime/experimental/low_level/CpuGemmAssemblyDispatch.cpp new file mode 100644 index 0000000000..613ec24bff
--- /dev/null +++ b/tests/validation/runtime/experimental/low_level/CpuGemmAssemblyDispatch.cpp @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/low_level/CpuGemmAssemblyDispatch.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/LargeGEMMDataset.h" +#include "tests/datasets/SmallGEMMDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/CpuGemmAssemblyDispatchFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_f( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ +#ifdef ARM_COMPUTE_ENABLE_FP16 +RelativeTolerance<half_float::half> rel_tolerance_f16(half( + 0.2)); /**< Relative tolerance value for comparing reference's output against implementation's output for FP16 data types */ +const AbsoluteTolerance<float> abs_tolerance_f16( + 0.2f); /**< Absolute tolerance value for comparing reference's output against implementation's output for FP16 data types */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ +/** CNN data types */ +const auto CNNDataTypes = make("DataType", + { +#ifdef ARM_COMPUTE_ENABLE_FP16 + DataType::F16, +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + DataType::F32, + }); + +const auto data_interleave = make("M", 8, 12) * make("N", 8, 12); +const auto data_transpose = make("M", 8, 14) * make("N", 7, 14); + +/** Zero padding test */ +template <typename FunctionType> +bool validate_zero_padding(unsigned int dim0_value, unsigned int dim1_value) +{ + const TensorShape in_shape(dim0_value, dim1_value); + TensorInfo in(in_shape, 1, DataType::U32); + TensorInfo dst; + + ARM_COMPUTE_EXPECT(in.is_resizable(), framework::LogLevel::ERRORS); + + // Validate zero-padding + FunctionType func; + + func.configure(&in, &dst); + + return in.padding().empty(); +} + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(LOW_LEVEL) +TEST_SUITE(CpuGemmAssemblyDispatch) + +/** Test case for memory injection in @ref experimental::op::ll::CpuGemmAssemblyDispatch. 
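+ * Memory injection here means the operator owns no memory of its own: the auxiliary workspace is created by the caller through manage_workspace() and all tensors are handed to prepare()/run() via ITensorPack objects.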
+ * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<experimental::op::ll::CpuGemmAssemblyDispatch>(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, gemm_info); + + // lhs, rhs and c are created and allocated once here, then reused by every run of the lambda function below + auto lhs = create_tensor<Tensor>(lhs_info); + auto rhs = create_tensor<Tensor>(rhs_info); + auto c = create_tensor<Tensor>(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{{TensorType::ACL_SRC_0, &lhs}, {TensorType::ACL_SRC_1, &rhs}, {TensorType::ACL_SRC_2, &c}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &rhs}, {TensorType::ACL_SRC_2, &c}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == reinterpret_cast<float *>(result_1.buffer())[i], + framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref experimental::op::ll::CpuGemmAssemblyDispatch. + * + * Make sure @ref experimental::op::ll::CpuGemmAssemblyDispatch still works through injecting the memory at configure time using the old API.
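+ * Unlike the MemoryInjection test above, configure() and the workspace creation happen inside the lambda, so every execution sets up its own auxiliary memory.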
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<experimental::op::ll::CpuGemmAssemblyDispatch>(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + auto run_conv = [&]() + { + Tensor lhs = create_tensor<Tensor>(lhs_info); + Tensor rhs = create_tensor<Tensor>(rhs_info); + Tensor c = create_tensor<Tensor>(c_info); + Tensor dst = create_tensor<Tensor>(dst_info); + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, gemm_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + + ITensorPack run_pack{{TensorType::ACL_SRC_0, &lhs}, + {TensorType::ACL_SRC_1, &rhs}, + {TensorType::ACL_SRC_2, &c}, + {TensorType::ACL_DST_0, &dst}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &rhs}, {TensorType::ACL_SRC_2, &c}}; + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack); + + gemm->prepare(prep_pack); + gemm->run(run_pack); + lhs.allocator()->free(); + rhs.allocator()->free(); + c.allocator()->free(); + + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT((reinterpret_cast<float *>(result_0.buffer()))[i] == (reinterpret_cast<float *>(result_1.buffer()))[i], + framework::LogLevel::ERRORS); + }; +} + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), + }), + make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), + }), + make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), + }), + make("Expected", { false, true })), + lhs_info, rhs_info, output_info, expected) +{ + const auto gemm_info = GEMMInfo(); + bool is_valid = bool(experimental::op::ll::CpuGemmAssemblyDispatch::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), gemm_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} + +template <typename T> +using CpuGemmAssemblyDispatchFixture = CpuGemmAssemblyDispatchValidationFixture<Tensor, Accessor, experimental::op::ll::CpuGemmAssemblyDispatch, T, false /* accumulate */>; +template <typename T> +using CpuGemmAccumulateFixture = CpuGemmAssemblyDispatchValidationFixture<Tensor, Accessor, experimental::op::ll::CpuGemmAssemblyDispatch, T, true /* accumulate */>; + +TEST_SUITE(Float) + +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("is_c_null", { false, false, false, 
true }), + make("Expected", { true, true, true, true }))), + shape_a, shape_b, shape_dst, is_c_null, expected) +{ + ARM_COMPUTE_UNUSED(is_c_null); + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + Status status = experimental::op::ll::CpuGemmAssemblyDispatch::validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmAssemblyDispatchFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), + make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CpuGemmAssemblyDispatchFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), + make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); +} + + +TEST_SUITE_END() // FP16 +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmAssemblyDispatchFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CpuGemmAssemblyDispatchFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} + + +TEST_SUITE(ACCUMULATE) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmAccumulateFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallAccumulateGEMMDataset(), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CpuGemmAccumulateFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeAccumulateGEMMDataset(), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +TEST_SUITE_END() // ACCUMULATE +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // CpuGemmAssemblyDispatch +TEST_SUITE_END() // LOW_LEVEL +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuActivation.cpp b/tests/validation/runtime/experimental/operators/CpuActivation.cpp new file mode 100644 index 0000000000..8b52cc7ffc --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuActivation.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2024 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuActivation.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ActivationFunctionsDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/CpuActivationFixture.h" + +/* + * Tests for arm_compute::experimental::op::CpuActivation, which is a shallow wrapper for + * arm_compute::cpu::CpuActivation. Any future tests of the functionality of cpu::CpuActivation + * belong in tests/NEON/ActivationLayer.cpp, as long as op::CpuActivation remains a + * shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Define relative tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * + * @return Relative tolerance depending on the activation function. + */ +RelativeTolerance<float> relative_tolerance(ActivationLayerInfo::ActivationFunction activation) +{ + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + case ActivationLayerInfo::ActivationFunction::ELU: + case ActivationLayerInfo::ActivationFunction::SQRT: + case ActivationLayerInfo::ActivationFunction::TANH: + case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + case ActivationLayerInfo::ActivationFunction::SWISH: + case ActivationLayerInfo::ActivationFunction::GELU: + return RelativeTolerance<float>(0.05f); + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + return RelativeTolerance<float>(0.00001f); + default: + return RelativeTolerance<float>(0.f); + } +} + +/** Define absolute tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * + * @return Absolute tolerance depending on the activation function.
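+ * Functions evaluated through transcendental math (logistic, sqrt, tanh, swish, hard swish, soft relu) are allowed a small non-zero error; the remaining functions must match the reference exactly.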
+ */ +AbsoluteTolerance<float> absolute_tolerance(ActivationLayerInfo::ActivationFunction activation) +{ + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + case ActivationLayerInfo::ActivationFunction::SQRT: + case ActivationLayerInfo::ActivationFunction::TANH: + case ActivationLayerInfo::ActivationFunction::SWISH: + case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + return AbsoluteTolerance<float>(0.00001f); + default: + return AbsoluteTolerance<float>(0.f); + } +} + +const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), + framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH })); + +/** Input data sets. */ +const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuActivation) + +template <typename T> +using CpuActivationFixture = CpuActivationValidationFixture<Tensor, Accessor, experimental::op::CpuActivation, T>; + +TEST_SUITE(SmokeTest) +FIXTURE_DATA_TEST_CASE(SmokeTest, CpuActivationFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) + +{ + // Validate output + validate(Accessor(_target), _reference, relative_tolerance(_function), 0.f, absolute_tolerance(_function)); +} +TEST_SUITE_END() // SmokeTest + +TEST_SUITE_END() // CpuActivation +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuAdd.cpp b/tests/validation/runtime/experimental/operators/CpuAdd.cpp new file mode 100644 index 0000000000..97eaa9ce9e --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuAdd.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/experimental/operators/CpuAdd.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuAddKernel.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/validation/fixtures/CpuArithmeticOperationsFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", {false}); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(CpuAdd) + +using CpuAddFixture = CpuArithmeticAdditionValidationFixture<Tensor, Accessor, experimental::op::CpuAdd>; + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE( + SmokeTest, + CpuAddFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)), + framework::dataset::make("ConvertPolicy", {ConvertPolicy::SATURATE, ConvertPolicy::WRAP})), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // CpuAdd +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuDepthwiseConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuDepthwiseConv2d.cpp new file mode 100644 index 0000000000..29ee2edf83 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuDepthwiseConv2d.cpp @@ -0,0 +1,1231 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifdef __aarch64__ + +#include "arm_compute/runtime/experimental/operators/CpuDepthwiseConv2d.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" +#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/validation/fixtures/CpuDepthwiseConv2dFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +namespace +{ +constexpr RelativeTolerance<float> tolerance_f32( + 0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8( + 1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed( + 1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8_SIGNED */ + +const auto depth_multipliers = make("DepthMultiplier", {1, 2, 8}); +const auto large_depth_multipliers = make("DepthMultiplier", {5, 32}); + +// Activation Functions +const auto NoActivation = make("ActivationInfo", ActivationLayerInfo()); + +const auto ActivationFunctionsDataset = + make("ActivationInfo", {ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)}); + +const auto ActivationFunctionsDatasetNightly = + make("ActivationInfo", + { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), +#ifdef __aarch64__ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU), +#endif // __aarch64__ + }); + +const auto ActivationFunctionsQuantizedSmallDataset = + make("ActivationInfo", {ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)}); + +const auto ActivationFunctionsQuantizedDataset = + make("ActivationInfo", + { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), + }); + +// This is only used when there is fused activation +const auto input_qinfo_dataset = make("InputQInfo", + { + QuantizationInfo(0.3f, 10), + QuantizationInfo(2.2f, 10), + }); + +const auto 
IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo()); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuDepthwiseConv2d) + +TEST_CASE(OpCpuDepthwiseConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<arm_compute::experimental::op::CpuDepthwiseConv2d>(); + + auto src_shape = TensorShape(7U, 7U); + auto weights_shape = TensorShape(1U, 1U); + auto bias_shape = TensorShape(1U); + auto output_shape = TensorShape(7U, 7U); + + auto src_info = TensorInfo(src_shape, 1, DataType::F32, DataLayout::NHWC); + const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, DataLayout::NHWC); + const auto biases_info = TensorInfo(bias_shape, 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(output_shape, 1, DataType::F32, DataLayout::NHWC); + + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, PadStrideInfo(1, 1, 0, 0)); + auto const status = conv->validate(&src_info, &weights_info, &biases_info, &dst_info, PadStrideInfo(1, 1, 0, 0)); + ARM_COMPUTE_ASSERT(status); + + // src, weights and biases are created and allocated once here, then reused by every run of the lambda function below + auto src = create_tensor<Tensor>(src_info); + auto weights = create_tensor<Tensor>(weights_info); + auto biases = create_tensor<Tensor>(biases_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + + ITensorPack run_pack{ + {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, run_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weights), 2.f); + library->fill_tensor_value(Accessor(biases), 3.f); + // This operator is configured once and captured by this lambda.
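+ // prepare() packs the constant weights and biases into the injected workspace (the packing only happens on the first call); run() then reads every input and output from run_pack.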
+ conv->prepare(run_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT((reinterpret_cast<float *>(result_0.buffer()))[i] == + (reinterpret_cast<float *>(result_1.buffer()))[i], + framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE( + Validate3x3, + framework::DatasetMode::ALL, + zip(make("InputInfo", + { + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights + TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32), // Mismatching input feature maps + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Unsupported weights dimensions + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching depth multiplier + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::QASYMM8), // Invalid stride + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases size + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid biases dimensions + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Invalid output size + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // patch size bigger than input width + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // dilation < 1 + }), + make("WeightsInfo", + { + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), + }), + make("BiasesInfo", + { + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::S32), + TensorInfo(TensorShape(4U), 1, DataType::F32), + TensorInfo(TensorShape(2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + }), + make("OutputInfo", + { + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + }), + make("ConvInfo", + { + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(4, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + }), + make("DepthMultiplier", + { + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + }), 
+ make("Dilation", + { + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(25U, 1U), + Size2D(0U, 1U), + }), + make("Expected", {false, false, false, false, false, false, false, false, false, false})), + input_info, + weights_info, + biases_info, + output_info, + conv_info, + depth_multiplier, + dilation, + expected) +{ + bool is_valid = bool(experimental::op::CpuDepthwiseConv2d::validate( + &input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, + depth_multiplier, ActivationLayerInfo(), dilation)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE( + ValidateGeneric, + framework::DatasetMode::ALL, + zip(make("InputInfo", + { + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights + TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size + TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Patch size bigger than input width + TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Dilation < 1 + }), + make("WeightsInfo", + { + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), + }), + make("BiasesInfo", + { + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(4U), 1, DataType::F32), + TensorInfo(TensorShape(2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + }), + make("OutputInfo", + { + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), + }), + make("ConvInfo", + { + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + }), + make("DepthMultiplier", + { + 1, + 1, + 3, + 1, + 1, + 1, + 2, + 2, + }), + make("Dilation", + { + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(25U, 1U), + 
Size2D(0U, 1U), + }), + make("Expected", {false, false, false, false, false, false, false, false})), + input_info, + weights_info, + biases_info, + output_info, + conv_info, + depth_multiplier, + dilation, + expected) +{ + bool is_valid = bool(experimental::op::CpuDepthwiseConv2d::validate( + &input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, + depth_multiplier, ActivationLayerInfo(), dilation)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} + +template <typename T> +using CpuDepthwiseConv2dFixture = + CpuDepthwiseConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuDepthwiseConv2d, T>; +template <typename T> +using CpuDepthwiseConv2dMixedDataLayoutFixture = + CpuDepthwiseConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuDepthwiseConv2d, T, true>; +template <typename T> +using CpuDepthwiseConv2dVariableWeightsFixture = + CpuDepthwiseConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuDepthwiseConv2d, T, false, false, true>; + +TEST_SUITE(Float) +TEST_SUITE(F32) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", {5}), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDatasetNightly)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, + CpuDepthwiseConv2dMixedDataLayoutFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", {2}), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeCpuDepthwiseConv2dDataset(), + large_depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + 
make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, + CpuDepthwiseConv2dVariableWeightsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, + CpuDepthwiseConv2dMixedDataLayoutFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, + 
CpuDepthwiseConv2dVariableWeightsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, + CpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, + CpuDepthwiseConv2dVariableWeightsFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::F32), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Optimized +TEST_SUITE_END() // F32 +TEST_SUITE_END() // Float + +template <typename T> +using CpuDepthwiseConv2dQuantizedFixture = + CpuDepthwiseConv2dValidationQuantizedFixture<Tensor, Accessor, experimental::op::CpuDepthwiseConv2d, T>; +template <typename T> +using CpuDepthwiseConv2dQuantizedMixedDataLayoutFixture = + CpuDepthwiseConv2dValidationQuantizedFixture<Tensor, Accessor, experimental::op::CpuDepthwiseConv2d, T, true>; +using CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture = + CpuDepthwiseConv2dValidationQuantizedPerChannelFixture<Tensor, + Accessor, + experimental::op::CpuDepthwiseConv2d, + uint8_t, + int8_t>; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", {5}), + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", {QuantizationInfo(0.3f, 10)}), + make("DstQuantizationInfo", {QuantizationInfo(0.05f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, + 
CpuDepthwiseConv2dQuantizedMixedDataLayoutFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", {2}), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.8f, 1)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + 
make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.7f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, + CpuDepthwiseConv2dQuantizedMixedDataLayoutFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, 
tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, + CpuDepthwiseConv2dQuantizedFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Optimized +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", {5}), + make("DataType", DataType::QASYMM8_SIGNED), + make("SrcQuantizationInfo", {QuantizationInfo(0.3f, 10)}), + make("DstQuantizationInfo", {QuantizationInfo(0.05f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.8f, 1)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + 
validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.7f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} 
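Every FIXTURE_DATA_TEST_CASE_NEW in this file builds its parameter grid the same way: combine(...) takes the cartesian product of its argument datasets, and make("Name", {...}) turns a literal value list into one axis of that product, so a single test definition expands into one run per element of the product. A standalone C++ sketch of that expansion, using illustrative axis values rather than the framework's dataset types:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main()
    {
        // Illustrative stand-ins for three dataset axes, e.g. depth multipliers,
        // data types and data layouts as used by the fixtures in this file.
        const std::vector<int>         depth_multipliers{1, 2, 5};
        const std::vector<std::string> data_types{"QASYMM8", "QASYMM8_SIGNED"};
        const std::vector<std::string> layouts{"NHWC"};

        // combine(A, B, C) yields one test configuration per element of A x B x C.
        for (const int dm : depth_multipliers)
        {
            for (const std::string &dt : data_types)
            {
                for (const std::string &dl : layouts)
                {
                    std::printf("DepthMultiplier=%d DataType=%s DataLayout=%s\n", dm, dt.c_str(), dl.c_str());
                }
            }
        }
        return 0;
    }

Three axes of sizes 3, 2 and 1 thus register six configurations from a single test definition.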
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 10)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, + CpuDepthwiseConv2dQuantizedFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", {DataLayout::NHWC}), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // Optimized +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE(QSYMM8_PER_CHANNEL) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::NIGHTLY, + combine(make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", {5}), + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + make("SrcQuantizationInfo", {QuantizationInfo(0.3f, 10)}), + make("DstQuantizationInfo", {QuantizationInfo(0.05f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsQuantizedDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::PRECOMMIT, + 
combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic + +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, + CpuDepthwiseConv2dQuantizedSymmetricPerChannelFixture, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + input_qinfo_dataset, + make("DstQuantizationInfo", {QuantizationInfo(0.5f, 4)}), + make("DataLayout", {DataLayout::NHWC}), + make("ActivationInfo", {ActivationLayerInfo()}))) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Optimized +TEST_SUITE_END() // QSYMM8_PER_CHANNEL +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() // CpuDepthwiseConv2d +TEST_SUITE_END() // Operators +TEST_SUITE_END() // Neon +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // __aarch64__ diff --git a/tests/validation/runtime/experimental/operators/CpuElementwise.cpp b/tests/validation/runtime/experimental/operators/CpuElementwise.cpp new file mode 100644 index 0000000000..b2007ea22a --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuElementwise.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018-2021, 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuElementwise.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/CpuElementwiseFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_div_fp32(0.000001f); + +const auto ElementwiseFP32Dataset = combine( +    combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), +    framework::dataset::make("DataType", DataType::F32)); + +const auto InPlaceDataSet    = framework::dataset::make("InPlace", {false, true}); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", {false}); +} // namespace + +TEST_SUITE(NEON) + +TEST_SUITE(CpuElementwiseDivision) +template <typename T> +using CpuElementwiseDivisionFixture = +    CpuElementwiseDivisionValidationFixture<Tensor, Accessor, experimental::op::CpuElementwiseDivision, T>; + +TEST_SUITE(Float) +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, +                       CpuElementwiseDivisionFixture<float>, +                       framework::DatasetMode::ALL, +                       combine(combine(datasets::SmallShapes(), ElementwiseFP32Dataset), InPlaceDataSet)) +{ +    // Validate output +    validate(Accessor(_target), _reference, tolerance_div_fp32, 0.01); +} +TEST_SUITE_END() // F32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // CpuElementwiseDivision + +TEST_SUITE(CpuElementwiseMax) +template <typename T> +using CpuElementwiseMaxFixture = +    CpuElementwiseMaxValidationFixture<Tensor, Accessor, experimental::op::CpuElementwiseMax, T>; + +TEST_SUITE(Float) +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, +                       CpuElementwiseMaxFixture<float>, +                       framework::DatasetMode::ALL, +                       combine(combine(datasets::SmallShapes(), ElementwiseFP32Dataset), InPlaceDataSet)) +{ +    // Validate output +    validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // F32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // CpuElementwiseMax + +TEST_SUITE(CpuElementwiseMin) + +template <typename T> +using CpuElementwiseMinFixture = +    CpuElementwiseMinValidationFixture<Tensor, Accessor, experimental::op::CpuElementwiseMin, T>; + +TEST_SUITE(Float) +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, +                       CpuElementwiseMinFixture<float>, +                       framework::DatasetMode::ALL, +                       combine(combine(datasets::SmallShapes(), ElementwiseFP32Dataset), InPlaceDataSet)) +{ +    // Validate output +    validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // F32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // CpuElementwiseMin + +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuGemm.cpp
b/tests/validation/runtime/experimental/operators/CpuGemm.cpp index c6df429a4d..9d85f90712 100644 --- a/tests/validation/runtime/experimental/operators/CpuGemm.cpp +++ b/tests/validation/runtime/experimental/operators/CpuGemm.cpp @@ -29,9 +29,9 @@ #include "tests/validation/fixtures/GEMMFixture.h" /* - * Tests for arm_compute::experimental::ops::CpuGemm which is a shallow wrapper for + * Tests for arm_compute::experimental::op::CpuGemm which is a shallow wrapper for * arm_compute::cpu::CpuGemm. Any future testing to the functionalities of cpu::CpuGemm will - * be tested in tests/NEON/GEMM.cpp given that ops::CpuGemm remain a shallow wrapper. + * be tested in tests/NEON/GEMM.cpp given that op::CpuGemm remains a shallow wrapper. */ namespace arm_compute @@ -55,16 +55,16 @@ TEST_SUITE(NEON) TEST_SUITE(OPERATORS) TEST_SUITE(CPUGEMM) -/** Test case for memory injection in @ref arm_compute::experimental::ops::CpuGemm. +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuGemm. * * Configure the operator once and inject memory at run-time in multiple executions. * * Checks performed in order: * - Both runs compute the same output */ -TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) +TEST_CASE(OpCpuGemmMemoryInjection, framework::DatasetMode::ALL) { - auto gemm = std::make_unique<arm_compute::experimental::ops::CpuGemm>(); + auto gemm = std::make_unique<arm_compute::experimental::op::CpuGemm>(); const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); @@ -108,7 +108,7 @@ TEST_CASE(OpsCpuGemmMemoryInjection, framework::DatasetMode::ALL) } } -DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( +DATA_TEST_CASE(OpCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combine( zip(make("In0",{ TensorShape(21U, 13U) }), make("In1", { TensorShape(33U, 21U) }), make("Dst", { TensorShape(33U, 13U) })), @@ -130,7 +130,7 @@ DATA_TEST_CASE(OpsCpuGemmValidateAccumulate, framework::DatasetMode::ALL, combin gemm_info.set_accumulate(true); // Validate accumulation - arm_compute::experimental::ops::CpuGemm gemm; + arm_compute::experimental::op::CpuGemm gemm; Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); } diff --git a/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp new file mode 100644 index 0000000000..9d87a3d2e5 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuGemmConv2d.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemmConv2d.h" + +#include "arm_compute/core/CoreTypes.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/TinyConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuGemmConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuGemmConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuGemmConv2d. Any future testing of the functionality of cpu::CpuGemmConv2d will + * be done in tests/validation/NEON/ConvolutionLayer.cpp, given that op::CpuGemmConv2d remains a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const RelativeTolerance<float> rel_tolerance_f32(0.01f); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuGemmConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuGemmConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions.
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuGemmConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<arm_compute::experimental::op::CpuGemmConv2d>(); + + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW); + const auto weights_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto biases_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto pad_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); + + conv->configure(&src_info, &weights_info, &biases_info, &dst_info, pad_info); + auto const status = conv->validate(&src_info, &weights_info, &biases_info, &dst_info, pad_info); + ARM_COMPUTE_ASSERT(status); + + auto src = create_tensor<Tensor>(src_info); + auto weights = create_tensor<Tensor>(weights_info); + auto biases = create_tensor<Tensor>(biases_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + + ITensorPack run_pack{ + {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weights), 2.f); + library->fill_tensor_value(Accessor(biases), 3.f); + // This operator is configured once and captured by this lambda. + conv->prepare(prep_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT((reinterpret_cast<float *>(result_0.buffer()))[i] == + (reinterpret_cast<float *>(result_1.buffer()))[i], + framework::LogLevel::ERRORS); + } +} + +using CpuGemmConv2dFixture = CpuGemmConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuGemmConv2d>; + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, + CpuGemmConv2dFixture, + framework::DatasetMode::PRECOMMIT, + datasets::TinyConvolutionLayerDataset()) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // CpuGemmConv2d +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp new file mode 100644 index 0000000000..783fcfc75b --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuGemmDirectConv2d.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuGemmDirectConv2d.h" + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/experimental/Types.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/TinyConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuGemmDirectConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuGemmDirectConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuGemmDirectConv2d. Any future testing of the functionality of cpu::CpuGemmDirectConv2d will + * be done in tests/validation/NEON/ConvolutionLayer.cpp, given that op::CpuGemmDirectConv2d remains a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ + +namespace +{ +const RelativeTolerance<float> rel_tolerance_f32(0.01f); +} // namespace +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) + +TEST_SUITE(CpuGemmDirectConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuGemmDirectConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions.
+ * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuGemmDirectConv2dMemoryInjection, framework::DatasetMode::ALL) +{ +    auto conv = std::make_unique<arm_compute::experimental::op::CpuGemmDirectConv2d>(); + +    auto src_shape     = TensorShape(23U, 27U, 5U); +    auto weights_shape = TensorShape(23U, 3U, 5U, 21U); +    auto bias_shape    = TensorShape(21U); +    auto output_shape  = TensorShape(11U, 25U, 21U); + +    const auto src_info     = TensorInfo(src_shape, 1, DataType::F32, DataLayout::NHWC); +    const auto weights_info = TensorInfo(weights_shape, 1, DataType::F32, DataLayout::NHWC); +    const auto biases_info  = TensorInfo(bias_shape, 1, DataType::F32, DataLayout::NHWC); +    auto       dst_info     = TensorInfo(output_shape, 1, DataType::F32, DataLayout::NHWC); +    const auto conv_info    = Conv2dInfo{PadStrideInfo(2, 1, 0, 0), Size2D(1, 1), ActivationLayerInfo(), false, 1}; + +    conv->configure(&src_info, &weights_info, &biases_info, &dst_info, conv_info); +    auto const status = conv->validate(&src_info, &weights_info, &biases_info, &dst_info, conv_info); +    ARM_COMPUTE_ASSERT(status); + +    // These tensors are created and allocated once, then shared by both runs. +    auto src     = create_tensor<Tensor>(src_info); +    auto weights = create_tensor<Tensor>(weights_info); +    auto biases  = create_tensor<Tensor>(biases_info); + +    src.allocator()->allocate(); +    weights.allocator()->allocate(); +    biases.allocator()->allocate(); + +    ITensorPack run_pack{ +        {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; +    ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weights}, {TensorType::ACL_SRC_2, &biases}}; + +    auto mg = MemoryGroup{}; +    auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack); + +    auto run_conv = [&]() -> Tensor +    { +        auto dst = create_tensor<Tensor>(dst_info); +        dst.allocator()->allocate(); +        run_pack.add_tensor(TensorType::ACL_DST, &dst); + +        library->fill_tensor_value(Accessor(src), 1.f); +        library->fill_tensor_value(Accessor(weights), 2.f); +        library->fill_tensor_value(Accessor(biases), 3.f); +        // This operator is configured once and captured by this lambda. +        conv->prepare(prep_pack); +        conv->run(run_pack); +        return dst; +    }; +    auto result_0 = run_conv(); +    auto result_1 = run_conv(); +    for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) +    { +        ARM_COMPUTE_EXPECT((reinterpret_cast<float *>(result_0.buffer()))[i] == +                               (reinterpret_cast<float *>(result_1.buffer()))[i], +                           framework::LogLevel::ERRORS); +    } +} + +using CpuGemmDirectConv2dFixture = +    CpuGemmDirectConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuGemmDirectConv2d>; + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, +                       CpuGemmDirectConv2dFixture, +                       framework::DatasetMode::PRECOMMIT, +                       datasets::TinyConvolutionLayerDataset()) +{ +    // Validate output +    validate(Accessor(_target), _reference, rel_tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // CpuGemmDirectConv2d +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuMul.cpp b/tests/validation/runtime/experimental/operators/CpuMul.cpp new file mode 100644 index 0000000000..8cad6210a1 --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuMul.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017-2021, 2024 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/experimental/operators/CpuMul.h" + +#include "arm_compute/core/Rounding.h" + +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/validation/fixtures/CpuMulFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const float scale_255   = 1.f / 255.f; +const float scale_other = 1.f / 32768.f; + +/** Tests for in-place computation + * Because the current interface stores the TensorInfo (including quantization + * information) in the kernel, an input that is reused as the output cannot + * carry different tensor metadata (e.g. quantization information, data type, + * or a different shape for broadcasting). + * In-place computation is therefore only tested where passing the exact same + * input and output Tensor object makes sense (i.e. all tensor metadata + * matches); cases where the output differs from the input in quantization + * information, data type or shape are tested out of place only.
+ */ +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", {false}); +} // namespace + +using CpuMulU8U8toS16Fixture = + CpuMulValidationFixture<Tensor, Accessor, experimental::op::CpuMul, uint8_t, uint8_t, int16_t>; + +TEST_SUITE(NEON) +TEST_SUITE(CpuMul) + +TEST_SUITE(U8U8toS16) +FIXTURE_DATA_TEST_CASE( + SmokeTest0, + CpuMulU8U8toS16Fixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataTypeIn1", DataType::U8)), + framework::dataset::make("DataTypeIn2", DataType::U8)), + framework::dataset::make("DataTypeOut", DataType::S16)), + framework::dataset::make("Scale", {scale_255})), + datasets::ConvertPolicies()), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)), + OutOfPlaceDataSet)) +{ + // Validate output + validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<int16_t>(1), 0.f); +} + +FIXTURE_DATA_TEST_CASE( + SmokeTest1, + CpuMulU8U8toS16Fixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataTypeIn1", DataType::U8)), + framework::dataset::make("DataTypeIn2", DataType::U8)), + framework::dataset::make("DataTypeOut", DataType::S16)), + framework::dataset::make("Scale", {scale_other})), + datasets::ConvertPolicies()), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_ZERO)), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // U8U8toS16 + +TEST_SUITE_END() // CpuMul +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuSub.cpp b/tests/validation/runtime/experimental/operators/CpuSub.cpp new file mode 100644 index 0000000000..22f5ae8d7b --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuSub.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/experimental/operators/CpuSub.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuAddKernel.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/validation/fixtures/CpuArithmeticOperationsFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", {false}); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(CpuSub) + +using CpuSubFixture = CpuArithmeticSubtractionValidationFixture<Tensor, Accessor, experimental::op::CpuSub>; + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE( + SmokeTest, + CpuSubFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)), + framework::dataset::make("ConvertPolicy", {ConvertPolicy::SATURATE, ConvertPolicy::WRAP})), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // CpuSub +TEST_SUITE_END() // Neon +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp b/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp new file mode 100644 index 0000000000..b8fa14833e --- /dev/null +++ b/tests/validation/runtime/experimental/operators/CpuWinogradConv2d.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/experimental/operators/CpuWinogradConv2d.h" + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/experimental/Types.h" + +#include "src/core/helpers/MemoryHelpers.h" +#include "tests/datasets/SmallConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/Utils.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/fixtures/CpuWinogradConv2dFixture.h" +#include "tests/validation/Validation.h" +/* + * Tests for arm_compute::experimental::op::CpuWinogradConv2d which is a shallow wrapper for + * arm_compute::cpu::CpuWinogradConv2d. Any future testing to the functionalities of cpu::CpuWinogradConv2d will + * be tested in tests/validation/NEON/ConvolutionLayer.cpp given that op::CpuWinogradConv2d remain a shallow wrapper. +*/ + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; +namespace +{ +const AbsoluteTolerance<float> abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */ +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(OPERATORS) +TEST_SUITE(CpuWinogradConv2d) +/** Test case for memory injection in @ref arm_compute::experimental::op::CpuWinogradConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(OpCpuWinogradConv2dMemoryInjection, framework::DatasetMode::ALL) +{ + auto winograd = std::make_unique<experimental::op::CpuWinogradConv2d>(); + const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32); + const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32); + const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32); + const PadStrideInfo pad_info{}; + + winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info); + + // telhs are newly created every call of this lambda function + auto a = create_tensor<Tensor>(src_info); + auto b = create_tensor<Tensor>(b_info); + auto c = create_tensor<Tensor>(w_info); + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{{TensorType::ACL_SRC_0, &a}, {TensorType::ACL_SRC_1, &b}, {TensorType::ACL_SRC_2, &c}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &b}, {TensorType::ACL_SRC_2, &c}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack); + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + + run_pack.add_tensor(TensorType::ACL_DST, &dst); + library->fill_tensor_value(Accessor(a), 1.f); + library->fill_tensor_value(Accessor(b), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + + // This operator is configured once and captured by this lambda. 
+        winograd->prepare(prep_pack); +        winograd->run(run_pack); +        return dst; +    }; + +    auto result_0 = run_conv(); +    auto result_1 = run_conv(); + +    for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) +    { +        ARM_COMPUTE_EXPECT((reinterpret_cast<float *>(result_0.buffer()))[i] == +                               (reinterpret_cast<float *>(result_1.buffer()))[i], +                           framework::LogLevel::ERRORS); +    } +} + +using CpuWinogradConv2dFixture = +    CpuWinogradConv2dValidationFixture<Tensor, Accessor, experimental::op::CpuWinogradConv2d>; + +const auto ActivationFunctionsDataset = +    make("ActivationInfo", +         {ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), +          ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)}); + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(SmokeTest, +                       CpuWinogradConv2dFixture, +                       framework::DatasetMode::PRECOMMIT, +                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), ActivationFunctionsDataset)) +{ +    // Validate output +    validate(Accessor(_target), _reference, abs_tolerance_f32); +} +TEST_SUITE_END() // F32 +TEST_SUITE_END() // CpuWinogradConv2d +TEST_SUITE_END() // OPERATORS +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute
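The three OpCpu*MemoryInjection test cases above all exercise the same contract: an operator is configured once against TensorInfo metadata only, owns no tensor memory of its own, and receives all working memory through an ITensorPack on each execution, so back-to-back runs with freshly injected destination buffers must produce identical results. A minimal standalone C++ sketch of that contract (the ScaleOp class and its names are illustrative, not ACL API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // The operator stores configuration only; all tensor memory is injected per run.
    class ScaleOp
    {
    public:
        void configure(std::size_t num_elements, float scale)
        {
            _num_elements = num_elements;
            _scale        = scale;
        }
        // The caller owns src and dst and may pass different buffers on every run.
        void run(const float *src, float *dst) const
        {
            for (std::size_t i = 0; i < _num_elements; ++i)
            {
                dst[i] = src[i] * _scale;
            }
        }

    private:
        std::size_t _num_elements{0};
        float       _scale{1.f};
    };

    int main()
    {
        ScaleOp op;
        op.configure(4, 2.f); // configured once, as with conv->configure(...) above

        const std::vector<float> src{1.f, 2.f, 3.f, 4.f};
        std::vector<float>       dst0(4), dst1(4); // two separately injected outputs

        op.run(src.data(), dst0.data());
        op.run(src.data(), dst1.data());

        // Mirrors the checks above: both runs must compute the same output.
        for (std::size_t i = 0; i < dst0.size(); ++i)
        {
            assert(dst0[i] == dst1[i]);
        }
        return 0;
    }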