4 files changed, 33 insertions, 24 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index d4a9f68beb..9df2e08956 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -41,12 +41,15 @@ class NEGEMMInterleave4x4Kernel;
 class NEGEMMMatrixAdditionKernel;
 class NEGEMMMatrixMultiplyKernel;
 class NEGEMMTranspose1xWKernel;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch; // Forward declaration; NEGEMM holds it through std::unique_ptr (_asm_glue)
+} // namespace cpu
 
 /** Basic function to execute GEMM. This function calls the following kernels:
  *
  * If optimized assembly is available:
- *  -# @ref NEGEMMAssemblyDispatch
+ *  -# @ref cpu::CpuGemmAssemblyDispatch
  *  -# @ref NEActivationLayer (if alpha != 1.0)
  * Else:
  *  -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
@@ -119,16 +122,16 @@ public:
     void prepare() override;
 
 private:
-    MemoryGroup                                 _memory_group;
-    IWeightsManager                            *_weights_manager;
-    std::unique_ptr<NEGEMMInterleave4x4Kernel>  _interleave_kernel;
-    std::unique_ptr<NEGEMMTranspose1xWKernel>   _transpose_kernel;
-    std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
-    std::unique_ptr<NEGEMMAssemblyDispatch>     _asm_glue;
-    std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
-    NEActivationLayer                           _alpha_scale_func;
-    NEArithmeticAddition                        _add_bias;
-    NEActivationLayer                           _activation_func;
+    MemoryGroup                                   _memory_group;
+    IWeightsManager                              *_weights_manager;
+    std::unique_ptr<NEGEMMInterleave4x4Kernel>    _interleave_kernel;
+    std::unique_ptr<NEGEMMTranspose1xWKernel>     _transpose_kernel;
+    std::unique_ptr<NEGEMMMatrixMultiplyKernel>   _mm_kernel;
+    std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue;
+    std::unique_ptr<NEGEMMMatrixAdditionKernel>   _ma_kernel;
+    NEActivationLayer                             _alpha_scale_func;
+    NEArithmeticAddition                          _add_bias;
+    NEActivationLayer                             _activation_func;
 
     Tensor         _tmp_a;
     Tensor         _tmp_b;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index b2ffd038de..6c71f0e188 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -36,13 +36,16 @@ namespace arm_compute
 {
 // Forward declarations
 class ITensor;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch; // Forward declaration; NEGEMMConv2d holds it through std::unique_ptr (_gemm_asm_func)
+} // namespace cpu
 
 /** Basic function to compute the convolution layer. This function calls the following kernels/functions:
  *
  * Supports only NHWC data layout
  *
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
  * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch
  *
  * Weights are transformed from OHWI to HWIO format using the following kernels:
@@ -111,13 +114,13 @@ public:
     void prepare() override;
 
 private:
-    std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func;
-    NEActivationLayer                       _activation_func;
-    NEPermute                               _weights_permute_func;
-    const ITensor                          *_original_weights;
-    Tensor                                  _permuted_weights;
-    bool                                    _is_prepared;
-    bool                                    _run_activation;
+    std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _gemm_asm_func;
+    NEActivationLayer                             _activation_func;
+    NEPermute                                     _weights_permute_func;
+    const ITensor                                *_original_weights;
+    Tensor                                        _permuted_weights;
+    bool                                          _is_prepared;
+    bool                                          _run_activation;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 780723e752..a292712bd7 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -44,7 +44,10 @@ class NEGEMMLowpOffsetContributionOutputStageKernel;
 class NEGEMMLowpMatrixAReductionKernel;
 class NEGEMMLowpMatrixBReductionKernel;
 class NEGEMMTranspose1xWKernel;
-class NEGEMMAssemblyDispatch;
+namespace cpu
+{
+class CpuGemmAssemblyDispatch; // Forward declaration; held through std::unique_ptr (_asm_glue) in the class below
+} // namespace cpu
 
 /** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
  *
@@ -135,7 +138,7 @@ public:
 private:
     MemoryGroup                                                    _memory_group;
     IWeightsManager                                               *_weights_manager;
-    std::unique_ptr<NEGEMMAssemblyDispatch>                        _asm_glue;
+    std::unique_ptr<cpu::CpuGemmAssemblyDispatch>                  _asm_glue;
     std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel>                _mm_kernel;
     std::unique_ptr<NEGEMMInterleave4x4Kernel>                     _mtx_a_reshape_kernel;
     std::unique_ptr<NEGEMMTranspose1xWKernel>                      _mtx_b_reshape_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 77f9093ed4..f9ebf608cb 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -47,7 +47,7 @@ class ICPPKernel;
  * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
  * -# @ref NEWinogradLayerTransformInputKernel
  * -# @ref NEWinogradLayerTransformOutputKernel
- * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref cpu::CpuGemmAssemblyDispatch
  * -# @ref CPPPermute (three times: weights, input and output)
  *
  * @note  Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true