author     Georgios Pinitas <georgios.pinitas@arm.com>  2020-12-03 18:51:58 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>  2020-12-09 17:39:16 +0000
commit     ec2256b81e6d6f655dcfbc76683738fbfeb82bcc (patch)
tree       262ea08d3df23910dae663a79c915e585681530f
parent     98e33b97b92c912f058bfb3295adad1bcad3e80f (diff)
download   ComputeLibrary-ec2256b81e6d6f655dcfbc76683738fbfeb82bcc.tar.gz
Make NEGEMMAssemblyDispatch an internal function
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I89ee26c1595d510c5048904cae9422528b76cd45
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4662
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
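The pattern applied throughout this patch is the usual way to take a class out of the public API while the public function classes keep using it: the header moves under src/, the public headers only forward-declare the class, the member becomes a std::unique_ptr, and the owning class gains an out-of-line destructor defined where the full type is visible. A minimal self-contained C++ sketch of that pattern (hypothetical Dispatch/PublicFunction names, not the library's actual classes):

    // public_function.h -- installed header: only a forward declaration is exposed
    #include <memory>

    namespace example
    {
    class Dispatch; // internal type, defined only under src/

    class PublicFunction
    {
    public:
        PublicFunction();
        ~PublicFunction(); // declared here, defined where Dispatch is a complete type
        void run();

    private:
        std::unique_ptr<Dispatch> _dispatch; // an incomplete type is fine for the member
    };
    } // namespace example

    // public_function.cpp -- the internal header is included only here
    namespace example
    {
    class Dispatch // stands in for the header kept under src/
    {
    public:
        void run() {}
    };

    PublicFunction::PublicFunction() : _dispatch(std::make_unique<Dispatch>()) {}
    PublicFunction::~PublicFunction() = default; // Dispatch is complete here, so unique_ptr can delete it
    void PublicFunction::run() { _dispatch->run(); }
    } // namespace example

This is why the diff below adds an explicit ~NEGEMMConv2d() declaration and defaults it in the .cpp file.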
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h                              1
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMM.h                         6
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConv2d.h                  19
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h   5
-rw-r--r--  docs/00_introduction.dox                                            3
-rw-r--r--  src/runtime/NEON/functions/NEGEMM.cpp                              18
-rw-r--r--  src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp               2
-rw-r--r--  src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h (renamed from arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h)  6
-rw-r--r--  src/runtime/NEON/functions/NEGEMMConv2d.cpp                        14
-rw-r--r--  src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp        27
-rw-r--r--  src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp           2
11 files changed, 58 insertions(+), 45 deletions(-)
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 5ac94102fc..f35144481d 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -75,7 +75,6 @@
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 645ab56417..124f027227 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -30,17 +30,19 @@
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
+// Forward declarations
class NEGEMMInterleave4x4Kernel;
class NEGEMMMatrixAdditionKernel;
class NEGEMMMatrixMultiplyKernel;
class NEGEMMTranspose1xWKernel;
+class NEGEMMAssemblyDispatch;
+
/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
*
* If optimized assembly is available:
@@ -112,7 +114,7 @@ private:
std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
- NEGEMMAssemblyDispatch _asm_glue;
+ std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue;
std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
NEActivationLayer _alpha_scale_func;
NEArithmeticAddition _add_bias;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index 7cae39397f..2b3c162eab 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -28,7 +28,6 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"
@@ -37,6 +36,8 @@ namespace arm_compute
{
// Forward declarations
class ITensor;
+class NEGEMMAssemblyDispatch;
+
/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
*
* Supports only NHWC data layout
@@ -60,6 +61,8 @@ public:
NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete;
/** Default move assignment operator */
NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default;
+ /** Destructor */
+ ~NEGEMMConv2d();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -96,13 +99,13 @@ public:
void prepare() override;
private:
- NEGEMMAssemblyDispatch _gemm_asm_func;
- NEActivationLayer _activation_func;
- NEPermute _weights_permute_func;
- const ITensor *_original_weights;
- Tensor _permuted_weights;
- bool _is_prepared;
- bool _run_activation;
+ std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func;
+ NEActivationLayer _activation_func;
+ NEPermute _weights_permute_func;
+ const ITensor *_original_weights;
+ Tensor _permuted_weights;
+ bool _is_prepared;
+ bool _run_activation;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMCONV2D_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index cb1d6bd782..8eea9d7d24 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -27,8 +27,8 @@
#include "NEActivationLayer.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -45,6 +45,7 @@ class NEGEMMLowpOffsetContributionOutputStageKernel;
class NEGEMMLowpMatrixAReductionKernel;
class NEGEMMLowpMatrixBReductionKernel;
class NEGEMMTranspose1xWKernel;
+class NEGEMMAssemblyDispatch;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
*
@@ -115,7 +116,7 @@ public:
private:
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
- NEGEMMAssemblyDispatch _asm_glue;
+ std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue;
std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 7ad4831082..ecdd72c436 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -95,6 +95,7 @@ v21.02 Public major release
- NEGEMMInterleave4x4
- NEGEMMTranspose1xW
- NEComputeAllAnchors / CLComputeAllAnchors
+ - NEGEMMAssemblyDispatch
- Removed kernels:
- NEGEMMMatrixVectorMultiplyKernel
- NELocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedMatrixMultiplyKernel
@@ -486,7 +487,7 @@ v20.05 Public major release
- @ref NEDepthConvertLayerKernel
- @ref NEDepthConvertLayer
- @ref NEGEMMConvolutionLayer
- - @ref NEGEMMAssemblyDispatch
+ - NEGEMMAssemblyDispatch
- Added new data type QASYMM8_SIGNED support for:
- @ref CLDirectConvolutionLayer
- @ref CLDeconvolutionLayer
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 03f5aa37c1..6d83480cb9 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -31,7 +31,6 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/CPP/Validate.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
@@ -39,6 +38,7 @@
#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include <cmath>
@@ -61,7 +61,7 @@ AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
} // namespace
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(),
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>()), _ma_kernel(),
_alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
_run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
{
@@ -90,8 +90,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
if(run_optimised)
{
const ITensor *c_to_use = is_c_bias ? c : nullptr;
- _asm_glue.configure(a, b, c_to_use, d, asm_info);
- ARM_COMPUTE_ERROR_ON(!_asm_glue.is_configured());
+ _asm_glue->configure(a, b, c_to_use, d, asm_info);
+ ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured());
// Scale product by alpha
if(_run_alpha_scale)
@@ -312,9 +312,9 @@ void NEGEMM::run()
MemoryGroupResourceScope scope_mg(_memory_group);
- if(_asm_glue.is_configured())
+ if(_asm_glue->is_configured())
{
- _asm_glue.run();
+ _asm_glue->run();
if(_run_alpha_scale)
{
_alpha_scale_func.run();
@@ -361,20 +361,20 @@ void NEGEMM::prepare()
if(!_is_prepared)
{
const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b);
- if(_asm_glue.is_configured())
+ if(_asm_glue->is_configured())
{
if(!original_b_managed_by_weights_manager)
{
ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
}
- _asm_glue.prepare();
+ _asm_glue->prepare();
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
}
}
- else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured())
+ else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured())
{
if(!original_b_managed_by_weights_manager)
{
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index b54389cf5f..1c86393406 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/CPP/Validate.h"
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index 8f9498d0f5..466e60183a 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H
+#ifndef SRC_NEGEMMASSEMBLYDISPATCH_H
+#define SRC_NEGEMMASSEMBLYDISPATCH_H
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -122,4 +122,4 @@ private:
IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */
+#endif /* SRC_NEGEMMASSEMBLYDISPATCH_H */
diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp
index 860b6bb4e1..b8349d98db 100644
--- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp
@@ -22,9 +22,11 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
+
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include <set>
@@ -81,9 +83,13 @@ AsmGemmInfo init_assembly_metadata(const Conv2dInfo &info, bool is_indirect)
} // namespace
NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager)
- : _gemm_asm_func(memory_manager), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), _run_activation(false)
+ : _gemm_asm_func(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager)), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false),
+ _run_activation(false)
{
}
+
+NEGEMMConv2d::~NEGEMMConv2d() = default;
+
void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -101,10 +107,10 @@ void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITens
{
asm_info.output_stage = calculate_output_stage_metadata(input->info(), weights->info(), output->info(), info.act_info);
}
- _gemm_asm_func.configure(input, &_permuted_weights, biases, output, asm_info);
+ _gemm_asm_func->configure(input, &_permuted_weights, biases, output, asm_info);
// Configure activation
- if(info.act_info.enabled() && !_gemm_asm_func.is_activation_supported(info.act_info))
+ if(info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info))
{
_activation_func.configure(output, nullptr, info.act_info);
_run_activation = true;
@@ -150,7 +156,7 @@ void NEGEMMConv2d::run()
{
prepare();
- _gemm_asm_func.run();
+ _gemm_asm_func->run();
if(_run_activation)
{
_activation_func.run();
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 50c7fe4c66..921626f0fe 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -42,6 +42,7 @@
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
namespace arm_compute
{
@@ -65,10 +66,10 @@ using namespace arm_compute::misc::shape_calculator;
NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default;
NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(),
- _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(),
- _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0),
- _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false),
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager, weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(),
+ _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(),
+ _convert_to_signed_asymm(), _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0),
+ _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false),
_run_activation(false), _flip_signedness(false)
{
}
@@ -145,14 +146,14 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
{
if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- _asm_glue.configure(a_to_use, b, c, output, asm_info);
- _fused_assembly_path = _asm_glue.is_configured();
+ _asm_glue->configure(a_to_use, b, c, output, asm_info);
+ _fused_assembly_path = _asm_glue->is_configured();
}
else
{
- _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info);
+ _asm_glue->configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info);
}
- _assembly_path = _asm_glue.is_configured();
+ _assembly_path = _asm_glue->is_configured();
break;
}
default:
@@ -510,9 +511,9 @@ void NEGEMMLowpMatrixMultiplyCore::run()
}
// Run GEMM
- if(_asm_glue.is_configured())
+ if(_asm_glue->is_configured())
{
- _asm_glue.run();
+ _asm_glue->run();
}
else
{
@@ -575,21 +576,21 @@ void NEGEMMLowpMatrixMultiplyCore::prepare()
{
const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b);
// Run assembly reshape
- if(_asm_glue.is_configured())
+ if(_asm_glue->is_configured())
{
if(!original_b_managed_by_weights_manager)
{
ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
}
- _asm_glue.prepare();
+ _asm_glue->prepare();
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
}
}
// Run non-assembly reshape
- else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured())
+ else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured())
{
if(!original_b_managed_by_weights_manager)
{
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 265df9246f..bd3bdd6a26 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -28,13 +28,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "src/core/CPP/Validate.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
+#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "src/core/NEON/kernels/convolution/common/utils.hpp"
#include "src/core/NEON/kernels/convolution/winograd/winograd.hpp"