3 files changed, 17 insertions, 12 deletions
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 9b5fafb979..a5d1cb5dac 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -33,9 +33,6 @@ def filter_clang_tidy_lines( lines ):
     for i in range(0, len(lines)):
         line = lines[i]
 
-        if "/assembly/" in line:
-            continue
-
         if "/arm_gemm/" in line:
             continue
 
diff --git a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
index 7b1f3e7ba0..97c20dbd4e 100644
--- a/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
+++ b/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp
@@ -58,6 +58,7 @@ struct Kernel<float, float>
 template <typename To, typename Tr>
 Window NEGEMMNativeWrapperKernel<To, Tr>::configure_internal(float alpha, float beta)
 {
+    ARM_COMPUTE_UNUSED(alpha);
     using strategy = typename Kernel<To, Tr>::strategy;
 
     _beta = beta;
@@ -107,6 +108,7 @@ void NEGEMMNativeWrapperKernel<To, Tr>::run_internal(const Window &window, const
 
     auto on_new_row_size = [&](unsigned int start, unsigned int end)
     {
+        ARM_COMPUTE_UNUSED(start);
         m_end = std::min(end, _params.M);
     };
 
diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index e207ab007a..20aa1496b6 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -45,7 +45,7 @@ public:
     static constexpr unsigned int NUM_BUFFERS = 3;
 
     explicit BufferManagerMultipleThreads(unsigned int max_num_users)
-        : _max_num_users(max_num_users)
+        : _buffers(), _max_num_users(max_num_users)
     {
     }
     unsigned int num_buffers() const override
@@ -106,11 +106,15 @@ public:
         ARM_COMPUTE_ERROR_ON(buf.index != index); // Should have blocked in lock_to_reshape_if_needed()
         // Check if it's already ready to use:
         if(buf.state == State::IN_USE)
+        {
             return;
+        }
         std::unique_lock<std::mutex> lock(buf.mutex);
         //Double check it didn't change while we were acquiring the lock:
         if(buf.state == State::IN_USE)
+        {
             return;
+        }
         buf.sem.wait(lock);
     }
     /* Mark the buffer at the given index as not used by this thread anymore.
@@ -143,7 +147,8 @@ private:
         State                   state{ State::FREE };
         std::mutex              mutex{};
         std::condition_variable sem{};
-    } _buffers[NUM_BUFFERS];
+    };
+    std::array<struct Buffer, NUM_BUFFERS> _buffers;
     Buffer &get_buffer_from_index(unsigned int index)
     {
         return _buffers[index % NUM_BUFFERS];
@@ -161,6 +166,7 @@ public:
     }
     bool lock_to_reshape_if_needed(unsigned int index) override
     {
+        ARM_COMPUTE_UNUSED(index);
         return true;
     }
     void mark_as_reshaped(unsigned int index) override
@@ -231,10 +237,10 @@ void NEGEMMInterleavedWrapper::prepare()
                 {
                     //For each block of rows in "M"
                     auto workload_mm = this->_mm_workloads.begin();
-                    for(auto workload_a = this->_a_workloads.begin(); workload_a != this->_a_workloads.end(); workload_a++)
+                    for(auto &workload_a : this->_a_workloads)
                     {
                         // Transform one k_block from A:
-                        this->_transform_a->transform(*workload_a, info, this->_batch_window, start_offset, end_offset);
+                        this->_transform_a->transform(workload_a, info, this->_batch_window, start_offset, end_offset);
                         // Then perform the matrix multiplication for each x block along N:
                         for(unsigned int i = 0; i < num_x_blocks; i++)
                         {
@@ -243,7 +249,7 @@ void NEGEMMInterleavedWrapper::prepare()
                         }
                     }
                 };
-                _workloads.push_back(workload);
+                _workloads.emplace_back(workload);
             }
             else
             {
@@ -255,10 +261,10 @@ void NEGEMMInterleavedWrapper::prepare()
                     //If there is only one thread then only reshape the B blocks as you need them:
                     unsigned int workload_b_next = num_threads == 1 ? this->_b_workloads.size() : 1;
 
-                    for(auto workload_a = this->_a_workloads.begin(); workload_a != this->_a_workloads.end(); workload_a++)
+                    for(auto &workload_a : this->_a_workloads)
                     {
                         // Transform one k_block from A:
-                        this->_transform_a->transform(*workload_a, info, this->_batch_window, start_offset, end_offset);
+                        this->_transform_a->transform(workload_a, info, this->_batch_window, start_offset, end_offset);
                         // Then perform the matrix multiplication for each x block along N:
                         for(unsigned int i = 0; i < num_x_blocks; i++)
                         {
@@ -287,7 +293,7 @@ void NEGEMMInterleavedWrapper::prepare()
                         }
                     }
                 };
-                _workloads.push_back(workload);
+                _workloads.emplace_back(workload);
             }
         }
         if(!_pretranspose_b && num_windows > 1 && num_windows % num_threads != 0)
@@ -325,7 +331,7 @@ void NEGEMMInterleavedWrapper::prepare()
                         workload_b++;
                     }
                 };
-                _workloads.push_back(workload);
+                _workloads.emplace_back(workload);
             }
         }