From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang format configuration
file(not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
---
 src/core/NEON/kernels/assembly/winograd.hpp | 181 ++++++++++++++++------------
 1 file changed, 106 insertions(+), 75 deletions(-)

(limited to 'src/core/NEON/kernels/assembly/winograd.hpp')

diff --git a/src/core/NEON/kernels/assembly/winograd.hpp b/src/core/NEON/kernels/assembly/winograd.hpp
index 50290757ec..dbf95d23cd 100644
--- a/src/core/NEON/kernels/assembly/winograd.hpp
+++ b/src/core/NEON/kernels/assembly/winograd.hpp
@@ -45,17 +45,24 @@ struct ConvolutionArgs
     Shape2D              kernel_shape;
     arm_gemm::Activation activation;
 
-    ConvolutionArgs(
-        unsigned int   n_batches,
-        const Shape2D &input_shape,
-        unsigned int   n_input_channels,
-        unsigned int pad_top, unsigned int pad_left,
-        const Shape2D              &output_shape,
-        unsigned int                n_output_channels,
-        const Shape2D               kernel_shape,
-        const arm_gemm::Activation &activation = {})
-        : n_batches(n_batches), input_shape(input_shape), n_input_channels(n_input_channels), pad_top(pad_top), pad_left(pad_left), output_shape(output_shape), n_output_channels(n_output_channels),
-          kernel_shape(kernel_shape), activation(activation)
+    ConvolutionArgs(unsigned int                n_batches,
+                    const Shape2D              &input_shape,
+                    unsigned int                n_input_channels,
+                    unsigned int                pad_top,
+                    unsigned int                pad_left,
+                    const Shape2D              &output_shape,
+                    unsigned int                n_output_channels,
+                    const Shape2D               kernel_shape,
+                    const arm_gemm::Activation &activation = {})
+        : n_batches(n_batches),
+          input_shape(input_shape),
+          n_input_channels(n_input_channels),
+          pad_top(pad_top),
+          pad_left(pad_left),
+          output_shape(output_shape),
+          n_output_channels(n_output_channels),
+          kernel_shape(kernel_shape),
+          activation(activation)
     {
     }
 };
@@ -105,23 +112,30 @@ public:
     virtual unsigned int get_transformed_tile_rows(void) const = 0;
     virtual unsigned int get_transformed_tile_cols(void) const = 0;
 
-    void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
-        void *outptr, const WinogradDomainSpec &wds,
-        unsigned int thread_id, unsigned int n_threads) const
+    void execute(const ConvolutionArgs    &args,
+                 const void               *inptr,
+                 size_t                    ld_in_row,
+                 size_t                    ld_in_col,
+                 size_t                    ld_input_channel,
+                 void                     *outptr,
+                 const WinogradDomainSpec &wds,
+                 unsigned int              thread_id,
+                 unsigned int              n_threads) const
     {
-        this->execute(
-            args, inptr, ld_in_row, ld_in_col, ld_input_channel,
-            outptr, wds.weight_ld_matrix, wds.weight_ld_row,
-            thread_id, n_threads);
+        this->execute(args, inptr, ld_in_row, ld_in_col, ld_input_channel, outptr, wds.weight_ld_matrix,
+                      wds.weight_ld_row, thread_id, n_threads);
     }
 
-    virtual void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
-        void *outptr, size_t ld_out_matrix, size_t ld_out_row,
-        unsigned int thread_id, unsigned int n_threads) const = 0;
+    virtual void execute(const ConvolutionArgs &args,
+                         const void            *inptr,
+                         size_t                 ld_in_row,
+                         size_t                 ld_in_col,
+                         size_t                 ld_input_channel,
+                         void                  *outptr,
+                         size_t                 ld_out_matrix,
+                         size_t                 ld_out_row,
+                         unsigned int           thread_id,
+                         unsigned int           n_threads) const = 0;
 };
 
 } // namespace weight_transform
@@ -136,27 +150,35 @@ public:
     virtual unsigned int get_input_rows(void) const = 0;
     virtual unsigned int get_input_cols(void) const = 0;
 
-    virtual size_t get_working_space_size(
-        const ConvolutionArgs &args,
-        unsigned int           n_threads) const = 0;
-
-    void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
-        void *outptr, const WinogradDomainSpec &wds,
-        void *working_space, unsigned int thread_id, unsigned int n_threads) const
+    virtual size_t get_working_space_size(const ConvolutionArgs &args, unsigned int n_threads) const = 0;
+
+    void execute(const ConvolutionArgs    &args,
+                 const void               *inptr,
+                 size_t                    ld_in_batch,
+                 size_t                    ld_in_row,
+                 size_t                    ld_in_col,
+                 void                     *outptr,
+                 const WinogradDomainSpec &wds,
+                 void                     *working_space,
+                 unsigned int              thread_id,
+                 unsigned int              n_threads) const
     {
-        this->execute(
-            args, inptr, ld_in_batch, ld_in_row, ld_in_col,
-            outptr, wds.input_ld_batch, wds.input_ld_matrix, wds.input_ld_row,
-            working_space, thread_id, n_threads);
+        this->execute(args, inptr, ld_in_batch, ld_in_row, ld_in_col, outptr, wds.input_ld_batch, wds.input_ld_matrix,
+                      wds.input_ld_row, working_space, thread_id, n_threads);
     }
 
-    virtual void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
-        void *outptr, size_t ld_out_batch, size_t ld_out_matrix, size_t ld_out_row,
-        void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
+    virtual void execute(const ConvolutionArgs &args,
+                         const void            *inptr,
+                         size_t                 ld_in_batch,
+                         size_t                 ld_in_row,
+                         size_t                 ld_in_col,
+                         void                  *outptr,
+                         size_t                 ld_out_batch,
+                         size_t                 ld_out_matrix,
+                         size_t                 ld_out_row,
+                         void                  *working_space,
+                         unsigned int           thread_id,
+                         unsigned int           n_threads) const = 0;
 };
 
 } // namespace input_transform
@@ -177,31 +199,37 @@ public:
     virtual unsigned int get_kernel_rows(void) const = 0;
     virtual unsigned int get_kernel_cols(void) const = 0;
 
-    virtual size_t get_working_space_size(
-        const ConvolutionArgs &args,
-        unsigned int           n_threads) const = 0;
-
-    void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, const WinogradDomainSpec &wds,
-        const void *bias,
-        void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
-        void *working_space, unsigned int thread_id, unsigned int n_threads) const
+    virtual size_t get_working_space_size(const ConvolutionArgs &args, unsigned int n_threads) const = 0;
+
+    void execute(const ConvolutionArgs    &args,
+                 const void               *inptr,
+                 const WinogradDomainSpec &wds,
+                 const void               *bias,
+                 void                     *outptr,
+                 size_t                    ld_out_batch,
+                 size_t                    ld_out_row,
+                 size_t                    ld_out_col,
+                 void                     *working_space,
+                 unsigned int              thread_id,
+                 unsigned int              n_threads) const
     {
-        this->execute(
-            args,
-            inptr, wds.output_ld_batch, wds.output_ld_matrix, wds.output_ld_row,
-            bias,
-            outptr, ld_out_batch, ld_out_row, ld_out_col,
-            working_space, thread_id, n_threads);
+        this->execute(args, inptr, wds.output_ld_batch, wds.output_ld_matrix, wds.output_ld_row, bias, outptr,
+                      ld_out_batch, ld_out_row, ld_out_col, working_space, thread_id, n_threads);
     }
 
-    virtual void execute(
-        const ConvolutionArgs &args,
-        const void *inptr, size_t ld_in_batch, size_t ld_in_matrix, size_t ld_in_row,
-        const void *bias,
-        void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
-        void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
+    virtual void execute(const ConvolutionArgs &args,
+                         const void            *inptr,
+                         size_t                 ld_in_batch,
+                         size_t                 ld_in_matrix,
+                         size_t                 ld_in_row,
+                         const void            *bias,
+                         void                  *outptr,
+                         size_t                 ld_out_batch,
+                         size_t                 ld_out_row,
+                         size_t                 ld_out_col,
+                         void                  *working_space,
+                         unsigned int           thread_id,
+                         unsigned int           n_threads) const = 0;
 };
 
 } // namespace output_transform
@@ -210,7 +238,7 @@ struct WinogradImpl
 {
     const output_transform::ITransform *output_transform = nullptr;
     const weight_transform::ITransform *weight_transform = nullptr;
-    const input_transform::ITransform *input_transform  = nullptr;
+    const input_transform::ITransform  *input_transform  = nullptr;
     std::unique_ptr<arm_gemm::GemmArgs> gemm_args;
     WinogradDomainSpec                  winograd_spec;
 };
@@ -220,15 +248,18 @@ struct WinogradImpl
  * Assigns to the pointers in the `dest` struct and returns true or false to
  * indicate whether the given problem can be executed or not.
  */
-template <typename TIn, typename TWeight = TIn, typename TOut = TIn, typename TWinogradIn = TIn, typename TWinogradOut = TOut>
-bool get_implementation(
-    WinogradImpl &dest, // Destination for the selected implementation
-    const CPUInfo *,
-    const ConvolutionArgs &,
-    int  max_threads,
-    bool fast_mode,
-    const WinogradConfig *,
-    const arm_gemm::GemmConfig *);
+template <typename TIn,
+          typename TWeight      = TIn,
+          typename TOut         = TIn,
+          typename TWinogradIn  = TIn,
+          typename TWinogradOut = TOut>
+bool get_implementation(WinogradImpl &dest, // Destination for the selected implementation
+                        const CPUInfo *,
+                        const ConvolutionArgs &,
+                        int  max_threads,
+                        bool fast_mode,
+                        const WinogradConfig *,
+                        const arm_gemm::GemmConfig *);
 
 } // namespace winograd
 } // namespace arm_conv
-- 
cgit v1.2.1