author     Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>    2023-09-27 17:46:17 +0100
committer  felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>    2023-09-28 12:08:05 +0000
commit     afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree       03bc7d5a762099989b16a656fa8d397b490ed70e /src/cpu/kernels/assembly
parent     bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
download   ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang format configuration file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
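The change is purely mechanical: layout only, no behavioural difference. To make the diff below easier to read, here is an illustrative before/after pair (a hypothetical Example class, not taken from the commit) showing two of the rules visible throughout: short constructor initialiser lists are joined onto one line, and "if(" gains a space.

    #include <string>
    #include <utility>

    class Example
    {
    public:
        // Before the reformat this constructor was split as:
        //   Example(std::string name)
        //       : _name(std::move(name))
        // clang-format 14 with the revised configuration joins it:
        Example(std::string name) : _name(std::move(name))
        {
        }

        void run()
        {
            // "if(" becomes "if (" everywhere in the formatted files.
            if (!_name.empty())
            {
                _name += "/tag";
            }
        }

    private:
        std::string _name;
    };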
Diffstat (limited to 'src/cpu/kernels/assembly')
-rw-r--r--  src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h  12
-rw-r--r--  src/cpu/kernels/assembly/arm_gemm.hpp                    91
-rw-r--r--  src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp      38
-rw-r--r--  src/cpu/kernels/assembly/gemm_common.hpp                 74
-rw-r--r--  src/cpu/kernels/assembly/ndrange.hpp                     19
5 files changed, 148 insertions, 86 deletions
diff --git a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h
index 10bf8e4ff7..6e8f32ef47 100644
--- a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/NEON/INEKernel.h"
#include "src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp"
@@ -57,13 +58,12 @@ class CpuGemmAssemblyWrapperKernel final : public INEKernel
public:
/** Constructor
*/
- CpuGemmAssemblyWrapperKernel()
- : _kernel(nullptr), _name("CpuGemmAssemblyWrapperKernel")
+ CpuGemmAssemblyWrapperKernel() : _kernel(nullptr), _name("CpuGemmAssemblyWrapperKernel")
{
}
- CpuGemmAssemblyWrapperKernel(CpuGemmAssemblyWrapperKernel &) = delete;
- CpuGemmAssemblyWrapperKernel(CpuGemmAssemblyWrapperKernel &&) = default;
+ CpuGemmAssemblyWrapperKernel(CpuGemmAssemblyWrapperKernel &) = delete;
+ CpuGemmAssemblyWrapperKernel(CpuGemmAssemblyWrapperKernel &&) = default;
CpuGemmAssemblyWrapperKernel &operator=(CpuGemmAssemblyWrapperKernel &) = delete;
const char *name() const override
@@ -110,7 +110,7 @@ public:
INEKernel::configure(win);
- if(!kernel_name_tag.empty())
+ if (!kernel_name_tag.empty())
{
_name += "/" + kernel_name_tag;
}
@@ -132,7 +132,7 @@ public:
private:
arm_gemm::GemmCommon<TypeInput, TypeOutput> *_kernel;
- std::string _name;
+ std::string _name;
};
} // namespace kernel
} // namespace cpu
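Everything in this file is whitespace and alignment; the class itself stays a move-only wrapper around a raw arm_gemm kernel pointer. A minimal sketch of that ownership pattern in isolation (hypothetical MoveOnlyWrapper, not part of the library):

    #include <string>

    class MoveOnlyWrapper
    {
    public:
        MoveOnlyWrapper() : _kernel(nullptr), _name("MoveOnlyWrapper")
        {
        }
        // Copying is deleted: two wrappers must not alias one kernel pointer.
        MoveOnlyWrapper(MoveOnlyWrapper &)  = delete;
        MoveOnlyWrapper(MoveOnlyWrapper &&) = default;
        MoveOnlyWrapper &operator=(MoveOnlyWrapper &) = delete;

    private:
        void       *_kernel;
        std::string _name;
    };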
diff --git a/src/cpu/kernels/assembly/arm_gemm.hpp b/src/cpu/kernels/assembly/arm_gemm.hpp
index 4c127b4ec3..9a913c5c58 100644
--- a/src/cpu/kernels/assembly/arm_gemm.hpp
+++ b/src/cpu/kernels/assembly/arm_gemm.hpp
@@ -23,13 +23,12 @@
*/
#pragma once
+#include "arm_gemm_local.hpp"
+#include "gemm_common.hpp"
#include <cstring>
#include <memory>
#include <vector>
-#include "arm_gemm_local.hpp"
-#include "gemm_common.hpp"
-
namespace arm_gemm
{
enum class GemmMethod
@@ -111,8 +110,7 @@ struct GemmConfig
unsigned int outer_block_size = 0;
WeightFormat weight_format = WeightFormat::ANY;
- GemmConfig(GemmMethod method)
- : method(method)
+ GemmConfig(GemmMethod method) : method(method)
{
}
GemmConfig()
@@ -133,8 +131,7 @@ struct Activation
float param1;
float param2;
- Activation(Type type = Type::None, float p1 = 0.0f, float p2 = 0.0f)
- : type(type), param1(p1), param2(p2)
+ Activation(Type type = Type::None, float p1 = 0.0f, float p2 = 0.0f) : type(type), param1(p1), param2(p2)
{
}
};
@@ -156,12 +153,32 @@ public:
bool _fast_mode;
const GemmConfig *_cfg;
- GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N,
- unsigned int K, unsigned int Ksections, unsigned int nbatches,
- unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads,
- bool fixed_format = false, bool fast_mode = false, const GemmConfig *cfg = nullptr)
- : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _Ksections(Ksections), _nbatches(nbatches), _nmulti(nmulti), _indirect_input(indirect_input), _act(act), _maxthreads(maxthreads),
- _fixed_format(fixed_format), _fast_mode(fast_mode), _cfg(cfg)
+ GemmArgs(const CPUInfo *ci,
+ unsigned int M,
+ unsigned int N,
+ unsigned int K,
+ unsigned int Ksections,
+ unsigned int nbatches,
+ unsigned int nmulti,
+ bool indirect_input,
+ Activation act,
+ const int maxthreads,
+ bool fixed_format = false,
+ bool fast_mode = false,
+ const GemmConfig *cfg = nullptr)
+ : _ci(ci),
+ _Msize(M),
+ _Nsize(N),
+ _Ksize(K),
+ _Ksections(Ksections),
+ _nbatches(nbatches),
+ _nmulti(nmulti),
+ _indirect_input(indirect_input),
+ _act(act),
+ _maxthreads(maxthreads),
+ _fixed_format(fixed_format),
+ _fast_mode(fast_mode),
+ _cfg(cfg)
{
}
};
@@ -187,23 +204,51 @@ public:
Requantize32() = default;
// Constructor for per-tensor quantization
- Requantize32(const int32_t *bias, size_t bias_multi_stride,
- int32_t a_offset, int32_t b_offset, int32_t c_offset,
- int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
- : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max<int32_t>(requant_shift, 0)),
- per_layer_right_shift(std::min<int32_t>(requant_shift, 0)), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
+ Requantize32(const int32_t *bias,
+ size_t bias_multi_stride,
+ int32_t a_offset,
+ int32_t b_offset,
+ int32_t c_offset,
+ int32_t requant_shift,
+ int32_t requant_mul,
+ int32_t minv,
+ int32_t maxv)
+ : bias(bias),
+ bias_multi_stride(bias_multi_stride),
+ a_offset(a_offset),
+ b_offset(b_offset),
+ c_offset(c_offset),
+ per_channel_requant(false),
+ per_layer_left_shift(std::max<int32_t>(requant_shift, 0)),
+ per_layer_right_shift(std::min<int32_t>(requant_shift, 0)),
+ per_layer_mul(requant_mul),
+ minval(minv),
+ maxval(maxv)
{
}
// Constructor for per-channel quantization
- Requantize32(const int32_t *bias, size_t bias_multi_stride,
- int32_t a_offset, int32_t b_offset, int32_t c_offset,
+ Requantize32(const int32_t *bias,
+ size_t bias_multi_stride,
+ int32_t a_offset,
+ int32_t b_offset,
+ int32_t c_offset,
const int32_t *requant_left_shifts,
const int32_t *requant_right_shifts,
const int32_t *requant_muls,
- int32_t minv, int32_t maxv)
- : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(true), per_channel_left_shifts(requant_left_shifts),
- per_channel_right_shifts(requant_right_shifts), per_channel_muls(requant_muls), minval(minv), maxval(maxv)
+ int32_t minv,
+ int32_t maxv)
+ : bias(bias),
+ bias_multi_stride(bias_multi_stride),
+ a_offset(a_offset),
+ b_offset(b_offset),
+ c_offset(c_offset),
+ per_channel_requant(true),
+ per_channel_left_shifts(requant_left_shifts),
+ per_channel_right_shifts(requant_right_shifts),
+ per_channel_muls(requant_muls),
+ minval(minv),
+ maxval(maxv)
{
}
};
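One detail the reflowed per-tensor constructor makes easier to see: a single signed requant_shift is split into a non-negative left shift and a non-positive right shift via std::max/std::min. A standalone sketch of that split (made-up shift values, nothing from the library needed beyond the standard headers):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        for (int32_t requant_shift : {3, 0, -2})
        {
            // Mirrors the Requantize32 member initialisers above.
            const int32_t left  = std::max<int32_t>(requant_shift, 0);
            const int32_t right = std::min<int32_t>(requant_shift, 0);
            std::printf("shift=%d -> left=%d right=%d\n", requant_shift, left, right);
        }
        return 0;
    }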
diff --git a/src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp b/src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp
index 718fcd1fb4..0672e899b6 100644
--- a/src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp
+++ b/src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/Window.h"
#include "ndrange.hpp"
-
#include <cassert>
/* This file contains mapping between integral types used in arm_compute and arm_gemm
@@ -38,8 +37,7 @@
namespace arm_gemm
{
//we want to unify the maximum number of dimensions used beween arm_gemm and arm compute library
-constexpr std::size_t ndrange_max =
- arm_compute::Dimensions<unsigned int>::num_max_dimensions;
+constexpr std::size_t ndrange_max = arm_compute::Dimensions<unsigned int>::num_max_dimensions;
using ndrange_t = NDRange<ndrange_max>;
using ndcoord_t = NDCoordinate<ndrange_max>;
@@ -56,7 +54,7 @@ inline arm_compute::Window to_window(const ndrange_t &ndr)
{
arm_compute::Window win;
- for(unsigned int i = 0; i != ndrange_max; ++i)
+ for (unsigned int i = 0; i != ndrange_max; ++i)
{
//populate the window with the dimensions of the NDRange
win.set(i, arm_compute::Window::Dimension(0, ndr.get_size(i)));
@@ -75,7 +73,7 @@ inline arm_compute::Window to_window(const ndcoord_t &ndc)
{
arm_compute::Window win;
- for(unsigned int i = 0; i != ndrange_max; ++i)
+ for (unsigned int i = 0; i != ndrange_max; ++i)
{
const auto start = ndc.get_position(i);
const auto size = ndc.get_size(i);
@@ -98,15 +96,12 @@ inline arm_compute::Window to_window(const ndcoord_t &ndc)
*/
inline ndrange_t to_ndrange(const arm_compute::Window &win)
{
- return
- {
- static_cast<unsigned int>(win[0].end() - win[0].start()),
- static_cast<unsigned int>(win[1].end() - win[1].start()),
- static_cast<unsigned int>(win[2].end() - win[2].start()),
- static_cast<unsigned int>(win[3].end() - win[3].start()),
- static_cast<unsigned int>(win[4].end() - win[4].start()),
- static_cast<unsigned int>(win[5].end() - win[5].start())
- };
+ return {static_cast<unsigned int>(win[0].end() - win[0].start()),
+ static_cast<unsigned int>(win[1].end() - win[1].start()),
+ static_cast<unsigned int>(win[2].end() - win[2].start()),
+ static_cast<unsigned int>(win[3].end() - win[3].start()),
+ static_cast<unsigned int>(win[4].end() - win[4].start()),
+ static_cast<unsigned int>(win[5].end() - win[5].start())};
}
/** Convert an `arm_compute::Window` to an `arm_gemm::NDCoord` of the same max dimensions
@@ -116,15 +111,12 @@ inline ndrange_t to_ndrange(const arm_compute::Window &win)
*/
inline ndcoord_t to_ndcoord(const arm_compute::Window &win)
{
- return
- {
- { static_cast<unsigned int>(win[0].start()), static_cast<unsigned int>(win[0].end() - win[0].start()) },
- { static_cast<unsigned int>(win[1].start()), static_cast<unsigned int>(win[1].end() - win[1].start()) },
- { static_cast<unsigned int>(win[2].start()), static_cast<unsigned int>(win[2].end() - win[2].start()) },
- { static_cast<unsigned int>(win[3].start()), static_cast<unsigned int>(win[3].end() - win[3].start()) },
- { static_cast<unsigned int>(win[4].start()), static_cast<unsigned int>(win[4].end() - win[4].start()) },
- { static_cast<unsigned int>(win[5].start()), static_cast<unsigned int>(win[5].end() - win[5].start()) }
- };
+ return {{static_cast<unsigned int>(win[0].start()), static_cast<unsigned int>(win[0].end() - win[0].start())},
+ {static_cast<unsigned int>(win[1].start()), static_cast<unsigned int>(win[1].end() - win[1].start())},
+ {static_cast<unsigned int>(win[2].start()), static_cast<unsigned int>(win[2].end() - win[2].start())},
+ {static_cast<unsigned int>(win[3].start()), static_cast<unsigned int>(win[3].end() - win[3].start())},
+ {static_cast<unsigned int>(win[4].start()), static_cast<unsigned int>(win[4].end() - win[4].start())},
+ {static_cast<unsigned int>(win[5].start()), static_cast<unsigned int>(win[5].end() - win[5].start())}};
}
} //namespace arm_gemm
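These helpers are pure data conversions: to_ndcoord() records each Window dimension as a {start, size} pair with size = end - start, and to_window() rebuilds Window dimensions from those pairs (with the default step of 1, so step information does not survive the round trip). A minimal round-trip sketch, assuming the two headers shown above are on the include path:

    #include "arm_compute/core/Window.h"
    #include "src/cpu/kernels/assembly/arm_gemm_compute_iface.hpp"

    arm_compute::Window roundtrip(const arm_compute::Window &win)
    {
        // Window -> per-dimension {start, size} pairs -> Window
        const arm_gemm::ndcoord_t ndc = arm_gemm::to_ndcoord(win);
        return arm_gemm::to_window(ndc);
    }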
diff --git a/src/cpu/kernels/assembly/gemm_common.hpp b/src/cpu/kernels/assembly/gemm_common.hpp
index 834cd1061e..6fe9f13f02 100644
--- a/src/cpu/kernels/assembly/gemm_common.hpp
+++ b/src/cpu/kernels/assembly/gemm_common.hpp
@@ -25,7 +25,6 @@
#include "convolution_parameters.hpp"
#include "ndrange.hpp"
-
#include <cstddef>
namespace arm_gemm
@@ -51,10 +50,19 @@ public:
* appropriately typed pointers. If B is pretransposed (see below) then
* the settings for B here are ignored.
*/
- virtual void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
- void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0;
+ virtual void set_arrays_generic(const void *A,
+ const int lda,
+ const int A_batch_stride,
+ const int A_multi_stride,
+ const void *B,
+ const int ldb,
+ /* batches share B */ const int B_multi_stride,
+ void *C,
+ const int ldc,
+ const int C_batch_stride,
+ const int C_multi_stride,
+ const void *bias,
+ /* no row or batch stride needed */ const int bias_multi_stride) = 0;
/** @returns an ndrange containing ranges of the compute space which can be
* broken up and parallelised over
@@ -73,7 +81,7 @@ public:
* This has an empty default implementation, as GEMMs which don't care
* about thread count can safely ignore this.
*/
- virtual void set_nthreads(int) {};
+ virtual void set_nthreads(int){};
/* Whether this GEMM can be dynamically scheduled or not. */
virtual bool supports_dynamic_scheduling() const
@@ -95,7 +103,7 @@ public:
return 0;
}
/* Provide working space buffer - the void * passed in must remain allocated for the duration of any execute calls. */
- virtual void set_working_space(void *) {};
+ virtual void set_working_space(void *){};
/*** "Pretransposed" interface (optional) ***/
/* Is this object set up for pretranspose? If so, pretranspose_array() needs to be called before execute(); */
@@ -122,7 +130,8 @@ public:
/* The "real" version of this depends on the templated operand type (see below). */
virtual void pretranspose_B_array_generic(void *, const void *, const int, const int) = 0;
/* Threaded version with window start/end parameters */
- virtual void pretranspose_B_array_part_generic(void *, const void *, const int, const int, const size_t, const size_t) = 0;
+ virtual void
+ pretranspose_B_array_part_generic(void *, const void *, const int, const int, const size_t, const size_t) = 0;
/* Set pretransposed data - the void * passed in must previously have been passed to pretranspose_B_array() for the same or a similar GEMM. */
virtual void set_pretransposed_B_data(void *)
@@ -186,10 +195,19 @@ protected:
public:
/* Pass in the pointers to the arrays to be operated on and their
* strides (templated version with appropriate types). */
- virtual void set_arrays(const To *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const To *B, const int ldb, /* batches share B */ const int B_multi_stride,
- Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const Tr *bias, /* no row or batch stride needed */ const int bias_multi_stride)
+ virtual void set_arrays(const To *A,
+ const int lda,
+ const int A_batch_stride,
+ const int A_multi_stride,
+ const To *B,
+ const int ldb,
+ /* batches share B */ const int B_multi_stride,
+ Tr *C,
+ const int ldc,
+ const int C_batch_stride,
+ const int C_multi_stride,
+ const Tr *bias,
+ /* no row or batch stride needed */ const int bias_multi_stride)
{
_Aptr = A;
_lda = lda;
@@ -207,25 +225,33 @@ public:
}
/* Implementation of the void * overload which casts its arguments to the appropriate type. */
- void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
- const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
- void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
- const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) override
+ void set_arrays_generic(const void *A,
+ const int lda,
+ const int A_batch_stride,
+ const int A_multi_stride,
+ const void *B,
+ const int ldb,
+ /* batches share B */ const int B_multi_stride,
+ void *C,
+ const int ldc,
+ const int C_batch_stride,
+ const int C_multi_stride,
+ const void *bias,
+ /* no row or batch stride needed */ const int bias_multi_stride) override
{
- set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride,
- static_cast<const To *>(B), ldb, B_multi_stride,
- static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride,
+ set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride, static_cast<const To *>(B), ldb,
+ B_multi_stride, static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride,
static_cast<const Tr *>(bias), bias_multi_stride);
}
/*** "Pretransposed" interface ***/
/* Compute col sums over all columns */
- virtual void requantize_bias(void *, const To *, const int, const int) {};
+ virtual void requantize_bias(void *, const To *, const int, const int){};
/* Perform pretranspose - the void * passed in must remain allocated for the duration of any execute calls. */
/* Arguments are: output buffer pointer, source pointer, source row stride, source multi stride */
- virtual void pretranspose_B_array(void *, const To *, const int, const int) {};
+ virtual void pretranspose_B_array(void *, const To *, const int, const int){};
/* Implementation of the void * overload which casts its arguments to the appropriate type. */
void pretranspose_B_array_generic(void *out, const void *in, const int row_stride, const int multi_stride) override
@@ -237,12 +263,14 @@ public:
* The fallback/backwards compatible version of the threaded interface exposes a window size of 1 and
* just calls the non-threaded functions to do the work. This is valid as with window size of 1 the only
* legal values for start and end are 0 and 1 respectively. */
- virtual void pretranspose_B_array_part(void *out, const To *in, const int row_stride, const int multi_stride, size_t, size_t)
+ virtual void
+ pretranspose_B_array_part(void *out, const To *in, const int row_stride, const int multi_stride, size_t, size_t)
{
pretranspose_B_array(out, in, row_stride, multi_stride);
};
- void pretranspose_B_array_part_generic(void *out, const void *in, const int row_stride, const int multi_stride, size_t start, size_t end) override
+ void pretranspose_B_array_part_generic(
+ void *out, const void *in, const int row_stride, const int multi_stride, size_t start, size_t end) override
{
pretranspose_B_array_part(out, static_cast<const To *>(in), row_stride, multi_stride, start, end);
}
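The *_generic overloads exist so a caller holding only untyped buffers can drive a typed GemmCommon; the override static_casts and forwards, as shown above. A minimal sketch of binding buffers through the generic interface, assuming a GemmCommon<float, float> instance obtained elsewhere (the stride values are placeholders, not meaningful defaults):

    #include "src/cpu/kernels/assembly/gemm_common.hpp"

    void bind_buffers(arm_gemm::GemmCommon<float, float> &gemm,
                      const void *A, const void *B, void *C, const void *bias)
    {
        // Arguments follow the set_arrays_generic signature above;
        // real values depend on the workload's shapes and batching.
        gemm.set_arrays_generic(A, /* lda */ 64, /* A_batch_stride */ 0, /* A_multi_stride */ 0,
                                B, /* ldb */ 64, /* B_multi_stride */ 0,
                                C, /* ldc */ 64, /* C_batch_stride */ 0, /* C_multi_stride */ 0,
                                bias, /* bias_multi_stride */ 0);
    }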
diff --git a/src/cpu/kernels/assembly/ndrange.hpp b/src/cpu/kernels/assembly/ndrange.hpp
index 1c8261aef7..baccdc0d88 100644
--- a/src/cpu/kernels/assembly/ndrange.hpp
+++ b/src/cpu/kernels/assembly/ndrange.hpp
@@ -45,8 +45,7 @@ private:
unsigned int m_end = 0;
public:
- NDRangeIterator(const NDRange &p, unsigned int s, unsigned int e)
- : m_parent(p), m_pos(s), m_end(e)
+ NDRangeIterator(const NDRange &p, unsigned int s, unsigned int e) : m_parent(p), m_pos(s), m_end(e)
{
}
@@ -59,12 +58,12 @@ private:
{
unsigned int r = m_pos;
- if(d < (D - 1))
+ if (d < (D - 1))
{
r %= m_parent.m_totalsizes[d];
}
- if(d > 0)
+ if (d > 0)
{
r /= m_parent.m_totalsizes[d - 1];
}
@@ -98,9 +97,9 @@ private:
{
unsigned int t = 1;
- for(unsigned int i = 0; i < D; i++)
+ for (unsigned int i = 0; i < D; i++)
{
- if(m_sizes[i] == 0)
+ if (m_sizes[i] == 0)
{
m_sizes[i] = 1;
}
@@ -116,14 +115,12 @@ public:
NDRange(const NDRange &rhs) = default;
template <typename... T>
- NDRange(T... ts)
- : m_sizes{ ts... }
+ NDRange(T... ts) : m_sizes{ts...}
{
set_totalsizes();
}
- NDRange(const std::array<unsigned int, D> &n)
- : m_sizes(n)
+ NDRange(const std::array<unsigned int, D> &n) : m_sizes(n)
{
set_totalsizes();
}
@@ -163,7 +160,7 @@ public:
std::array<int_t, N> sizes{};
std::size_t i = 0;
- for(auto &p : list)
+ for (auto &p : list)
{
m_positions[i] = p.first;
sizes[i++] = p.second;
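For reference, the dim() arithmetic in NDRangeIterator decomposes a flat position into per-dimension coordinates using the cumulative products that set_totalsizes() stores. A standalone sketch of the same decomposition for a hypothetical 2x3x4 range:

    #include <array>
    #include <cstdio>

    // totalsizes[i] = sizes[0] * ... * sizes[i], as set_totalsizes() computes.
    unsigned int dim_of(unsigned int pos, unsigned int d, const std::array<unsigned int, 3> &totalsizes)
    {
        unsigned int r = pos;
        if (d < 2) // every dimension but the outermost wraps around
        {
            r %= totalsizes[d];
        }
        if (d > 0) // strip off the faster-moving inner dimensions
        {
            r /= totalsizes[d - 1];
        }
        return r;
    }

    int main()
    {
        const std::array<unsigned int, 3> totalsizes{2, 6, 24}; // sizes {2, 3, 4}
        // Flat position 17 -> coordinates (1, 2, 2), since 17 = 1 + 2*2 + 2*6.
        std::printf("(%u, %u, %u)\n", dim_of(17, 0, totalsizes), dim_of(17, 1, totalsizes),
                    dim_of(17, 2, totalsizes));
        return 0;
    }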