|  |  |  |
| --- | --- | --- |
| author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
| committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
| commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) |  |
| tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /src/cpu/kernels/assembly/gemm_common.hpp |  |
| parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) |  |
| download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |  |
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration
file (not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow-up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
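The revised configuration file itself is not part of this delivery, so the exact options behind it are not visible here, but the diff below shows its effect directly. The following is a rough, hypothetical C++ sketch (class and parameter names invented for illustration) of the two most visible rules:

```cpp
// Hypothetical illustration only: the real .clang-format options are not
// included in this commit; this mimics the style visible in the diff below.
class Example
{
public:
    virtual ~Example() = default;

    // Empty bodies are attached directly to the parameter list, with no
    // space before the brace (compare set_nthreads in the diff).
    virtual void set_nthreads(int){};

    // Declarations over the column limit break one parameter per line
    // instead of packing several parameters per line.
    virtual void set_arrays_generic(const void *A,
                                    const int lda,
                                    const void *B,
                                    const int ldb,
                                    void *C,
                                    const int ldc) = 0;
};
```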
Diffstat (limited to 'src/cpu/kernels/assembly/gemm_common.hpp')
|  |  |  |
| --- | --- | --- |
| -rw-r--r-- | src/cpu/kernels/assembly/gemm_common.hpp | 74 |

1 file changed, 51 insertions, 23 deletions
```diff
diff --git a/src/cpu/kernels/assembly/gemm_common.hpp b/src/cpu/kernels/assembly/gemm_common.hpp
index 834cd1061e..6fe9f13f02 100644
--- a/src/cpu/kernels/assembly/gemm_common.hpp
+++ b/src/cpu/kernels/assembly/gemm_common.hpp
@@ -25,7 +25,6 @@
 #include "convolution_parameters.hpp"
 #include "ndrange.hpp"
-
 #include <cstddef>
 
 namespace arm_gemm
 {
@@ -51,10 +50,19 @@ public:
      * appropriately typed pointers. If B is pretransposed (see below) then
      * the settings for B here are ignored.
      */
-    virtual void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
-                                    const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
-                                    void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
-                                    const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0;
+    virtual void set_arrays_generic(const void *A,
+                                    const int lda,
+                                    const int A_batch_stride,
+                                    const int A_multi_stride,
+                                    const void *B,
+                                    const int ldb,
+                                    /* batches share B */ const int B_multi_stride,
+                                    void *C,
+                                    const int ldc,
+                                    const int C_batch_stride,
+                                    const int C_multi_stride,
+                                    const void *bias,
+                                    /* no row or batch stride needed */ const int bias_multi_stride) = 0;
 
     /** @returns an ndrange containing ranges of the compute space which can be
      * broken up and parallelised over
@@ -73,7 +81,7 @@ public:
      * This has an empty default implementation, as GEMMs which don't care
      * about thread count can safely ignore this.
      */
-    virtual void set_nthreads(int) {};
+    virtual void set_nthreads(int){};
 
     /* Whether this GEMM can be dynamically scheduled or not. */
     virtual bool supports_dynamic_scheduling() const
@@ -95,7 +103,7 @@ public:
         return 0;
     }
     /* Provide working space buffer - the void * passed in must remain allocated for the duration of any execute calls. */
-    virtual void set_working_space(void *) {};
+    virtual void set_working_space(void *){};
 
     /*** "Pretransposed" interface (optional) ***/
     /* Is this object set up for pretranspose? If so, pretranspose_array() needs to be called before execute(); */
@@ -122,7 +130,8 @@ public:
     /* The "real" version of this depends on the templated operand type (see below). */
     virtual void pretranspose_B_array_generic(void *, const void *, const int, const int) = 0;
     /* Threaded version with window start/end parameters */
-    virtual void pretranspose_B_array_part_generic(void *, const void *, const int, const int, const size_t, const size_t) = 0;
+    virtual void
+    pretranspose_B_array_part_generic(void *, const void *, const int, const int, const size_t, const size_t) = 0;
 
     /* Set pretransposed data - the void * passed in must previously have been passed to pretranspose_B_array() for the same or a similar GEMM. */
     virtual void set_pretransposed_B_data(void *)
@@ -186,10 +195,19 @@ protected:
 public:
     /* Pass in the pointers to the arrays to be operated on and their
      * strides (templated version with appropriate types). */
-    virtual void set_arrays(const To *A, const int lda, const int A_batch_stride, const int A_multi_stride,
-                            const To *B, const int ldb, /* batches share B */ const int B_multi_stride,
-                            Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
-                            const Tr *bias, /* no row or batch stride needed */ const int bias_multi_stride)
+    virtual void set_arrays(const To *A,
+                            const int lda,
+                            const int A_batch_stride,
+                            const int A_multi_stride,
+                            const To *B,
+                            const int ldb,
+                            /* batches share B */ const int B_multi_stride,
+                            Tr *C,
+                            const int ldc,
+                            const int C_batch_stride,
+                            const int C_multi_stride,
+                            const Tr *bias,
+                            /* no row or batch stride needed */ const int bias_multi_stride)
     {
         _Aptr = A;
         _lda = lda;
@@ -207,25 +225,33 @@ public:
     }
 
     /* Implementation of the void * overload which casts its arguments to the appropriate type. */
-    void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride,
-                            const void *B, const int ldb, /* batches share B */ const int B_multi_stride,
-                            void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
-                            const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) override
+    void set_arrays_generic(const void *A,
+                            const int lda,
+                            const int A_batch_stride,
+                            const int A_multi_stride,
+                            const void *B,
+                            const int ldb,
+                            /* batches share B */ const int B_multi_stride,
+                            void *C,
+                            const int ldc,
+                            const int C_batch_stride,
+                            const int C_multi_stride,
+                            const void *bias,
+                            /* no row or batch stride needed */ const int bias_multi_stride) override
     {
-        set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride,
-                   static_cast<const To *>(B), ldb, B_multi_stride,
-                   static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride,
+        set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride, static_cast<const To *>(B), ldb,
+                   B_multi_stride, static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride,
                    static_cast<const Tr *>(bias), bias_multi_stride);
     }
 
     /*** "Pretransposed" interface ***/
 
     /* Compute col sums over all columns */
-    virtual void requantize_bias(void *, const To *, const int, const int) {};
+    virtual void requantize_bias(void *, const To *, const int, const int){};
 
     /* Perform pretranspose - the void * passed in must remain allocated for the duration of any execute calls. */
     /* Arguments are: output buffer pointer, source pointer, source row stride, source multi stride */
-    virtual void pretranspose_B_array(void *, const To *, const int, const int) {};
+    virtual void pretranspose_B_array(void *, const To *, const int, const int){};
 
     /* Implementation of the void * overload which casts its arguments to the appropriate type. */
     void pretranspose_B_array_generic(void *out, const void *in, const int row_stride, const int multi_stride) override
@@ -237,12 +263,14 @@ public:
      * The fallback/backwards compatible version of the threaded interface exposes a window size of 1 and
      * just calls the non-threaded functions to do the work. This is valid as with window size of 1 the only
      * legal values for start and end are 0 and 1 respectively. */
-    virtual void pretranspose_B_array_part(void *out, const To *in, const int row_stride, const int multi_stride, size_t, size_t)
+    virtual void
+    pretranspose_B_array_part(void *out, const To *in, const int row_stride, const int multi_stride, size_t, size_t)
     {
         pretranspose_B_array(out, in, row_stride, multi_stride);
     };
 
-    void pretranspose_B_array_part_generic(void *out, const void *in, const int row_stride, const int multi_stride, size_t start, size_t end) override
+    void pretranspose_B_array_part_generic(
+        void *out, const void *in, const int row_stride, const int multi_stride, size_t start, size_t end) override
     {
         pretranspose_B_array_part(out, static_cast<const To *>(in), row_stride, multi_stride, start, end);
     }
```
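To make the reformatted declarations easier to follow: this header pairs each templated method with a type-erased `void *` "generic" virtual overload that casts to the concrete types, and gives the windowed (threaded) pretranspose a backwards-compatible fallback to the non-threaded call. Below is a minimal, self-contained sketch of those two patterns; the names are simplified and this is not the library's actual interface.

```cpp
#include <cstddef>

// Simplified sketch of the two patterns visible in gemm_common.hpp; the
// real class carries many more parameters and methods.
class IGemmCommon
{
public:
    virtual ~IGemmCommon() = default;

    // Type-erased entry point for callers that only hold void pointers.
    virtual void set_arrays_generic(const void *A, void *C) = 0;

    // Windowed pretranspose. With a window size of 1, the only legal
    // (start, end) pair is (0, 1), so delegating to a whole-array
    // implementation is a valid default.
    virtual void pretranspose_B_array_part_generic(void *out, const void *in, std::size_t start, std::size_t end) = 0;
};

template <typename To, typename Tr>
class GemmCommonSketch : public IGemmCommon
{
protected:
    const To *_Aptr = nullptr;
    Tr *_Cptr = nullptr;

public:
    // The typed version holds the real logic.
    virtual void set_arrays(const To *A, Tr *C)
    {
        _Aptr = A;
        _Cptr = C;
    }

    // The void * overload casts its arguments to the appropriate types,
    // mirroring set_arrays_generic in the diff above.
    void set_arrays_generic(const void *A, void *C) override
    {
        set_arrays(static_cast<const To *>(A), static_cast<Tr *>(C));
    }

    // Non-threaded pretranspose; a no-op here for brevity.
    virtual void pretranspose_B_array(void *, const To *) {}

    // Backwards-compatible fallback: ignore the window and do it all.
    void pretranspose_B_array_part_generic(void *out, const void *in, std::size_t, std::size_t) override
    {
        pretranspose_B_array(out, static_cast<const To *>(in));
    }
};
```

A caller holding only `void` pointers invokes `set_arrays_generic`, so the cast to `To`/`Tr` happens once at the interface boundary and the virtual interface stays free of template parameters; likewise a scheduler that does not split work can pass the full window and land in the non-threaded fallback.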