aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transform.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transform.hpp')
-rw-r--r--src/core/NEON/kernels/arm_gemm/transform.hpp67
1 files changed, 28 insertions, 39 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transform.hpp b/src/core/NEON/kernels/arm_gemm/transform.hpp
index c80bb59941..35e61b05a4 100644
--- a/src/core/NEON/kernels/arm_gemm/transform.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transform.hpp
@@ -35,63 +35,51 @@
* being a multiple of the block sizes.
*/
template <unsigned IntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize>
-struct TransformImpl
-{
+struct TransformImpl {
template <typename TOut, typename TIn>
- static void Transform(TOut *out, const TIn *const in, const int stride,
- const int y0, const int ymax, const int x0, const int xmax)
- {
+ static void Transform(TOut* out, const TIn* const in, const int stride,
+ const int y0, const int ymax, const int x0, const int xmax) {
const int n_whole_y_blocks = (ymax - y0) / IntBy;
- const int y_remainders = (ymax - y0) % IntBy;
- const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
+ const int y_remainders = (ymax - y0) % IntBy;
+ const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
const int n_whole_x_blocks = (xmax - x0) / BlockBy;
- const int x_remainders = (xmax - x0) % BlockBy;
- const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0);
+ const int x_remainders = (xmax - x0) % BlockBy;
+ const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0);
// "Y" loop: advance down the rows of the source IntBy rows at a time.
// Set up fill_rows to show the number of rows to copy from, and blank_rows
// for the number of blank rows to add.
- for(int y_block = 0; y_block < n_y_blocks; y_block++)
- {
- int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders;
+ for (int y_block=0 ; y_block < n_y_blocks; y_block++) {
+ int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders;
int blank_rows = IntBy - fill_rows;
int y_base = y0 + (y_block * IntBy);
// So now advance along this block of rows, BlockBy columns at a time.
- for(int x_block = 0; x_block < n_x_blocks; x_block++)
- {
- int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders;
+ for (int x_block=0 ; x_block < n_x_blocks; x_block++) {
+ int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders;
int blank_cols = BlockBy - fill_cols;
int x_base = x0 + (x_block * BlockBy);
- for(int row = 0; row < fill_rows; row++)
- {
- for(int col = 0; col < fill_cols; col++)
- {
+ for (int row = 0; row < fill_rows; row++) {
+ for (int col = 0; col < fill_cols; col++) {
// In-range copy. If it's transposed, we reverse the sense of rows and columns here.
- if(Transposed)
- {
+ if (Transposed) {
*out++ = static_cast<TOut>(in[(x_base + col) * stride + y_base + row]);
- }
- else
- {
+ } else {
*out++ = static_cast<TOut>(in[(y_base + row) * stride + x_base + col]);
}
}
// "col" tail - row is in range but column is out of range.
- for(int col = 0; col < blank_cols; col++)
- {
+ for (int col=0; col < blank_cols; col++) {
*out++ = static_cast<TOut>(0);
}
}
// "row" tail - row is out of range so fill with zeros always.
- for(int row = 0; row < blank_rows; row++)
- {
- for(int col = 0; col < (fill_cols + blank_cols); col++)
- {
+ for (int row = 0; row < blank_rows; row++) {
+ for (int col=0; col < (fill_cols + blank_cols); col++) {
*out++ = static_cast<TOut>(0);
}
}
@@ -100,9 +88,8 @@ struct TransformImpl
}
template <typename T>
- static inline void Transform(T *out, const T *const in, const int stride,
- const int k0, const int kmax, const int x0, const int xmax)
- {
+ static inline void Transform(T* out, const T* const in, const int stride,
+ const int k0, const int kmax, const int x0, const int xmax) {
Transform<T, T>(out, in, stride, k0, kmax, x0, xmax);
}
};
@@ -110,13 +97,15 @@ struct TransformImpl
/*****************************************************************************/
template <unsigned int IntBy, unsigned int BlockBy, bool Transposed, typename TOut, typename TIn>
void Transform(
- TOut *out, const TIn *const in, const int stride,
- const int k0, const int kmax, const int x0, const int xmax)
-{
- // Redirect to a specialised implementation predicated on argument size.
- TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn)>::Transform(
- out, in, stride, k0, kmax, x0, xmax);
+ TOut* out, const TIn* const in, const int stride,
+ const int k0, const int kmax, const int x0, const int xmax
+) {
+ // Redirect to a specialised implementation predicated on argument size.
+ TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn)>::Transform(
+ out, in, stride, k0, kmax, x0, xmax
+ );
}
/*****************************************************************************/
#include "transforms/list.hpp"
+