diff options
Diffstat (limited to 'arm_compute/core/NEON/kernels/winograd/tensor.hpp')
-rw-r--r-- | arm_compute/core/NEON/kernels/winograd/tensor.hpp | 225 |
1 files changed, 96 insertions, 129 deletions
diff --git a/arm_compute/core/NEON/kernels/winograd/tensor.hpp b/arm_compute/core/NEON/kernels/winograd/tensor.hpp index 70ef65d2a5..6567eeb23d 100644 --- a/arm_compute/core/NEON/kernels/winograd/tensor.hpp +++ b/arm_compute/core/NEON/kernels/winograd/tensor.hpp @@ -23,39 +23,44 @@ */ #pragma once -#include <cstdio> #include <cstdlib> #include <random> #include "alloc.hpp" -/*****************************************************************************/ -/* Padding definitions */ -enum PaddingType { - PADDING_SAME, PADDING_VALID +enum TensorOrder +{ + NHWC, ///< [Batch x Height x Width x Channels] + NCHW, ///< [Batch x Channels x Height x Width] }; -/*****************************************************************************/ -/* Shape of a kernel */ -struct KernelShape { - int n_output_channels, n_rows, n_cols, n_input_channels; - - int size(void) const { - return n_output_channels * n_rows * n_cols * n_input_channels; +struct Tensor4DShape +{ + int n_batches, n_rows, n_cols, n_channels; + TensorOrder ordering; + + // Create a new tensor with the default (NHWC) ordering + inline Tensor4DShape( + const int n_batches, + const int n_rows, + const int n_cols, + const int n_channels, + const TensorOrder ordering=NHWC + ) : n_batches(n_batches), + n_rows(n_rows), + n_cols(n_cols), + n_channels(n_channels), + ordering(ordering) + { } -}; - -struct Tensor4DShape { - int n_batches, - n_rows, - n_cols, - n_channels; - int size() const { + inline int size() const + { return n_batches * n_rows * n_cols * n_channels; } - bool TestEq(const Tensor4DShape& other) const { + inline bool TestEq(const Tensor4DShape& other) const + { return (n_batches == other.n_batches && n_rows == other.n_rows && n_cols == other.n_cols && @@ -63,148 +68,110 @@ struct Tensor4DShape { } }; + +enum WeightOrder +{ + HWIO, ///< [Height x Width x Input channels x Output channels] + OIHW, ///< [Output channels x Input channels x Height x Width] +}; + +struct KernelShape +{ + int n_output_channels, n_rows, n_cols, n_input_channels; + WeightOrder ordering; + + inline KernelShape( + const int n_output_channels, + const int n_rows, + const int n_cols, + const int n_input_channels, + const WeightOrder ordering=HWIO + ) : n_output_channels(n_output_channels), + n_rows(n_rows), + n_cols(n_cols), + n_input_channels(n_input_channels), + ordering(ordering) + { + } + + inline int size(void) const + { + return n_output_channels * n_rows * n_cols * n_input_channels; + } +}; + + template <typename ShapeT, typename T> -class Tensor4D final { +class Tensor4D final +{ public: Tensor4D(ShapeT shape) : - _shape(shape), - _data(reinterpret_cast<T*>(ALLOCATE(size_bytes()))) { + shape(shape), + _data(reinterpret_cast<T*>(ALLOCATE(size_bytes()))) + { Clear(); } + Tensor4D(const Tensor4D<ShapeT, T>&) = delete; + Tensor4D operator=(const Tensor4D<ShapeT, T>&) = delete; + ~Tensor4D() { free(_data); } - T* ptr() const { + inline T* ptr() const { return _data; } - const ShapeT& shape() const { - return _shape; + inline size_t size_bytes() const { + return shape.size() * sizeof(T); } - size_t size_bytes() const { - return _shape.size() * sizeof(T); - } - - bool TestEq(Tensor4D<ShapeT, T>& other) const; - T& element(int, int, int, int) const; - void Print() const; + inline T& element(int, int, int, int) const; - void Clear() { + inline void Clear() { Fill(static_cast<T>(0)); } - void Fill(T val) { - for (int i = 0; i < _shape.size(); i++) + inline void Fill(T val) { + for (int i = 0; i < shape.size(); i++) _data[i] = val; } - void TestPattern() { - for (int i = 0; i < _shape.size(); i++) - _data[i] = static_cast<T>(i); - } - - void Rand(const int seed=2311) { - std::mt19937 gen(seed); - std::uniform_int_distribution<> dis(-50, +50); - - for (int i = 0; i < _shape.size(); i++) { - _data[i] = static_cast<T>(dis(gen)); - } - } - Tensor4D(const Tensor4D &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - Tensor4D &operator=(const Tensor4D &) = delete; - /** Allow instances of this class to be moved */ - Tensor4D(Tensor4D &&) = default; - /** Allow instances of this class to be moved */ - Tensor4D &operator=(Tensor4D &&) = default; - + const ShapeT shape; private: - const ShapeT _shape; T* const _data; }; template <> -inline float& Tensor4D<Tensor4DShape, float>::element(int n, int i, int j, int c) const { - int index = ((n*_shape.n_rows + i)*_shape.n_cols + j)*_shape.n_channels + c; - return _data[index]; -} - - -template <> -inline float& Tensor4D<KernelShape, float>::element(int oc, int i, int j, int ic) const { - int index = ((i*_shape.n_cols + j)*_shape.n_input_channels + ic)*_shape.n_output_channels + oc; - return _data[index]; -} - -template <> -inline bool Tensor4D<Tensor4DShape, float>::TestEq(Tensor4D<Tensor4DShape, float>& other) const { - // Test equivalence, printing errors - // First test the shapes are the same - if (!_shape.TestEq(other.shape())) { - printf("Tensors have different shapes.\n"); - return false; - } else { - int incorrects = 0; - - for (int n = 0; n < _shape.n_batches; n++) { - for (int i = 0; i < _shape.n_rows; i++) { - for (int j = 0; j < _shape.n_cols; j++) { - for (int c = 0; c < _shape.n_channels; c++) { - // Check elements for equivalence - const auto a = this->element(n, i, j, c); - const auto b = other.element(n, i, j, c); - - if (a != b) { - printf("Difference at element {%d, %d, %d, %d}: %.3f != %.3f\n", n, i, j, c, a, b); - - if (++incorrects > 100) { - printf("More than 100 incorrect values, stopping test.\n"); - return false; - } - } - } - } - } - } - - return incorrects == 0; +inline float& Tensor4D<Tensor4DShape, float>::element(int n, int i, int j, int c) const +{ + int index; + if (shape.ordering == NHWC) + { + index = ((n*shape.n_rows + i)*shape.n_cols + j)*shape.n_channels + c; } -} - - -template <> -inline void Tensor4D<Tensor4DShape, float>::Print() const { - for (int n = 0; n < _shape.n_batches; n++) { - for (int c = 0; c < _shape.n_channels; c++) { - for (int i = 0; i < _shape.n_rows; i++) { - for (int j = 0; j < _shape.n_cols; j++) { - printf("%5.2f ", element(n, i, j, c)); - } - printf("\n"); - } - printf("\n"); - } + else // NCHW + { + index = ((n*shape.n_channels + c)*shape.n_rows + i)*shape.n_cols + j; } + return _data[index]; } template <> -inline void Tensor4D<KernelShape, float>::Print() const { - for (int oc = 0; oc < _shape.n_output_channels; oc++) { - for (int ic = 0; ic < _shape.n_input_channels; ic++) { - for (int i = 0; i < _shape.n_rows; i++) { - for (int j = 0; j < _shape.n_cols; j++) { - printf("%5.2f ", element(oc, i, j, ic)); - } - printf("\n"); - } - printf("\n"); - } +inline float& Tensor4D<KernelShape, float>::element(int oc, int i, int j, int ic) const +{ + int index; + if (shape.ordering == HWIO) + { + index = ((i*shape.n_cols + j)*shape.n_input_channels + ic)*shape.n_output_channels + oc; } + else // OIHW + { + index = ((oc*shape.n_input_channels + ic)*shape.n_rows + i)*shape.n_cols + j; + } + return _data[index]; } |