aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2018-01-23 09:36:04 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:45:00 +0000
commitd6ca478a7e410f8f529c2e505305b46d9fe21a9b (patch)
tree5c50c06e07f812890f127b1c4933996987f74f17 /arm_compute
parentd05dce46a14a7b67f322328ecd95bf96bdd30bae (diff)
downloadComputeLibrary-d6ca478a7e410f8f529c2e505305b46d9fe21a9b.tar.gz
COMPMID-784: Added support for biases in WinogradLayer.
1) Updated to the latest code from the RSH repo. 2) Moved winograd transforms into kernels. 3) Added support for biases Change-Id: I7f39f34a599b49d7d9b549cc10a4f4d4a8007ab8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117474 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h125
-rw-r--r--arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp1
-rw-r--r--arm_compute/core/NEON/kernels/winograd/transforms/input.hpp2
-rw-r--r--arm_compute/core/NEON/kernels/winograd/transforms/output.hpp21
-rw-r--r--arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp12
-rw-r--r--arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp89
-rw-r--r--arm_compute/runtime/NEON/functions/NEWinogradLayer.h37
7 files changed, 194 insertions, 93 deletions
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
index b85f372896..ea6c8d813d 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
@@ -32,6 +32,8 @@ namespace arm_compute
{
class ITensor;
class NEWinogradLayerKernel;
+class NEWinogradLayerTransformInputKernel;
+class NEWinogradLayerTransformWeightsKernel;
class Winograd3x3F32 final
{
@@ -48,10 +50,15 @@ public:
* @param[out] weights_storage Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size
* @param[in] input Pointer to NHWC ordered input tensor, in the spatial domain.
* @param[out] winograd_input Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`.
+ * @param[in] biases Pointer to the biases vector.
* @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
* @param[out] winograd_output Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`.
*/
friend class NEWinogradLayerKernel;
+ friend class NEWinogradLayerTransformInputKernel;
+ friend class NEWinogradLayerTransformOutputKernel;
+ friend class NEWinogradLayerTransformWeightsKernel;
+
Winograd3x3F32(
const int n_batches,
const int n_input_channels,
@@ -67,16 +74,124 @@ public:
float *const winograd_output);
~Winograd3x3F32();
- void transform_weights();
- void transform_input();
- void transform_output();
private:
class Private;
std::unique_ptr<Private> _pimpl;
};
-class NEWinogradLayerKernel : public INEKernel
+class INEWinogradLayerTransformKernel : public INEKernel
+{
+public:
+ /** Constructor */
+ INEWinogradLayerTransformKernel();
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEWinogradLayerTransformKernel(const INEWinogradLayerTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEWinogradLayerTransformKernel &operator=(const INEWinogradLayerTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ INEWinogradLayerTransformKernel(INEWinogradLayerTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ INEWinogradLayerTransformKernel &operator=(INEWinogradLayerTransformKernel &&) = default;
+
+ virtual ~INEWinogradLayerTransformKernel() = default;
+
+ /** Initialise the kernel
+ *
+ * @param[in] convolver A pointer to the Winograd convolver; this object must have been configured and be ready to execute 16 GEMMs.
+ */
+ virtual void configure(Winograd3x3F32 *convolver);
+
+protected:
+ Winograd3x3F32 *_convolver;
+};
+
+class NEWinogradLayerTransformInputKernel final : public INEWinogradLayerTransformKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEWinogradLayerTransformInputKernel";
+ }
+ // Inherited methods overridden:
+ void configure(Winograd3x3F32 *convolver) override;
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+};
+
+class NEWinogradLayerTransformOutputKernel final : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEWinogradLayerTransformOutputKernel";
+ }
+ /** Constructor */
+ NEWinogradLayerTransformOutputKernel();
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
+
+ ~NEWinogradLayerTransformOutputKernel() = default;
+
+ /** Configure the output transform kernel.
+ *
+ * @param[in] biases Pointer to the biases tensor.
+ * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
+ * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
+ * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
+ * @param[in] n_batches Number of batches in the input tensor.
+ * @param[in] n_rows Number of rows in output tensor.
+ * @param[in] n_cols Number of columns in output tensor.
+ * @param[in] n_channels Number of feature maps in the output tensor.
+ */
+ void configure(
+ const ITensor *biases,
+ const float *const output_workingspace,
+ const int matrix_stride,
+ float *const output,
+ const int n_batches,
+ const int n_rows,
+ const int n_cols,
+ const int n_channels);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ const ITensor *_biases;
+ const float *_output_workspace;
+ int _matrix_stride;
+ int _matrix_row_stride;
+ float *_output;
+ int _n_batches;
+ int _n_rows;
+ int _n_cols;
+ int _n_channels;
+};
+
+class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEWinogradLayerTransformWeightsKernel";
+ }
+ // Inherited methods overridden:
+ void configure(Winograd3x3F32 *convolver) override;
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+};
+
+class NEWinogradLayerKernel final : public INEKernel
{
public:
const char *name() const override
@@ -95,7 +210,7 @@ public:
/** Allow instances of this class to be moved */
NEWinogradLayerKernel &operator=(NEWinogradLayerKernel &&) = default;
- virtual ~NEWinogradLayerKernel() = default;
+ ~NEWinogradLayerKernel() = default;
/** Initialise the kernel
*
diff --git a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
index 725f6cab65..6a9984a24a 100644
--- a/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/direct_convolution.hpp
@@ -29,6 +29,7 @@
void direct_convolution(
const Tensor4D<Tensor4DShape, float>& input,
const Tensor4D<KernelShape, float>& kernel,
+ const Tensor4D<Tensor4DShape, float>& biases,
Tensor4D<Tensor4DShape, float>& output,
const PaddingType padding
);
diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
index 39b444184e..075765a513 100644
--- a/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/transforms/input.hpp
@@ -71,7 +71,7 @@ namespace winograd
const int row_offset = (tile_i == 0) ?
0 : ((padding_type == PADDING_VALID) ? 0 : 1);
const T* const input_base_row = (
- input_base_batch + ((inner_tile_rows - 2)*tile_i - row_offset)*input_row_stride
+ input_base_batch + ((inner_tile_rows - (kernel_rows - 1))*tile_i - row_offset)*input_row_stride
);
T* const outptr_base_row = outptr_base_batch + tile_i*output_row_stride;
diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
index 7fa5ee9617..0dd719751b 100644
--- a/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/transforms/output.hpp
@@ -35,6 +35,7 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output
)
{
@@ -69,8 +70,9 @@ namespace winograd
// Process the row
process_tile_row(
tile_N, output_shape.n_channels, matrix_tile_row, matrix_stride,
- matrix_row_stride, outptr_row, output_row_stride,
- output_col_stride, row_pad_bottom, pad_right
+ matrix_row_stride, biases,
+ outptr_row, output_row_stride, output_col_stride, row_pad_bottom,
+ pad_right
);
}
}
@@ -85,6 +87,7 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int output_row_stride,
const int output_col_stride,
@@ -102,7 +105,7 @@ namespace winograd
// Perform the output transformation
tile_fns[row_pad_bottom][tile_pad_right](
- n_channels, matrix_row, matrix_stride,
+ n_channels, matrix_row, matrix_stride, biases,
outptr, output_row_stride, output_col_stride
);
}
@@ -131,14 +134,17 @@ namespace winograd
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int n_batches,
const int n_rows,
const int n_cols,
const int n_channels
- ) : _matrix_base(matrix_base), _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride),
- _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols), _n_channels(n_channels),
- _tile_M(iceildiv(n_rows, output_tile_rows)), _tile_N(iceildiv(n_cols, output_tile_cols))
+ ) : _matrix_base(matrix_base), _biases(biases),
+ _matrix_stride(matrix_stride), _matrix_row_stride(matrix_row_stride),
+ _outptr(output), _n_batches(n_batches), _n_rows(n_rows), _n_cols(n_cols),
+ _n_channels(n_channels), _tile_M(iceildiv(n_rows, output_tile_rows)),
+ _tile_N(iceildiv(n_cols, output_tile_cols))
{
}
@@ -168,7 +174,8 @@ namespace winograd
_n_batches, _n_rows, _n_cols, _n_channels, NHWC
};
execute(
- output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _outptr
+ output_shape, _matrix_base, _matrix_stride, _matrix_row_stride, _biases,
+ _outptr
);
}
} // namespace winograd
diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
index adca48a6d6..2ea70f182b 100644
--- a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp
@@ -183,7 +183,7 @@ class WinogradGEMM
const int row_pad_top,
const int row_pad_left,
const int row_pad_bottom,
- const int row_pad_right
+ const int n_cols
);
static constexpr int max_pad_bottom = inner_tile_rows - 1;
@@ -225,6 +225,7 @@ class WinogradGEMM
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output
);
@@ -236,6 +237,7 @@ class WinogradGEMM
const T* const matrix_base, /** Pointer to base of matrices. */
const int matrix_stride, /** Stride between matrices. */
const int matrix_row_stride, /** Stride within a matrix. */
+ const T* const biases, /** Pointer to biases vector. */
T* const output, /** Pointer to output tensor. */
const int n_batches, /** Number of batches in output tensor. */
const int n_rows, /** Number of rows in output tensor. */
@@ -257,6 +259,7 @@ class WinogradGEMM
const T* const matrix_base,
const int matrix_stride,
const int matrix_row_stride,
+ const T* const biases,
T* const output,
const int output_row_stride,
const int output_col_stride,
@@ -270,14 +273,15 @@ class WinogradGEMM
/** Prepare a single tile of the output tensor. */
template <int pad_bottom, int pad_right>
- static void process_tile(int, const T*, int, T*, int, int);
+ static void process_tile(int, const T*, int, const T*, T*, int, int);
// Array of methods to produce tiles of output tensor.
- typedef void (*TileFn)(int, const T*, int, T*, int, int);
+ typedef void (*TileFn)(int, const T*, int, const T*, T*, int, int);
static const TileFn tile_fns[max_pad_bottom][max_pad_right];
/** Member constants for instances of the transform. */
const T* const _matrix_base;
+ const T* const _biases;
const int _matrix_stride, _matrix_row_stride;
T* const _outptr;
const int _n_batches, _n_rows, _n_cols, _n_channels, _tile_M, _tile_N;
@@ -328,6 +332,7 @@ class WinogradGEMM
void execute(
TOut* const output,
const TIn* const input,
+ const TOut* const biases,
void* working_space=NULL,
const int n_threads=1
);
@@ -336,6 +341,7 @@ class WinogradGEMM
void execute(
TOut* const output,
const TIn* const input,
+ const TOut* const biases,
const int n_threads
);
diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
index 4559312df4..1db63d750b 100644
--- a/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
+++ b/arm_compute/core/NEON/kernels/winograd/winograd_layer.hpp
@@ -74,87 +74,56 @@ class WinogradConvolutionLayer
/** Determine how much memory (in units of TIn) to allocate for the
* transformed weights.
- *
- * @param[in] n_output_channels Number of output feature maps.
- * @param[in] n_input_channels Number of input feature maps.
*/
static unsigned int get_weight_storage_size(
- const int n_output_channels,
- const int n_input_channels
+ const int n_output_channels, /** Number of output feature maps. */
+ const int n_input_channels /** Number of input feature maps. */
);
/** Determine how much memory (in units of TIn) to allocate for the
* transformed input.
- *
- * @param[in] n_batches Number of batches in the input tensor.
- * @param[in] n_channels Number of feature maps in the input tensor.
- * @param[in] n_rows Number of rows in each feature map.
- * @param[in] n_cols Number of columns in each feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
*/
static unsigned int get_input_storage_size(
- const int n_batches,
- const int n_channels,
- const int n_rows,
- const int n_cols,
- const bool same_padding
+ const int n_batches, /** Number of batches in the input tensor. */
+ const int n_channels, /** Number of feature maps in the input tensor. */
+ const int n_rows, /** Number of rows in each feature map. */
+ const int n_cols, /** Number of columns in each feature map. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
/** Determine how much memory (in units of TOut) to allocate for the
* (Winograd domain) output.
- *
- * @param[in] n_batches Number of batches in the output tensor.
- * @param[in] n_rows Number of rows in each feature map of the input tensor.
- * @param[in] n_cols Number of columns in each feature map of the input tensor.
- * @param[in] n_output_channels Number of feature maps in the output tensor.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
*/
static unsigned int get_output_storage_size(
- const int n_batches,
- const int n_rows,
- const int n_cols,
- const int n_output_channels,
- const bool same_padding
+ const int n_batches, /** Number of batches in the output tensor. */
+ const int n_rows, /** Number of rows in each feature map of the input tensor. */
+ const int n_cols, /** Number of columns in each feature map of the input tensor. */
+ const int n_output_channels, /** Number of feature maps in the output tensor. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
- /** Get the shape (rows, cols) of a feature map of the output tensor.
- *
- * @param[in] n_input_rows Number of rows in the input feature map.
- * @param[in] n_input_cols Number of columns in the input feature map.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- */
+ /** Get the shape (rows, cols) of a feature map of the output tensor. */
static std::pair<int, int> get_output_feature_map_shape(
- const int n_input_rows,
- const int n_input_cols,
- const bool same_padding
+ const int n_input_rows, /** Number of rows in the input feature map. */
+ const int n_input_cols, /** Number of columns in the input feature map. */
+ const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
);
/** Create a new Winograd convolution layer.
- * @param[in] n_batches Number of batches in the input and output tensors.
- * @param[in] n_input_channels Number of feature maps in a batch of the input tensor.
- * @param[in] n_input_rows Number of rows in a feature map of the input tensor.
- * @param[in] n_input_cols Number of columns in a feature map of the input tensor.
- * @param[in] n_output_channels Number of feature maps in the output tensor.
- * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
- * @param[in] weights Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps.
- * @param[out] weights_storage Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size
- * @param[in] input Pointer to NHWC ordered input tensor, in the spatial domain.
- * @param[out] winograd_input Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`.
- * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
- * @param[out] winograd_output Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`.
*/
WinogradConvolutionLayer(
- const int n_batches,
- const int n_input_channels,
- const int n_input_rows,
- const int n_input_cols,
- const int n_output_channels,
- const bool same_padding,
- const TIn* const weights,
- TIn* const weights_storage,
- const TIn* const input,
- TIn* const winograd_input,
- TOut* const output,
- TOut* const winograd_output
+ const int n_batches, /** Number of batches in the input and output tensors. */
+ const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */
+ const int n_input_rows, /** Number of rows in a feature map of the input tensor. */
+ const int n_input_cols, /** Number of columns in a feature map of the input tensor. */
+ const int n_output_channels, /** Number of feature maps in the output tensor. */
+ const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */
+ const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps". */
+ TIn* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
+ const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */
+ TIn* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
+ const TOut* const biases, /** Pointer to biases vector. */
+ TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */
+ TOut* const winograd_output /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
);
};
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
index 60cdc97469..1682495f0d 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
@@ -52,7 +52,7 @@ public:
* Data types supported: F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
* Currently only 3x3 kernels are supported.
- * @param[in] biases Not supported, biases will be ignored.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
@@ -68,22 +68,25 @@ public:
NEWinogradLayer &operator=(const NEWinogradLayer &) = delete;
private:
- MemoryGroup _memory_group;
- NEWinogradLayerKernel _winograd_kernel;
- CPPPermute _permute_input;
- CPPPermute _permute_weights;
- CPPPermute _permute_output;
- Tensor _input_workspace;
- Tensor _output_workspace;
- Tensor _kernel_storage;
- Tensor _input_nhwc;
- Tensor _output_nhwc;
- Tensor _weights_hwio;
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- bool _reshaped_kernel;
- std::unique_ptr<Winograd3x3F32> _conv;
+ MemoryGroup _memory_group;
+ NEWinogradLayerKernel _winograd_kernel;
+ NEWinogradLayerTransformInputKernel _transform_input_kernel;
+ NEWinogradLayerTransformOutputKernel _transform_output_kernel;
+ NEWinogradLayerTransformWeightsKernel _transform_weights_kernel;
+ CPPPermute _permute_input;
+ CPPPermute _permute_weights;
+ CPPPermute _permute_output;
+ Tensor _input_workspace;
+ Tensor _output_workspace;
+ Tensor _kernel_storage;
+ Tensor _input_nhwc;
+ Tensor _output_nhwc;
+ Tensor _weights_hwio;
+ const ITensor *_input;
+ const ITensor *_weights;
+ ITensor *_output;
+ bool _reshaped_kernel;
+ std::unique_ptr<Winograd3x3F32> _conv;
};
}
#endif /* __ARM_COMPUTE_NEWINOGRADLAYER_H__ */