aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-07-16 18:53:52 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commite1553374d037dbf84999258d5bc88927891770cc (patch)
tree0a62bbe5b668397465242c79b0a7c9c584d8d75f /src
parent578225e8000c0fc2c4f81a44ae7f06fe44b0fc6a (diff)
downloadComputeLibrary-e1553374d037dbf84999258d5bc88927891770cc.tar.gz
COMPMID-1357: Stop passing around raw pointers in NEWinogradConvolution
First step to allow us to enable the memory manager in this function Change-Id: Ic42fdac4c74cd21973c71130b59883e4a87d3dca Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140167 Reviewed-by: Pablo Tello <pablo.tello@arm.com> Reviewed-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp125
-rw-r--r--src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp84
2 files changed, 58 insertions, 151 deletions
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
index 50e69a8adf..b295a0c685 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp
@@ -40,38 +40,6 @@ namespace arm_compute
namespace
{
-Status validate_arguments_winograd_gemm(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c, const ITensorInfo *output, const float alpha, const float beta,
- const GEMMInfo &gemm_info = GEMMInfo())
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(b);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
-
- if(c != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, c->info());
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->info()->dimension(1), "The matrix C must have the same number of rows as the matrix A");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(b->dimension(0) != c->info()->dimension(0), "The matrix C must have the same number of columns as the matrix B");
- }
-
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(b->dimension(0) != output->dimension(0), "The output matrix must have the same number of columns as the matrix B");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != output->dimension(1), "The output matrix must have the same number of rows as the matrix A");
- ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() != a->num_dimensions());
- }
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(0) != b->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
- ARM_COMPUTE_UNUSED(alpha, beta);
- return Status{};
-}
-
Status validate_arguments_winograd_weight_trans(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -232,78 +200,6 @@ std::pair<Status, Window> validate_and_configure_window_winograd_output_trans(IT
return std::make_pair(err, win);
}
} // namespace
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::NEWinogradLayerBatchedGEMMKernel()
- : _gemms()
-{
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-void NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
- const unsigned int n_gemms,
- const int M, const int K, const int N,
- const int a_matrix_stride,
- const int a_row_stride,
- const int b_matrix_stride,
- const int b_row_stride,
- const int c_matrix_stride,
- const int c_row_stride,
- const TIn *const a_ptr,
- const TIn *const b_ptr,
- TOut *const c_ptr)
-{
- _gemms = support::cpp14::make_unique<MultiGEMM>(n_gemms, M, K, N, a_matrix_stride, a_row_stride, b_matrix_stride, b_row_stride, c_matrix_stride, c_row_stride, a_ptr, b_ptr, c_ptr);
- Window win;
- auto win_last = _gemms->get_window();
- win.set(Window::DimX, Window::Dimension(0, win_last, 1));
- INEKernel::configure(win);
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-void NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- const size_t first_gemm = window.x().start();
- const size_t last_gemm = window.x().end();
- _gemms->run(first_gemm, last_gemm);
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-unsigned int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_number_gemms() const
-{
- return WinogradBase::N_GEMMS;
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_tile_rows() const
-{
- return _output_tile_rows;
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_output_tile_cols() const
-{
- return _output_tile_cols;
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-int NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::get_number_blocks() const
-{
- return WinogradConv::N_BLOCK;
-}
-
-template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
-Status NEWinogradLayerBatchedGEMMKernel<TIn, TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c,
- const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_winograd_gemm(a, b, c, output, alpha, beta, gemm_info));
- return Status{};
-}
-
-template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>;
-template class NEWinogradLayerBatchedGEMMKernel<float, float, 4, 4, 3, 3>;
-template class NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>;
// Weights transform
@@ -332,7 +228,7 @@ int NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, Ker
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
void NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
const ITensor *weights_hwio,
- T *const output,
+ ITensor *output,
const int matrix_stride, /** Stride across matrices in the output. */
const int num_output_channels, /** Number of filters. */
const int num_input_channels) /** Number of channels in each filter. */
@@ -344,7 +240,7 @@ void NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, Ke
_num_input_channels = num_input_channels;
const int matrix_row_stride = roundup(num_output_channels, WinogradConv::N_BLOCK);
- WeightsTransform transform(nullptr, output, matrix_stride, matrix_row_stride, num_output_channels, num_input_channels);
+ WeightsTransform transform(nullptr, nullptr, matrix_stride, matrix_row_stride, num_output_channels, num_input_channels);
Window win;
auto win_last = transform.get_window();
win.set(Window::DimX, Window::Dimension(0, win_last, 1));
@@ -358,7 +254,7 @@ void NEWinogradLayerTransformWeightsKernel<T, OutputTileRows, OutputTileCols, Ke
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
const int matrix_row_stride = roundup(_num_output_channels, WinogradConv::N_BLOCK);
- WeightsTransform transform(reinterpret_cast<T *>(_weights_hwio->buffer()), _output, _matrix_stride, matrix_row_stride, _num_output_channels, _num_input_channels);
+ WeightsTransform transform(reinterpret_cast<T *>(_weights_hwio->buffer()), reinterpret_cast<T *>(_output->buffer()), _matrix_stride, matrix_row_stride, _num_output_channels, _num_input_channels);
const size_t fst = window.x().start();
const size_t lst = window.x().end();
transform.run(fst, lst);
@@ -423,7 +319,7 @@ void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Kern
const int num_cols, /* Number of columns in input tensor. */
const int num_channels, /* Number of channels in input tensor. */
const PaddingType padding, /* Padding type. */
- T *const output, /* Base of output matrices. */
+ ITensor *output, /* Base of output matrices. */
const int matrix_stride) /* Stride between output matrices. */
{
_input_nhwc = input_nhwc;
@@ -434,7 +330,7 @@ void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Kern
_padding = padding;
_output = output;
_matrix_stride = matrix_stride;
- InputTransform transform(nullptr, num_batches, num_rows, num_cols, num_channels, padding, output, matrix_stride, num_channels);
+ InputTransform transform(nullptr, num_batches, num_rows, num_cols, num_channels, padding, nullptr, matrix_stride, num_channels);
Window win;
auto win_last = transform.get_window();
win.set(Window::DimX, Window::Dimension(0, win_last, 1));
@@ -447,7 +343,8 @@ void NEWinogradLayerTransformInputKernel<T, OutputTileRows, OutputTileCols, Kern
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- InputTransform input_transform(reinterpret_cast<const T *>(_input_nhwc->buffer()), _num_batches, _num_rows, _num_cols, _num_channels, _padding, _output, _matrix_stride, _num_channels);
+ InputTransform input_transform(reinterpret_cast<const T *>(_input_nhwc->buffer()), _num_batches, _num_rows, _num_cols, _num_channels, _padding, reinterpret_cast<T *>(_output->buffer()),
+ _matrix_stride, _num_channels);
// The code below cannot be moved to configure because biases hasn't been allocated at that point
const size_t fst = window.x().start();
@@ -511,9 +408,9 @@ Tensor4DShape NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTile
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, KernelRows, KernelCols>::configure(
const ITensor *biases,
- const T *const output_workingspace,
+ const ITensor *output_workingspace,
const int matrix_stride,
- ITensor *const output_nhwc,
+ ITensor *output_nhwc,
const int num_batches,
const int num_rows,
const int num_cols,
@@ -529,7 +426,7 @@ void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, Ker
_num_cols = num_cols;
_num_channels = num_channels;
// We don't have the biases buffer at this stage as it hasn't been allocated, we pass in nullptr OutputTransform is only used here to compute the window
- OutputTransform output_transform(_output_workspace, _matrix_stride, _matrix_row_stride, nullptr, nullptr, _num_batches, _num_rows, _num_cols, _num_channels);
+ OutputTransform output_transform(nullptr, _matrix_stride, _matrix_row_stride, nullptr, nullptr, _num_batches, _num_rows, _num_cols, _num_channels);
Window win;
auto win_last = output_transform.get_window();
@@ -548,7 +445,7 @@ void NEWinogradLayerTransformOutputKernel<T, OutputTileRows, OutputTileCols, Ker
ARM_COMPUTE_ERROR_ON_NULLPTR(_output_workspace);
ARM_COMPUTE_ERROR_ON_NULLPTR(_output_nhwc);
- OutputTransform output_transform(_output_workspace, _matrix_stride, _matrix_row_stride,
+ OutputTransform output_transform(reinterpret_cast<T *>(_output_workspace->buffer()), _matrix_stride, _matrix_row_stride,
(_biases ? reinterpret_cast<T *>(_biases->buffer()) : nullptr), reinterpret_cast<T *>(_output_nhwc->buffer()),
_num_batches, _num_rows, _num_cols, _num_channels, 0, _output_nhwc->info()->strides_in_bytes()[2] / sizeof(T), _output_nhwc->info()->strides_in_bytes()[1] / sizeof(T));
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 29da0803a3..a71eade9a1 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -155,29 +155,32 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
{
if(input->info()->dimension(width_idx) > 4 && input->info()->dimension(height_idx) > 4)
{
- transform_input_kernel = support::cpp14::make_unique<NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>>();
- transform_weights_kernel = support::cpp14::make_unique<NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>>();
- transform_output_kernel = support::cpp14::make_unique<NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>>();
- n_gemms = NEWinogradLayerBatchedGEMMKernel<float, float, 4, 4, 3, 3>::WinogradBase::N_GEMMS;
- N_BLOCK = NEWinogradLayerBatchedGEMMKernel<float, float, 4, 4, 3, 3>::WinogradConv::N_BLOCK;
+ using config = NEWinogradLayerConfiguration<float, float, 4, 4, 3, 3>;
+ transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
+ transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
+ transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
+ n_gemms = config::WinogradBase::N_GEMMS;
+ N_BLOCK = config::WinogradConv::N_BLOCK;
}
else
{
- transform_input_kernel = support::cpp14::make_unique<NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>>();
- transform_weights_kernel = support::cpp14::make_unique<NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>>();
- transform_output_kernel = support::cpp14::make_unique<NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>>();
- n_gemms = NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>::WinogradBase::N_GEMMS;
- N_BLOCK = NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 3, 3>::WinogradConv::N_BLOCK;
+ using config = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
+ transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
+ transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
+ transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
+ n_gemms = config::WinogradBase::N_GEMMS;
+ N_BLOCK = config::WinogradConv::N_BLOCK;
}
break;
}
case 5:
{
- transform_input_kernel = support::cpp14::make_unique<NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>>();
- transform_weights_kernel = support::cpp14::make_unique<NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>>();
- transform_output_kernel = support::cpp14::make_unique<NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>>();
- n_gemms = NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>::WinogradBase::N_GEMMS;
- N_BLOCK = NEWinogradLayerBatchedGEMMKernel<float, float, 2, 2, 5, 5>::WinogradConv::N_BLOCK;
+ using config = NEWinogradLayerConfiguration<float, float, 2, 2, 5, 5>;
+ transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
+ transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
+ transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
+ n_gemms = config::WinogradBase::N_GEMMS;
+ N_BLOCK = config::WinogradConv::N_BLOCK;
break;
}
default:
@@ -195,21 +198,28 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
const int out_channels = output->info()->dimension(channel_idx);
const Tensor4DShape in_shape(internal_get_input_shape(input));
+ const DataType data_type = input->info()->data_type();
const size_t data_type_size = input->info()->element_size();
// Get the memory required to instantiate a new Winograd operator.
constexpr size_t storage_alignment = 64;
// Kernel Storage
const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
- in_channels) * data_type_size + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ in_channels)
+ * data_type_size
+ + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
// Input storage
const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
- use_same_padding) * data_type_size + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ use_same_padding)
+ * data_type_size
+ + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
// Output storage
const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels,
- use_same_padding) * data_type_size + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
+ use_same_padding)
+ * data_type_size
+ + storage_alignment - 1; /* FIXME: remove alignment after COMPMID-1088 */
;
const KernelShape kernel_shape({ out_channels, static_cast<int>(kernel_size.height), static_cast<int>(kernel_size.width), in_channels });
const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(kernel_shape);
@@ -229,28 +239,28 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
const int output_matrix_row_stride = kernel_matrix_row_stride;
TensorShape a_shape(k, m, 1, n_gemms);
- Strides a_strides(element_size_from_data_type(DataType::F32));
+ Strides a_strides(data_type_size);
a_strides.set(1, a_strides[0] * k);
+ //a_strides.set(2, data_type_size * input_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
a_strides.set(2, 0);
- //a_strides.set(2, element_size_from_data_type(DataType::F32) * input_matrix_stride / n_gemms);
- a_strides.set(3, element_size_from_data_type(DataType::F32) * input_matrix_stride);
+ a_strides.set(3, data_type_size * input_matrix_stride);
TensorShape b_shape(n, k, n_gemms);
- Strides b_strides(element_size_from_data_type(DataType::F32));
- b_strides.set(1, element_size_from_data_type(DataType::F32) * kernel_matrix_row_stride);
- b_strides.set(2, element_size_from_data_type(DataType::F32) * kernel_matrix_stride);
+ Strides b_strides(data_type_size);
+ b_strides.set(1, data_type_size * kernel_matrix_row_stride);
+ b_strides.set(2, data_type_size * kernel_matrix_stride);
TensorShape d_shape(n, m, 1, n_gemms);
- Strides d_strides(element_size_from_data_type(DataType::F32));
- d_strides.set(1, element_size_from_data_type(DataType::F32) * output_matrix_row_stride);
+ Strides d_strides(data_type_size);
+ d_strides.set(1, data_type_size * output_matrix_row_stride);
+ //d_strides.set(2, data_type_size * output_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
d_strides.set(2, 0);
- //d_strides.set(2, element_size_from_data_type(DataType::F32) * output_matrix_stride / n_gemms);
- d_strides.set(3, element_size_from_data_type(DataType::F32) * output_matrix_stride);
+ d_strides.set(3, data_type_size * output_matrix_stride);
TensorInfo a_info, b_info, d_info;
- a_info.init(a_shape, 1, DataType::F32, a_strides, 0, input_storage_size);
- b_info.init(b_shape, 1, DataType::F32, b_strides, 0, kernel_storage_size);
- d_info.init(d_shape, 1, DataType::F32, d_strides, 0, output_storage_size);
+ a_info.init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
+ b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
+ d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
_input_workspace.allocator()->init(a_info, storage_alignment);
_input_workspace.allocator()->allocate();
@@ -276,12 +286,12 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
_input_nhwc.allocator()->allocate();
transform_input_kernel->configure(&_input_nhwc, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
- reinterpret_cast<float *>(_input_workspace.buffer()), input_matrix_stride);
+ &_input_workspace, input_matrix_stride);
}
else
{
transform_input_kernel->configure(_input, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
- reinterpret_cast<float *>(_input_workspace.buffer()), input_matrix_stride);
+ &_input_workspace, input_matrix_stride);
}
// Configure WeightsTransform
@@ -290,14 +300,14 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
_permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 2U, 0U, 1U));
- transform_weights_kernel->configure(&_weights_hwio, reinterpret_cast<float *>(_kernel_storage.buffer()), kernel_matrix_stride, out_channels, in_channels);
+ transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
}
else
{
// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
_permute_weights.configure(weights, &_weights_hwio, PermutationVector(3U, 0U, 1U, 2U));
- transform_weights_kernel->configure(&_weights_hwio, reinterpret_cast<float *>(_kernel_storage.buffer()), kernel_matrix_stride, out_channels, in_channels);
+ transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
}
_weights_hwio.allocator()->allocate();
@@ -306,13 +316,13 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
if(data_layout == DataLayout::NCHW)
{
- transform_output_kernel->configure(biases, reinterpret_cast<float *>(_output_workspace.buffer()),
+ transform_output_kernel->configure(biases, &_output_workspace,
output_matrix_stride, &_output_nhwc,
in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
}
else
{
- transform_output_kernel->configure(biases, reinterpret_cast<float *>(_output_workspace.buffer()),
+ transform_output_kernel->configure(biases, &_output_workspace,
output_matrix_stride, _output,
in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
}