From 2899e00a6fa57242a9bcae1d08a9a7e1e80f14e7 Mon Sep 17 00:00:00 2001
From: Usama Arif
Date: Tue, 16 Apr 2019 14:32:25 +0100
Subject: COMPMID-2049: Add support for deconvolution for qasymm8 on NEON

Change-Id: I02890c7542f6036edad9cbba9fdcf2312c70070a
Signed-off-by: Usama Arif
Reviewed-on: https://review.mlplatform.org/c/1000
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/CPP/kernels/CPPUpsampleKernel.h   |  4 +-
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  | 16 +++---
 src/core/CPP/kernels/CPPUpsampleKernel.cpp         | 21 ++++----
 .../NEON/functions/NEDeconvolutionLayer.cpp        | 17 ++++---
 tests/validation/NEON/DeconvolutionLayer.cpp       | 58 ++++++++++++++++++++++
 5 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index e814c76c7d..4e61356760 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,7 +55,7 @@ public:
 
     /** Set the input and output of the kernel.
      *
-     * @param[in]  input              The input tensor to upsample. Data types supported: F32
+     * @param[in]  input              The input tensor to upsample. Data types supported: F32/QASYMM8
      * @param[out] output             The output tensor. Data types supported: Same as @p input
      * @param[in]  info               Padding info.
      * @param[in]  inner_border_right The number of zeros added to right edge of the input.
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index dad5d81b14..25512fa147 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -90,9 +90,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in]     weights            The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[out]    output             Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info               Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in]     inner_border_right The number of zeros added to right edge of the input.
@@ -105,9 +105,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in] weights            The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[in] output             Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info               Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in] inner_border_right The number of zeros added to right edge of the input.
@@ -120,9 +120,9 @@ public:
 
     /** Set the input, weights, biases and output tensors.
      *
-     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in]     weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[out]    output  Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info    Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      *
@@ -130,9 +130,9 @@ public:
     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
      *
-     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias    (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in] bias    (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[in] output  Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info    Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      *
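Taken together, the interface changes above define the quantized contract: QASYMM8 input and weights, an S32 bias, and a QASYMM8 output. A minimal usage sketch of that contract follows; it is illustrative only and not part of the patch, and every shape, quantization parameter and padding value in it is an assumption:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void qasymm8_deconvolution_example()
    {
        // QASYMM8 input/weights/output with an S32 bias, per the documented contract.
        Tensor input, weights, bias, output;
        input.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 10)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8, QuantizationInfo(2.f / 255.f, 5)));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        // Stride 1 and no padding give (4 - 1) * 1 + 3 = 6 per spatial dimension.
        output.allocator()->init(TensorInfo(TensorShape(6U, 6U, 16U), 1, DataType::QASYMM8, QuantizationInfo(4.f / 255.f, 0)));

        NEDeconvolutionLayer deconv;
        deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(1, 1, 0, 0));

        input.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input, weights and bias, then:
        deconv.run();
    }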
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index d77d9c118f..f04728d30d 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,14 +73,15 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
 
     // Initialize _scaled_output buffer
-    const int width_scaled  = _output->info()->dimension(0);
-    const int height_scaled = _output->info()->dimension(1);
-    const int stride_x      = _info.stride().first;
-    const int stride_y      = _info.stride().second;
-    const int start_x       = _info.pad().first;
-    const int start_y       = _inner_border.second + _info.pad().second;
-    const int end_y         = height_scaled - _info.pad().second;
-    const int end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const int    width_scaled  = _output->info()->dimension(0);
+    const int    height_scaled = _output->info()->dimension(1);
+    const int    stride_x      = _info.stride().first;
+    const int    stride_y      = _info.stride().second;
+    const int    start_x       = _info.pad().first;
+    const int    start_y       = _inner_border.second + _info.pad().second;
+    const int    end_y         = height_scaled - _info.pad().second;
+    const int    end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const size_t element_size  = _input->info()->element_size();
 
     std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
 
@@ -95,7 +96,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
-        *(reinterpret_cast<float *>(out.ptr())) = *(reinterpret_cast<const float *>(in.ptr()));
+        memcpy(out.ptr(), in.ptr(), element_size);
    },
     in, out);
 }
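The memcpy change above is what makes the upsample scatter element-type agnostic: instead of dereferencing through float pointers, the kernel copies element_size bytes, so the same loop serves F32 (4-byte) and QASYMM8 (1-byte) tensors alike. Below is a standalone sketch of the same idea, stripped of the Window/Iterator machinery and ignoring padding and inner-border offsets; the function and its parameters are hypothetical:

    #include <cstdint>
    #include <cstring>

    // Scatter each input element into a zero-initialised output buffer whose
    // spatial dimensions are stretched by (stride_x, stride_y); byte-wise
    // copies keep the loop independent of the element type.
    void upsample_scatter(const uint8_t *in, uint8_t *out,
                          int in_w, int in_h, int out_w,
                          int stride_x, int stride_y, std::size_t element_size)
    {
        for(int y = 0; y < in_h; ++y)
        {
            for(int x = 0; x < in_w; ++x)
            {
                const uint8_t *src = in + (y * in_w + x) * element_size;
                uint8_t       *dst = out + (y * stride_y * out_w + x * stride_x) * element_size;
                std::memcpy(dst, src, element_size);
            }
        }
    }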
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index fdc959c4a9..aff335e5e3 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -51,8 +51,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
                                       unsigned int inner_border_right, unsigned int inner_border_top)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
     ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
@@ -68,7 +68,11 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
 
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
-    if(bias != nullptr)
+    if(is_data_type_quantized_asymmetric(input->data_type()))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+    }
+    else
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
     }
@@ -111,10 +115,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     _inner_border = std::make_pair(inner_border_right, inner_border_top);
     _is_prepared  = false;
 
-    const unsigned int stride_x = info.stride().first;
-    const unsigned int stride_y = info.stride().second;
+    const DataLayout   data_layout = input->info()->data_layout();
+    const unsigned int stride_x    = info.stride().first;
+    const unsigned int stride_y    = info.stride().second;
 
-    _weights_flipped.allocator()->init(TensorInfo(weights->info()->tensor_shape(), 1, weights->info()->data_type()));
+    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
     _flip_weights.configure(weights, &_weights_flipped);
 
     auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
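The new validate() branch above requires an S32 bias whenever the input is QASYMM8. This follows the usual asymmetric quantization scheme: accumulation happens in the int32 domain, whose effective scale is the product of the input and weight scales, so a real-valued bias has to be pre-quantized into that domain rather than kept in F32. A small helper sketch of that relationship (not part of the patch; the helper name and signature are hypothetical):

    #include <cmath>
    #include <cstdint>

    // A real-valued bias b is added in the int32 accumulator domain, whose
    // scale is s_input * s_weights, so it is stored as
    // round(b / (s_input * s_weights)).
    int32_t quantize_bias(float b, float s_input, float s_weights)
    {
        return static_cast<int32_t>(std::lround(b / (s_input * s_weights)));
    }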
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 4a05535e09..fc37c02279 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -44,6 +44,8 @@ namespace validation
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr float                    tolerance_num = 0.07f;  /**< Tolerance number */
 
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
                      * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -213,6 +215,62 @@ TEST_SUITE_END() // W1x1
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture4x4 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
 TEST_SUITE_END() // DeconvolutionLayer
 TEST_SUITE_END() // NEON
 } // namespace validation
-- 
cgit v1.2.1
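The tests above exercise the quantized path end to end; for callers, the static validate() documented in the header offers the same checks as a pre-flight query. A hedged sketch of such a check follows (all tensor infos are assumptions); it should return true for this S32-bias configuration and false if the bias were F32:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"

    using namespace arm_compute;

    bool qasymm8_deconv_config_is_valid()
    {
        const TensorInfo input_info(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 10));
        const TensorInfo weights_info(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8, QuantizationInfo(2.f / 255.f, 5));
        const TensorInfo bias_info(TensorShape(16U), 1, DataType::S32);
        const TensorInfo output_info(TensorShape(6U, 6U, 16U), 1, DataType::QASYMM8, QuantizationInfo(4.f / 255.f, 0));

        // validate() returns an error status instead of asserting, so a
        // configuration can be vetted before any tensor is allocated.
        const Status status = NEDeconvolutionLayer::validate(&input_info, &weights_info, &bias_info, &output_info,
                                                             PadStrideInfo(1, 1, 0, 0));
        return bool(status);
    }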