aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Usama Arif <usama.arif@arm.com> 2019-04-16 14:32:25 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-17 15:49:59 +0000
commit: 2899e00a6fa57242a9bcae1d08a9a7e1e80f14e7 (patch)
tree: 78f9ed1fa6fe5cde2acf9a47170e72b96418d730
parent: dcd949d8c71d879ff656c13d068e8af3dd31eef1 (diff)
download: ComputeLibrary-2899e00a6fa57242a9bcae1d08a9a7e1e80f14e7.tar.gz
COMPMID-2049: Add support for deconvolution for qasymm8 on NEON
Change-Id: I02890c7542f6036edad9cbba9fdcf2312c70070a
Signed-off-by: Usama Arif <usama.arif@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1000
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r-- arm_compute/core/CPP/kernels/CPPUpsampleKernel.h 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h 16
-rw-r--r-- src/core/CPP/kernels/CPPUpsampleKernel.cpp 21
-rw-r--r-- src/runtime/NEON/functions/NEDeconvolutionLayer.cpp 17
-rw-r--r-- tests/validation/NEON/DeconvolutionLayer.cpp 58
5 files changed, 90 insertions, 26 deletions
diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index e814c76c7d..4e61356760 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to upsample. Data types supported: F32
+ * @param[in] input The input tensor to upsample. Data types supported: F32/QASYMM8
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] info Padding info.
* @param[in] inner_border_right The number of zeros added to right edge of the input.
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index dad5d81b14..25512fa147 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -90,9 +90,9 @@ public:
*
* @note This method will be deprecated in the next release.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
* @param[in] inner_border_right The number of zeros added to right edge of the input.
@@ -105,9 +105,9 @@ public:
*
* @note This method will be deprecated in the next release.
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
* @param[in] inner_border_right The number of zeros added to right edge of the input.
@@ -120,9 +120,9 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
*
@@ -130,9 +130,9 @@ public:
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
*
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index d77d9c118f..f04728d30d 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,14 +73,15 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
// Initialize _scaled_output buffer
- const int width_scaled = _output->info()->dimension(0);
- const int height_scaled = _output->info()->dimension(1);
- const int stride_x = _info.stride().first;
- const int stride_y = _info.stride().second;
- const int start_x = _info.pad().first;
- const int start_y = _inner_border.second + _info.pad().second;
- const int end_y = height_scaled - _info.pad().second;
- const int end_x = width_scaled - _inner_border.first - _info.pad().first;
+ const int width_scaled = _output->info()->dimension(0);
+ const int height_scaled = _output->info()->dimension(1);
+ const int stride_x = _info.stride().first;
+ const int stride_y = _info.stride().second;
+ const int start_x = _info.pad().first;
+ const int start_y = _inner_border.second + _info.pad().second;
+ const int end_y = height_scaled - _info.pad().second;
+ const int end_x = width_scaled - _inner_border.first - _info.pad().first;
+ const size_t element_size = _input->info()->element_size();
std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
@@ -95,7 +96,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
execute_window_loop(window, [&](const Coordinates & id)
{
- *(reinterpret_cast<float *>(out.ptr())) = *(reinterpret_cast<const float *>(in.ptr()));
+ memcpy(out.ptr(), in.ptr(), element_size);
},
in, out);
}
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index fdc959c4a9..aff335e5e3 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -51,8 +51,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
unsigned int inner_border_right, unsigned int inner_border_top)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
@@ -68,7 +68,11 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- if(bias != nullptr)
+ if(is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
}
@@ -111,10 +115,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_inner_border = std::make_pair(inner_border_right, inner_border_top);
_is_prepared = false;
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
+ const DataLayout data_layout = input->info()->data_layout();
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
- _weights_flipped.allocator()->init(TensorInfo(weights->info()->tensor_shape(), 1, weights->info()->data_type()));
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
_flip_weights.configure(weights, &_weights_flipped);
auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 4a05535e09..fc37c02279 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -44,6 +44,8 @@ namespace validation
namespace
{
constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr float tolerance_num = 0.07f; /**< Tolerance number */
const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
* framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -213,6 +215,62 @@ TEST_SUITE_END() // W1x1
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture4x4 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ data_layouts_dataset),
+ framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // DeconvolutionLayer
TEST_SUITE_END() // NEON
} // namespace validation