aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cpu/operators/CpuDepthwiseConv2d.cpp22
-rw-r--r--src/cpu/operators/CpuDepthwiseConv2d.h3
-rw-r--r--src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp9
-rw-r--r--tests/validation/NEON/DepthwiseConvolutionLayer.cpp34
-rw-r--r--tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h31
5 files changed, 90 insertions, 9 deletions
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp
index c93ffb113d..f69ecdc5bf 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI
_is_nchw = src->data_layout() == DataLayout::NCHW;
_permute = _is_nchw;
_is_prepared = false;
+ _are_weights_const = weights->are_values_constant();
// Configure pipeline
_is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
@@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t
void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
{
+ // if weights are not constant then we need to repack so that weights
+ // can be updated in-place
+ if(!_are_weights_const)
+ {
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+ ITensorPack pack_opt;
+ pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
+ pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+ // Prepare optimized function
+ _dwc_optimized_func->prepare(pack_opt);
+
+ return;
+ }
+
if(!_is_prepared)
{
auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h
index 15e52ef515..3d8719ee44 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.h
+++ b/src/cpu/operators/CpuDepthwiseConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -143,6 +143,7 @@ private:
bool _permute{ false };
bool _is_activationlayer_enabled{ false };
bool _is_prepared{ false };
+ bool _are_weights_const{ true };
};
/** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index e75b082ca5..a5b9eca56e 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
{
std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr };
bool is_prepared{ false };
+ bool are_weights_const{ true };
experimental::MemoryRequirements mem_req{};
};
@@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
const CPUInfo &ci = NEScheduler::get().cpu_info();
const unsigned int num_threads = NEScheduler::get().num_threads();
_pImpl->is_prepared = false;
+ _pImpl->are_weights_const = weights->are_values_constant();
// If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info))
@@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors)
{
- if(!_pImpl->is_prepared)
+ const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+
+ if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared)
{
// Pack weights and bias
- const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
const ITensor *bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
ITensor *storage = tensors.get_tensor(TensorType::ACL_INT_1);
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 7260eec42d..ab49ee1962 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -244,6 +244,8 @@ template <typename T>
using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
template <typename T>
using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>;
TEST_SUITE(Float)
TEST_SUITE(F32)
@@ -354,6 +356,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float
{
validate(Accessor(_target), _reference, tolerance_f32);
}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
@@ -373,6 +385,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float
{
validate(Accessor(_target), _reference, tolerance_f32);
}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
@@ -383,6 +405,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float
{
validate(Accessor(_target), _reference, tolerance_f32);
}
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
TEST_SUITE_END() // Optimized
TEST_SUITE_END() // F32
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 58e5c528e7..a08080a665 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -59,7 +59,7 @@ public:
void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation,
unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
- DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false)
+ DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false)
{
ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
_mixed_layout = mixed_layout;
@@ -75,6 +75,7 @@ public:
_depth_multiplier = depth_multiplier;
_dilation = dilation;
_in_place = in_place;
+ _run_twice = run_twice;
_bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
@@ -105,6 +106,9 @@ public:
// Create tensors
_src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
_weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+ if(_run_twice) {
+ _weights.info()->set_are_values_constant(false);
+ }
_biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
TensorType *target_to_use = nullptr;
if(!_in_place)
@@ -151,6 +155,16 @@ public:
fill(AccessorType(_weights), 1);
fill(AccessorType(_biases), 2);
+ // Run with variable input
+ if(_run_twice) {
+ _dwc.run();
+
+ // Fill tensors with a new seed
+ fill(AccessorType(_src), 3);
+ fill(AccessorType(_weights), 4);
+ fill(AccessorType(_biases), 5);
+ }
+
if(_mixed_layout)
{
mix_layout(_dwc, _src, _target);
@@ -171,6 +185,16 @@ public:
fill(src, 0);
fill(weights, 1);
fill(biases, 2);
+ if(_run_twice) {
+ SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
+ if(_act_info.enabled()) {
+ reference::activation_layer<T>(depth_out, _act_info);
+ }
+
+ fill(src, 3);
+ fill(weights, 4);
+ fill(biases, 5);
+ }
SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
_reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out;
@@ -258,9 +282,10 @@ protected:
Size2D _dilation{};
bool _mixed_layout{ false };
bool _in_place{ false };
+ bool _run_twice{ false };
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false>
class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
{
public:
@@ -270,7 +295,7 @@ public:
{
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
- data_layout, act_info, mixed_layout, in_place);
+ data_layout, act_info, mixed_layout, in_place, run_twice);
}
};