diff options
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.cpp                      | 22
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.h                        |  3
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp      |  9
-rw-r--r--  tests/validation/NEON/DepthwiseConvolutionLayer.cpp           | 34
-rw-r--r--  tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h  | 31
5 files changed, 90 insertions(+), 9 deletions(-)
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp index c93ffb113d..f69ecdc5bf 100644 --- a/src/cpu/operators/CpuDepthwiseConv2d.cpp +++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI _is_nchw = src->data_layout() == DataLayout::NCHW; _permute = _is_nchw; _is_prepared = false; + _are_weights_const = weights->are_values_constant(); // Configure pipeline _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info); @@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors) { + // if weights are not constant then we need to repack so that weights + // can be updated in-place + if(!_are_weights_const) + { + auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2); + auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4); + + ITensorPack pack_opt; + pack_opt.add_tensor(TensorType::ACL_SRC_1, weights); + pack_opt.add_tensor(TensorType::ACL_SRC_2, bias); + pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights); + + // Prepare optimized function + _dwc_optimized_func->prepare(pack_opt); + + return; + } + if(!_is_prepared) { auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h index 15e52ef515..3d8719ee44 100644 --- a/src/cpu/operators/CpuDepthwiseConv2d.h +++ b/src/cpu/operators/CpuDepthwiseConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -143,6 +143,7 @@ private: bool _permute{ false }; bool _is_activationlayer_enabled{ false }; bool _is_prepared{ false }; + bool _are_weights_const{ true }; }; /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp index e75b082ca5..a5b9eca56e 100644 --- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp +++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl { std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr }; bool is_prepared{ false }; + bool are_weights_const{ true }; experimental::MemoryRequirements mem_req{}; }; @@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src, const CPUInfo &ci = NEScheduler::get().cpu_info(); const unsigned int num_threads = NEScheduler::get().num_threads(); _pImpl->is_prepared = false; + _pImpl->are_weights_const = weights->are_values_constant(); // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured() if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info)) @@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors) void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors) { - if(!_pImpl->is_prepared) + const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); + + if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared) { // Pack weights and bias - const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); 
const ITensor *bias = tensors.get_const_tensor(TensorType::ACL_SRC_2); ITensor *storage = tensors.get_tensor(TensorType::ACL_INT_1); diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 7260eec42d..ab49ee1962 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -244,6 +244,8 @@ template <typename T> using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; template <typename T> using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>; +template <typename T> +using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>; TEST_SUITE(Float) TEST_SUITE(F32) @@ -354,6 +356,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, 
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), @@ -373,6 +385,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), @@ -383,6 +405,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} TEST_SUITE_END() // Optimized TEST_SUITE_END() // F32 diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 58e5c528e7..a08080a665 100644 --- 
a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -59,7 +59,7 @@ public: void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type, QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info, - DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false) + DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false) { ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); _mixed_layout = mixed_layout; @@ -75,6 +75,7 @@ public: _depth_multiplier = depth_multiplier; _dilation = dilation; _in_place = in_place; + _run_twice = run_twice; _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type; @@ -105,6 +106,9 @@ public: // Create tensors _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); + if(_run_twice) { + _weights.info()->set_are_values_constant(false); + } _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); TensorType *target_to_use = nullptr; if(!_in_place) @@ -151,6 +155,16 @@ public: fill(AccessorType(_weights), 1); fill(AccessorType(_biases), 2); + // Run with variable input + if(_run_twice) { + _dwc.run(); + + // Fill tensors with a new seed + fill(AccessorType(_src), 3); + fill(AccessorType(_weights), 4); + fill(AccessorType(_biases), 5); + } + if(_mixed_layout) { mix_layout(_dwc, _src, _target); @@ -171,6 +185,16 @@ public: fill(src, 0); fill(weights, 1); fill(biases, 2); + if(_run_twice) { + SimpleTensor<T> 
depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); + if(_act_info.enabled()) { + reference::activation_layer<T>(depth_out, _act_info); + } + + fill(src, 3); + fill(weights, 4); + fill(biases, 5); + } SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); _reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out; @@ -258,9 +282,10 @@ protected: Size2D _dilation{}; bool _mixed_layout{ false }; bool _in_place{ false }; + bool _run_twice{ false }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false> class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: @@ -270,7 +295,7 @@ public: { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), - data_layout, act_info, mixed_layout, in_place); + data_layout, act_info, mixed_layout, in_place, run_twice); } }; |