From a7077e9b8cc2d93a84249ee665776d09963e08a0 Mon Sep 17 00:00:00 2001
From: Milos Puzovic
Date: Fri, 28 Oct 2022 16:49:15 +0100
Subject: Updateable weights in depthwise convolution

Check whether the weights are defined as constant. If they are not
constant, repack them even when they have already been packed, so that
updated weight values are picked up.

Signed-off-by: Milos Puzovic
Change-Id: I73447e31e3660b05f8f40e04ea4ea2003eb9b802
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8539
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Reviewed-by: Gian Marco Iodice
Benchmark: Arm Jenkins
---
 src/cpu/operators/CpuDepthwiseConv2d.cpp            | 22 +++++++++++++-
 src/cpu/operators/CpuDepthwiseConv2d.h              |  3 +-
 .../CpuDepthwiseConv2dAssemblyDispatch.cpp          |  9 ++++--
 .../validation/NEON/DepthwiseConvolutionLayer.cpp   | 34 +++++++++++++++++++++-
 .../fixtures/DepthwiseConvolutionLayerFixture.h     | 31 ++++++++++++++++++--
 5 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp
index c93ffb113d..f69ecdc5bf 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI
     _is_nchw           = src->data_layout() == DataLayout::NCHW;
     _permute           = _is_nchw;
     _is_prepared       = false;
+    _are_weights_const = weights->are_values_constant();
 
     // Configure pipeline
     _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
@@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t
 
 void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
 {
+    // if weights are not constant then we need to repack so that weights
+    // can be updated in-place
+    if(!_are_weights_const)
+    {
+        auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+        auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+        auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+        ITensorPack pack_opt;
+        pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
+        pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+        pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+        // Prepare optimized function
+        _dwc_optimized_func->prepare(pack_opt);
+
+        return;
+    }
+
     if(!_is_prepared)
     {
         auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h
index 15e52ef515..3d8719ee44 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.h
+++ b/src/cpu/operators/CpuDepthwiseConv2d.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -143,6 +143,7 @@ private:
         bool _permute{ false };
         bool _is_activationlayer_enabled{ false };
         bool _is_prepared{ false };
+        bool _are_weights_const{ true };
     };
 
     /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index e75b082ca5..a5b9eca56e 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
 {
     std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr };
     bool                             is_prepared{ false };
+    bool                             are_weights_const{ true };
     experimental::MemoryRequirements mem_req{};
 };
 
@@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
     const CPUInfo     &ci          = NEScheduler::get().cpu_info();
     const unsigned int num_threads = NEScheduler::get().num_threads();
     _pImpl->is_prepared            = false;
+    _pImpl->are_weights_const      = weights->are_values_constant();
 
     // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
     if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info))
@@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
 
 void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors)
 {
-    if(!_pImpl->is_prepared)
+    const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+
+    if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared)
     {
         // Pack weights and bias
-        const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
         const ITensor *bias    = tensors.get_const_tensor(TensorType::ACL_SRC_2);
         ITensor       *storage = tensors.get_tensor(TensorType::ACL_INT_1);
 
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 7260eec42d..ab49ee1962 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -244,6 +244,8 @@ template <typename T>
 using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
 template <typename T>
 using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(F32)
@@ -354,6 +356,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
                            combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
                                                            framework::dataset::make("DepthMultiplier", 1)),
@@ -373,6 +385,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                            combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
                                                            framework::dataset::make("DepthMultiplier", 1)),
@@ -383,6 +405,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 TEST_SUITE_END() // Optimized
 TEST_SUITE_END() // F32
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 58e5c528e7..a08080a665 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -59,7 +59,7 @@ public:
     void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
                QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
-               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false)
+               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false)
     {
         ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
         _mixed_layout = mixed_layout;
@@ -75,6 +75,7 @@ public:
         _depth_multiplier = depth_multiplier;
         _dilation         = dilation;
         _in_place         = in_place;
+        _run_twice        = run_twice;
 
         _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
@@ -105,6 +106,9 @@ public:
         // Create tensors
         _src     = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
         _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+        if(_run_twice) {
+            _weights.info()->set_are_values_constant(false);
+        }
         _biases  = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
         TensorType *target_to_use = nullptr;
         if(!_in_place)
@@ -151,6 +155,16 @@ public:
         fill(AccessorType(_weights), 1);
         fill(AccessorType(_biases), 2);
 
+        // Run with variable input
+        if(_run_twice) {
+            _dwc.run();
+
+            // Fill tensors with a new seed
+            fill(AccessorType(_src), 3);
+            fill(AccessorType(_weights), 4);
+            fill(AccessorType(_biases), 5);
+        }
+
         if(_mixed_layout)
         {
             mix_layout(_dwc, _src, _target);
@@ -171,6 +185,16 @@ public:
         fill(src, 0);
         fill(weights, 1);
         fill(biases, 2);
+        if(_run_twice) {
+            SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
+            if(_act_info.enabled()) {
+                reference::activation_layer(depth_out, _act_info);
+            }
+
+            fill(src, 3);
+            fill(weights, 4);
+            fill(biases, 5);
+        }
 
         SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
         _reference = (_act_info.enabled()) ? reference::activation_layer(depth_out, _act_info) : depth_out;
@@ -258,9 +282,10 @@ protected:
     Size2D _dilation{};
     bool   _mixed_layout{ false };
     bool   _in_place{ false };
+    bool   _run_twice{ false };
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false>
 class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
@@ -270,7 +295,7 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type,
                                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
-                                                                                                            data_layout, act_info, mixed_layout, in_place);
+                                                                                                            data_layout, act_info, mixed_layout, in_place, run_twice);
     }
 };
-- 
cgit v1.2.1
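
With this change an application can mark the depthwise convolution weights as
non-constant, run the operator, overwrite the weight values in place and run it
again without reconfiguring: prepare() now repacks non-constant weights on every
call instead of packing them only once. A minimal sketch of that usage pattern
against the public NEON runtime API follows; the tensor shapes, the NHWC layout
and the "fill the buffers" placeholders are illustrative assumptions rather than
part of the patch.

#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, biases, dst;

    // NHWC shape ordering in TensorShape is (C, W, H, N) for activations and (C, W, H) for depthwise weights
    TensorInfo src_info(TensorShape(16U, 8U, 8U, 1U), 1, DataType::F32);
    TensorInfo weights_info(TensorShape(16U, 3U, 3U), 1, DataType::F32);
    TensorInfo biases_info(TensorShape(16U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 6U, 6U, 1U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    weights_info.set_data_layout(DataLayout::NHWC);
    dst_info.set_data_layout(DataLayout::NHWC);

    // Declare the weights as non-constant *before* configure() so that the
    // operator keeps repacking them in prepare() instead of packing them once
    weights_info.set_are_values_constant(false);

    src.allocator()->init(src_info);
    weights.allocator()->init(weights_info);
    biases.allocator()->init(biases_info);
    dst.allocator()->init(dst_info);

    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0), 1 /* depth_multiplier */);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and biases ...
    dwc.run();

    // Overwrite the weight values in place and run again; no re-configure is
    // needed because the weights were flagged as non-constant
    // ... write new values into the weights buffer ...
    dwc.run();

    return 0;
}

This mirrors what the updated test fixture does when run_twice is set: it calls
set_are_values_constant(false) on the weights, runs the layer once, refills the
tensors with a new seed and runs it a second time.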