diff options
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.cpp                      | 22
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2d.h                        |  3
-rw-r--r--  src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp      |  9
-rw-r--r--  tests/validation/NEON/DepthwiseConvolutionLayer.cpp           | 34
-rw-r--r--  tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h  | 31
5 files changed, 90 insertions(+), 9 deletions(-)
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp index c93ffb113d..f69ecdc5bf 100644 --- a/src/cpu/operators/CpuDepthwiseConv2d.cpp +++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI _is_nchw = src->data_layout() == DataLayout::NCHW; _permute = _is_nchw; _is_prepared = false; + _are_weights_const = weights->are_values_constant(); // Configure pipeline _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info); @@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors) { + // if weights are not constant then we need to repack so that weights + // can be updated in-place + if(!_are_weights_const) + { + auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2); + auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4); + + ITensorPack pack_opt; + pack_opt.add_tensor(TensorType::ACL_SRC_1, weights); + pack_opt.add_tensor(TensorType::ACL_SRC_2, bias); + pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights); + + // Prepare optimized function + _dwc_optimized_func->prepare(pack_opt); + + return; + } + if(!_is_prepared) { auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h index 15e52ef515..3d8719ee44 100644 --- a/src/cpu/operators/CpuDepthwiseConv2d.h +++ b/src/cpu/operators/CpuDepthwiseConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -143,6 +143,7 @@ private: bool _permute{ false }; bool _is_activationlayer_enabled{ false }; bool _is_prepared{ false }; + bool _are_weights_const{ true }; }; /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp index e75b082ca5..a5b9eca56e 100644 --- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp +++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl { std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr }; bool is_prepared{ false }; + bool are_weights_const{ true }; experimental::MemoryRequirements mem_req{}; }; @@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src, const CPUInfo &ci = NEScheduler::get().cpu_info(); const unsigned int num_threads = NEScheduler::get().num_threads(); _pImpl->is_prepared = false; + _pImpl->are_weights_const = weights->are_values_constant(); // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured() if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info)) @@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors) void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors) { - if(!_pImpl->is_prepared) + const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); + + if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared) { // Pack weights and bias - const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); 
const ITensor *bias = tensors.get_const_tensor(TensorType::ACL_SRC_2); ITensor *storage = tensors.get_tensor(TensorType::ACL_INT_1); diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 7260eec42d..ab49ee1962 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -244,6 +244,8 @@ template <typename T> using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; template <typename T> using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>; +template <typename T> +using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>; TEST_SUITE(Float) TEST_SUITE(F32) @@ -354,6 +356,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, 
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), @@ -373,6 +385,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), @@ -383,6 +405,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} TEST_SUITE_END() // Optimized TEST_SUITE_END() // F32 diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 58e5c528e7..a08080a665 100644 --- 
a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -59,7 +59,7 @@ public: void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type, QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info, - DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false) + DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false) { ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); _mixed_layout = mixed_layout; @@ -75,6 +75,7 @@ public: _depth_multiplier = depth_multiplier; _dilation = dilation; _in_place = in_place; + _run_twice = run_twice; _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type; @@ -105,6 +106,9 @@ public: // Create tensors _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); + if(_run_twice) { + _weights.info()->set_are_values_constant(false); + } _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); TensorType *target_to_use = nullptr; if(!_in_place) @@ -151,6 +155,16 @@ public: fill(AccessorType(_weights), 1); fill(AccessorType(_biases), 2); + // Run with variable input + if(_run_twice) { + _dwc.run(); + + // Fill tensors with a new seed + fill(AccessorType(_src), 3); + fill(AccessorType(_weights), 4); + fill(AccessorType(_biases), 5); + } + if(_mixed_layout) { mix_layout(_dwc, _src, _target); @@ -171,6 +185,16 @@ public: fill(src, 0); fill(weights, 1); fill(biases, 2); + if(_run_twice) { + SimpleTensor<T> 
depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); + if(_act_info.enabled()) { + reference::activation_layer<T>(depth_out, _act_info); + } + + fill(src, 3); + fill(weights, 4); + fill(biases, 5); + } SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); _reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out; @@ -258,9 +282,10 @@ protected: Size2D _dilation{}; bool _mixed_layout{ false }; bool _in_place{ false }; + bool _run_twice{ false }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false> class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: @@ -270,7 +295,7 @@ public: { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), - data_layout, act_info, mixed_layout, in_place); + data_layout, act_info, mixed_layout, in_place, run_twice); } }; |