From a7077e9b8cc2d93a84249ee665776d09963e08a0 Mon Sep 17 00:00:00 2001
From: Milos Puzovic
Date: Fri, 28 Oct 2022 16:49:15 +0100
Subject: Updateable weights in depthwise convolution

Check whether the weights are defined as constant. If they are not
constant, repack them even when they have already been packed, so that
updated weight values are picked up.

Signed-off-by: Milos Puzovic
Change-Id: I73447e31e3660b05f8f40e04ea4ea2003eb9b802
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8539
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Reviewed-by: Gian Marco Iodice
Benchmark: Arm Jenkins
---
 src/cpu/operators/CpuDepthwiseConv2d.cpp            | 22 +++++++++++++-
 src/cpu/operators/CpuDepthwiseConv2d.h              |  3 +-
 .../CpuDepthwiseConv2dAssemblyDispatch.cpp          |  9 ++++--
 .../validation/NEON/DepthwiseConvolutionLayer.cpp   | 34 +++++++++++++++++++++-
 .../fixtures/DepthwiseConvolutionLayerFixture.h     | 31 ++++++++++++++++++--
 5 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp
index c93ffb113d..f69ecdc5bf 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,6 +88,7 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI
     _is_nchw           = src->data_layout() == DataLayout::NCHW;
     _permute           = _is_nchw;
     _is_prepared       = false;
+    _are_weights_const = weights->are_values_constant();
 
     // Configure pipeline
     _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
@@ -218,6 +219,25 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &t
 
 void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
 {
+    // if weights are not constant then we need to repack so that weights
+    // can be updated in-place
+    if(!_are_weights_const)
+    {
+        auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+        auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+        auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+        ITensorPack pack_opt;
+        pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
+        pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+        pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+        // Prepare optimized function
+        _dwc_optimized_func->prepare(pack_opt);
+
+        return;
+    }
+
     if(!_is_prepared)
     {
         auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.h b/src/cpu/operators/CpuDepthwiseConv2d.h
index 15e52ef515..3d8719ee44 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.h
+++ b/src/cpu/operators/CpuDepthwiseConv2d.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -143,6 +143,7 @@ private:
         bool _permute{ false };
         bool _is_activationlayer_enabled{ false };
         bool _is_prepared{ false };
+        bool _are_weights_const{ true };
     };
 
     /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index e75b082ca5..a5b9eca56e 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,7 @@ struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
 {
     std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr };
     bool                             is_prepared{ false };
+    bool                             are_weights_const{ true };
     experimental::MemoryRequirements mem_req{};
 };
 
@@ -62,6 +63,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
     const CPUInfo     &ci          = NEScheduler::get().cpu_info();
     const unsigned int num_threads = NEScheduler::get().num_threads();
     _pImpl->is_prepared            = false;
+    _pImpl->are_weights_const      = weights->are_values_constant();
 
     // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
     if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info))
@@ -107,10 +109,11 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
 
 void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors)
 {
-    if(!_pImpl->is_prepared)
+    const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+
+    if((!_pImpl->are_weights_const && weights != nullptr) || !_pImpl->is_prepared)
     {
         // Pack weights and bias
-        const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
         const ITensor *bias    = tensors.get_const_tensor(TensorType::ACL_SRC_2);
         ITensor       *storage = tensors.get_tensor(TensorType::ACL_INT_1);
 
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 7260eec42d..ab49ee1962 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -244,6 +244,8 @@ template <typename T>
 using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
 template <typename T>
 using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(F32)
@@ -354,6 +356,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
                            combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
                                                            framework::dataset::make("DepthMultiplier", 1)),
@@ -373,6 +385,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                            combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
                                                            framework::dataset::make("DepthMultiplier", 1)),
@@ -383,6 +405,16 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                           framework::dataset::make("DepthMultiplier", 1)),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 TEST_SUITE_END() // Optimized
 TEST_SUITE_END() // F32
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 58e5c528e7..a08080a665 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -59,7 +59,7 @@ public:
     void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
                QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
-               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false)
+               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false)
     {
         ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
         _mixed_layout = mixed_layout;
@@ -75,6 +75,7 @@ public:
         _depth_multiplier = depth_multiplier;
         _dilation         = dilation;
         _in_place         = in_place;
+        _run_twice        = run_twice;
 
         _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
@@ -105,6 +106,9 @@ public:
         // Create tensors
         _src     = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
         _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+        if(_run_twice) {
+            _weights.info()->set_are_values_constant(false);
+        }
         _biases  = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
         TensorType *target_to_use = nullptr;
         if(!_in_place)
@@ -151,6 +155,16 @@ public:
         fill(AccessorType(_weights), 1);
         fill(AccessorType(_biases), 2);
 
+        // Run with variable input
+        if(_run_twice) {
+            _dwc.run();
+
+            // Fill tensors with a new seed
+            fill(AccessorType(_src), 3);
+            fill(AccessorType(_weights), 4);
+            fill(AccessorType(_biases), 5);
+        }
+
         if(_mixed_layout)
         {
             mix_layout(_dwc, _src, _target);
@@ -171,6 +185,16 @@ public:
         fill(src, 0);
         fill(weights, 1);
         fill(biases, 2);
+        if(_run_twice) {
+            SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
+            if(_act_info.enabled()) {
+                reference::activation_layer(depth_out, _act_info);
+            }
+
+            fill(src, 3);
+            fill(weights, 4);
+            fill(biases, 5);
+        }
 
         SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info);
         _reference = (_act_info.enabled()) ? reference::activation_layer(depth_out, _act_info) : depth_out;
@@ -258,9 +282,10 @@ protected:
     Size2D _dilation{};
     bool   _mixed_layout{ false };
     bool   _in_place{ false };
+    bool   _run_twice{ false };
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false>
 class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
@@ -270,7 +295,7 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type,
                                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
-                                                                                                            data_layout, act_info, mixed_layout, in_place);
+                                                                                                            data_layout, act_info, mixed_layout, in_place, run_twice);
     }
 };
-- 
cgit v1.2.1
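
With this change an application can mark the depthwise convolution weights as
non-constant, run the operator, overwrite the weight values in place and run it
again without reconfiguring: prepare() now repacks non-constant weights on every
call instead of packing them only once. A minimal sketch of that usage pattern
against the public NEON runtime API follows; the tensor shapes, the NHWC layout
and the "fill the buffers" placeholders are illustrative assumptions rather than
part of the patch.

#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, biases, dst;

    // NHWC shape ordering in TensorShape is (C, W, H, N) for activations and (C, W, H) for depthwise weights
    TensorInfo src_info(TensorShape(16U, 8U, 8U, 1U), 1, DataType::F32);
    TensorInfo weights_info(TensorShape(16U, 3U, 3U), 1, DataType::F32);
    TensorInfo biases_info(TensorShape(16U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(16U, 6U, 6U, 1U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    weights_info.set_data_layout(DataLayout::NHWC);
    dst_info.set_data_layout(DataLayout::NHWC);

    // Declare the weights as non-constant *before* configure() so that the
    // operator keeps repacking them in prepare() instead of packing them once
    weights_info.set_are_values_constant(false);

    src.allocator()->init(src_info);
    weights.allocator()->init(weights_info);
    biases.allocator()->init(biases_info);
    dst.allocator()->init(dst_info);

    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0), 1 /* depth_multiplier */);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and biases ...
    dwc.run();

    // Overwrite the weight values in place and run again; no re-configure is
    // needed because the weights were flagged as non-constant
    // ... write new values into the weights buffer ...
    dwc.run();

    return 0;
}

This mirrors what the updated test fixture does when run_twice is set: it calls
set_are_values_constant(false) on the weights, runs the layer once, refills the
tensors with a new seed and runs it a second time.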