From b49f27cbcaf6ecdc405efa29b5f43535f53cb50c Mon Sep 17 00:00:00 2001
From: Ramy Elgammal <ramy.elgammal@arm.com>
Date: Mon, 8 May 2023 03:33:43 +0100
Subject: =?UTF-8?q?Re-enable=20dynamic=20weights=20in=20Neon=E2=84=A2=20de?=
 =?UTF-8?q?pthwise=20convolution?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 - Call Neon™ depthwise convolution validation inside its configure() method.

Resolves: COMPMID-6188
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Ib2ae4d995ff2bbc92ce4496d4ab93cf09113e3e9
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9594
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 src/core/experimental/PostOpUtils.h                        |  4 +---
 src/cpu/kernels/pool2d/neon/fp32.cpp                       |  6 +-----
 src/cpu/operators/CpuDepthwiseConv2d.cpp                   | 12 +++++-------
 src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp |  8 +++++---
 tests/framework/Framework.cpp                              |  3 ++-
 5 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/src/core/experimental/PostOpUtils.h b/src/core/experimental/PostOpUtils.h
index 53795db13f..6217dcc3da 100644
--- a/src/core/experimental/PostOpUtils.h
+++ b/src/core/experimental/PostOpUtils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,6 @@ template <typename FromTensorT, typename ToTensorT>
 PostOpList<ToTensorT> transform_post_op_list_arguments(const PostOpList<FromTensorT> &post_ops, std::function<ToTensorT(FromTensorT)> transform_arg)
 {
     PostOpList<ToTensorT> transformed_post_ops;
-    int                   op_idx = 0;
     for(const auto &post_op : post_ops.get_list())
     {
         switch(post_op->type())
@@ -70,7 +69,6 @@ PostOpList<ToTensorT> transform_post_op_list_arguments(const PostOpList<FromTens
                 ARM_COMPUTE_ERROR("Unsupported PostOpType");
             }
         }
-        ++op_idx;
     }
     return transformed_post_ops;
 }
diff --git a/src/cpu/kernels/pool2d/neon/fp32.cpp b/src/cpu/kernels/pool2d/neon/fp32.cpp
index 8e93df3347..a400f3a95d 100644
--- a/src/cpu/kernels/pool2d/neon/fp32.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp32.cpp
@@ -234,11 +234,9 @@ void poolingMxN_fp32_neon_nhwc_kernel_indices(const ITensor *src, ITensor *dst0,
             float    res      = min_value;
             uint32_t idx      = 0U;
             const uint8_t *in_ptr_y = in_ptr_n + in_ptr_y_offset + in_ptr_x_offset;
-            uint32_t    curr_kernel_index = pool_size_x * pool_start_y;
             for(int y = pool_start_y; y < pool_end_y; ++y)
             {
                 const uint8_t *in_ptr_x = in_ptr_y + (x_off * sizeof(float));
-                curr_kernel_index += pool_start_x;
                 for(int x = pool_start_x; x < pool_end_x; ++x)
                 {
                     const float data = *(reinterpret_cast<const float *>(in_ptr_x));
@@ -248,9 +246,7 @@ void poolingMxN_fp32_neon_nhwc_kernel_indices(const ITensor *src, ITensor *dst0,
                         res = data;
                     }
                     in_ptr_x += y_stride;
-                    curr_kernel_index++;
                 }
-                curr_kernel_index += (pool_size_x - pool_end_x);
                 in_ptr_y += z_stride;
             }
 
@@ -434,4 +430,4 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
     }
 }
 } // namespace cpu
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/cpu/operators/CpuDepthwiseConv2d.cpp b/src/cpu/operators/CpuDepthwiseConv2d.cpp
index ea451a461a..884fe5c4ed 100644
--- a/src/cpu/operators/CpuDepthwiseConv2d.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -83,11 +83,11 @@ void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorI
     ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
                                                                              dst, info));
 
-    _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
-    _has_bias     = biases != nullptr;
-    _is_nchw      = src->data_layout() == DataLayout::NCHW;
-    _permute      = _is_nchw;
-    _is_prepared  = false;
+    _is_quantized      = is_data_type_quantized_asymmetric(src->data_type());
+    _has_bias          = biases != nullptr;
+    _is_nchw           = src->data_layout() == DataLayout::NCHW;
+    _permute           = _is_nchw;
+    _is_prepared       = false;
     _are_weights_const = weights->are_values_constant();
 
     // Configure pipeline
@@ -461,8 +461,6 @@ void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights,
 
 Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!weights->are_values_constant(), "Dynamic weights are not supported");
-
     DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
     switch(depth_conv_func)
     {
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index fb6a2b08da..d0881f85d3 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -221,8 +221,6 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
-                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));
 
     const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
     _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
@@ -310,7 +308,11 @@ struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
 void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                             const ActivationLayerInfo &act_info, const Size2D &dilation)
 {
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); // biases is optional and may legitimately be nullptr
+
     ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);
+    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
+                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));
 
     const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
     _impl->op              = std::make_shared<cpu::CpuDepthwiseConv2d>();
diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp
index 03c1db82c8..a3dee07862 100644
--- a/tests/framework/Framework.cpp
+++ b/tests/framework/Framework.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -557,6 +557,7 @@ bool Framework::run()
 
     int id          = 0;
     int id_run_test = 0;
+    ARM_COMPUTE_UNUSED(id_run_test); // Not used if ARM_COMPUTE_CL is not defined
 
     for(auto &test_factory : _test_factories)
     {
-- 
cgit v1.2.1