From 8155c0253c00aa9e26651361460c66feb39829a6 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
Date: Fri, 16 Apr 2021 15:08:59 +0100
Subject: Rework OpenCL Depthwise Convolution

- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native
  implementation for both floating-point and quantized data types
- Develop two parametric OpenCL kernels for depthwise convolution layer NHWC
  (floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on OpenCL

Resolves COMPMID-4417

Change-Id: Ibe533f79c2860f9cac8e921895d5a8f947753a5c
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5893
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/KernelDescriptors.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arm_compute/core/KernelDescriptors.h')

diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 6c1fc74b1e..a6e5c3372e 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -96,16 +96,12 @@ struct GEMMKernelInfo
     GEMMLowpOutputStageInfo output_stage{};                   /**< GEMMLowp output stage information */
 };
 
-/** Descriptor used by the depthwise convolution kernels */
-struct DWCKernelInfo
+/** Compute descriptor used by the depthwise convolution native kernel */
+struct DWCComputeKernelInfo
 {
-    ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */
-};
-
-/** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */
-struct DWCWeightsKernelInfo
-{
-    unsigned int n0{ 0 }; /**< Number of columns processed by each thread */
+    unsigned int n0{ 0 };                             /**< Number of columns processed by each thread */
+    unsigned int m0{ 0 };                             /**< Number of rows processed by each thread */
+    bool         export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
 };
 
 /** Descriptor used by the softmax kernels */
-- 
cgit v1.2.1