From dff601dad65d95ad85fee349a44b6417b7de7f2c Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 9 Aug 2018 13:28:41 +0100
Subject: COMPMID-1488 - Add support for NHWC when running CLGEMMConvolutionLayer with QASYMM8

Also fixed a bug in the graph API related to the bias shape in DepthwiseConvolution for NHWC

Change-Id: I275141a42e51f6747b77db1c31d1bc69e8685af5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143454
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 src/graph/GraphBuilder.cpp                       |  2 +-
 .../CL/functions/CLDepthwiseConvolutionLayer.cpp |  2 +-
 .../CL/functions/CLGEMMConvolutionLayer.cpp      | 45 ++++++++++++++--------
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 7b09ec98f5..fa78024e71 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -380,7 +380,7 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
     if(has_bias)
     {
         TensorDescriptor b_desc = input_tensor_desc;
-        b_desc.shape            = TensorShape(b_desc.shape.z());
+        b_desc.shape            = TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
 
         b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
     }
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 1d89b84097..76451af9b1 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -73,7 +73,7 @@ Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const
                                                 ActivationLayerInfo act_info, GPUTarget gpu_target)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
 
     if(input->data_layout() == DataLayout::NCHW)
     {
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 49549a0ad0..ca6157ef13 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -91,8 +91,8 @@ void CLConvolutionLayerReshapeWeights::run()
 
 CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
-      _add_bias_kernel(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _append_bias(false), _skip_im2col(false),
-      _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
+      _add_bias_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _append_bias(false),
+      _skip_im2col(false), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
 {
 }
 
@@ -143,14 +143,13 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
         weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
 
         // Perform validation step on GEMMLowp
-        CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), output, gemm_info);
+        return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), output, gemm_info);
     }
     else
     {
         // Perform validation step on Matrix multiply function
-        CLGEMM::validate(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
+        return CLGEMM::validate(input, weights, nullptr, output, 1.0f, 0.0f, gemm_info);
     }
-    return Status{};
 }
 
 void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
@@ -283,9 +282,17 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
 
     if(!is_nhwc || _is_quantized)
     {
-        // Configure and tune Col2Im
-        _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h));
-        CLScheduler::get().tune_kernel_static(_col2im_kernel);
+        if(input->info()->data_layout() == DataLayout::NCHW)
+        {
+            // Configure and tune Col2Im
+            _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h));
+            CLScheduler::get().tune_kernel_static(_col2im_kernel);
+        }
+        else
+        {
+            // Configure reshape layer
+            _reshape_layer.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output);
+        }
     }
 
     if(!is_nhwc || _is_quantized)
@@ -316,8 +323,6 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && input->data_layout() == DataLayout::NHWC,
-                                    "NHWC is unsupported for QASYMM8!");
 
     const DataLayout data_layout = input->data_layout();
     const DataType   data_type   = input->data_type();
@@ -378,7 +383,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
 
     // Output tensor auto inizialitation if not yet initialized
     ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr));
-    weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, append_bias), 1, data_type);
+    weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col)), 1, data_type);
     weights_to_use        = &weights_reshaped_info;
 
     if(!skip_im2col)
@@ -431,9 +436,12 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     // Validate Col2Im
     if(!is_nhwc || is_quantized)
     {
-        ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use,
-                                                             output,
-                                                             std::make_pair(conv_w, conv_h)));
+        if(input->data_layout() == DataLayout::NCHW)
+        {
+            ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use,
+                                                                 output,
+                                                                 std::make_pair(conv_w, conv_h)));
+        }
     }
 
     //Validate Activation Layer
@@ -480,7 +488,14 @@ void CLGEMMConvolutionLayer::run()
     // Reshape output matrix
     if(_data_layout == DataLayout::NCHW || _is_quantized)
    {
-        CLScheduler::get().enqueue(_col2im_kernel, false);
+        if(_data_layout == DataLayout::NCHW)
+        {
+            CLScheduler::get().enqueue(_col2im_kernel, false);
+        }
+        else
+        {
+            _reshape_layer.run();
+        }
     }
 
     //Run Activation Layer if enabled
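
---

Usage note (not part of the patch): the sketch below exercises the path this change enables, CLGEMMConvolutionLayer running QASYMM8 data in NHWC. The shapes, quantization parameters and padding are illustrative assumptions; only CLGEMMConvolutionLayer, DataType::QASYMM8, DataLayout::NHWC and the validate()/configure()/run() entry points come from the change itself. Internally, the patch routes the NHWC case through CLReshapeLayer instead of CLCol2ImKernel, which only handles NCHW.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // With DataLayout::NHWC, ACL orders tensor shapes as (C, W, H, N).
    // All sizes and quantization values below are illustrative.
    TensorInfo src_info(TensorShape(16U, 56U, 56U, 1U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    TensorInfo wei_info(TensorShape(16U, 3U, 3U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    TensorInfo bia_info(TensorShape(32U), 1, DataType::S32); // quantized biases are S32
    TensorInfo dst_info(TensorShape(32U, 56U, 56U, 1U), 1, DataType::QASYMM8, QuantizationInfo(1.0f, 0));

    src_info.set_data_layout(DataLayout::NHWC);
    wei_info.set_data_layout(DataLayout::NHWC); // must match the input layout
    dst_info.set_data_layout(DataLayout::NHWC);

    // 3x3 kernel, stride 1, pad 1. Before this patch, validate() rejected
    // the QASYMM8 + NHWC combination via the error check removed above.
    const PadStrideInfo conv_info(1, 1, 1, 1);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMConvolutionLayer::validate(&src_info, &wei_info, &bia_info, &dst_info, conv_info));

    CLTensor src, weights, biases, dst;
    src.allocator()->init(src_info);
    weights.allocator()->init(wei_info);
    biases.allocator()->init(bia_info);
    dst.allocator()->init(dst_info);

    CLGEMMConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, conv_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src/weights/biases with quantized data here ...

    conv.run();
    CLScheduler::get().sync();
    return 0;
}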