From bfa3b52de2cfbd330efc19e2096134a20c645406 Mon Sep 17 00:00:00 2001
From: Gian Marco
Date: Tue, 12 Dec 2017 10:08:38 +0000
Subject: COMPMID-556 - Fix examples

- Fixed data type issue in cl_sgemm
- Added support for both NEON and OpenCL targets in the graph examples;
  previously only the OpenCL target could be run
- Added auto_init() in NEDepthwiseVectorToTensorKernel

Change-Id: I4410ce6f4992b2375b980634fe55f1083cf3c471
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112850
Reviewed-by: Anthony Barbier
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
---
 examples/graph_alexnet.cpp                        | 44 ++++++++++----------
 examples/graph_googlenet.cpp                      | 47 ++++++++++++----------
 examples/graph_lenet.cpp                          | 32 ++++++++-------
 examples/graph_mobilenet.cpp                      | 46 ++++++++++++---------
 examples/graph_squeezenet.cpp                     | 47 ++++++++++++----------
 examples/graph_vgg16.cpp                          | 43 +++++++++++---------
 examples/graph_vgg19.cpp                          | 43 +++++++++++---------
 .../kernels/NEDepthwiseVectorToTensorKernel.cpp   | 11 +++++
 src/runtime/CL/functions/CLConvolutionLayer.cpp   | 15 +++++--
 src/runtime/NEON/functions/NEConvolutionLayer.cpp |  2 +-
 utils/GraphUtils.h                                | 21 ++++++++++
 utils/Utils.h                                     | 12 +++---
 12 files changed, 215 insertions(+), 148 deletions(-)

diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 534ee45bcd..0d5531f282 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -37,7 +37,7 @@ using namespace arm_compute::graph_utils;
 /** Example demonstrating how to implement AlexNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_alexnet(int argc, const char **argv)
 {
@@ -49,43 +49,45 @@ void main_graph_alexnet(int argc, const char **argv)
     constexpr float mean_g = 116.67f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 104.01f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint            target_hint      = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+    ConvolutionMethodHint convolution_hint = target_hint == TargetHint::NEON ? ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT;
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
 
-    graph << hint
+    graph << target_hint
           << Tensor(TensorInfo(TensorShape(227U, 227U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           // Layer 1
@@ -98,7 +100,7 @@ void main_graph_alexnet(int argc, const char **argv)
           << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
           << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
           // Layer 2
-          << ConvolutionMethodHint::DIRECT
+          << convolution_hint
           << ConvolutionLayer(
               5U, 5U, 256U,
               get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
@@ -157,7 +159,7 @@ void main_graph_alexnet(int argc, const char **argv)
 /** Main program for AlexNet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index b7ff4e4bf0..d08382ab8e 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -34,6 +34,8 @@
 using namespace arm_compute::graph;
 using namespace arm_compute::graph_utils;
 
+namespace
+{
 BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path,
                                unsigned int a_filt,
                                std::tuple b_filters,
@@ -88,11 +90,12 @@ BranchLayer get_inception_node(const std::string &data_path, std::string &&param
     return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
 }
+} // namespace
 
 /** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_googlenet(int argc, const char **argv)
 {
@@ -104,44 +107,45 @@ void main_graph_googlenet(int argc, const char **argv)
     constexpr float mean_g = 116.67f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 104.01f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint            target_hint      = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+    ConvolutionMethodHint convolution_hint = target_hint == TargetHint::NEON ? ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT;
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        //Do something with argv[1]
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
 
-    graph << hint
+    graph << target_hint
           << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           << ConvolutionLayer(
@@ -152,6 +156,7 @@ void main_graph_googlenet(int argc, const char **argv)
           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
           << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
           << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
+          << convolution_hint
           << ConvolutionLayer(
               1U, 1U, 64U,
               get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"),
@@ -191,7 +196,7 @@ void main_graph_googlenet(int argc, const char **argv)
 /** Main program for Googlenet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index ad4a4e02c7..d4a44382b4 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -32,6 +32,8 @@
 using namespace arm_compute::graph;
 using namespace arm_compute::graph_utils;
 
+namespace
+{
 /** Generates appropriate accessor according to the specified path
  *
  * @note If path is empty will generate a DummyAccessor else will generate a NumPyBinLoader
  *
@@ -52,49 +54,51 @@ std::unique_ptr get_accessor(const std::string &path, const std
         return arm_compute::support::cpp14::make_unique(path + data_file);
     }
 }
+} // namespace
 
 /** Example demonstrating how to implement LeNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches )
  */
 void main_graph_lenet(int argc, const char **argv)
 {
     std::string  data_path;   /** Path to the trainable data */
     unsigned int batches = 4; /** Number of batches */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint target_hint = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
+    {
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
+    }
+    else if(argc == 3)
     {
         //Do something with argv[1]
-        data_path = argv[1];
+        data_path = argv[2];
         std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n";
         std::cout << "No number of batches where specified, thus will use the default : " << batches << "\n\n";
     }
     else
     {
         //Do something with argv[1] and argv[2]
-        data_path = argv[1];
-        batches   = std::strtol(argv[2], nullptr, 0);
-    }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
-    {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        batches   = std::strtol(argv[3], nullptr, 0);
     }
 
     Graph graph;
 
     //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx
-    graph << hint
+    graph << target_hint
           << Tensor(TensorInfo(TensorShape(28U, 28U, 1U, batches), 1, DataType::F32), DummyAccessor())
           << ConvolutionLayer(
               5U, 5U, 20U,
@@ -126,7 +130,7 @@ void main_graph_lenet(int argc, const char **argv)
 /** Main program for LeNet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 081fae67e2..0c916c7ba0 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -32,6 +32,8 @@
 using namespace arm_compute::graph;
 using namespace arm_compute::graph_utils;
 
+namespace
+{
 BranchLayer get_dwsc_node(const std::string &data_path, std::string &&param_path, unsigned int conv_filt,
                           PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info)
@@ -66,11 +68,12 @@ BranchLayer get_dwsc_node(const std::string &data_path, std::string &&param_path
     return BranchLayer(std::move(sg));
 }
+} // namespace
 
 /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_mobilenet(int argc, const char **argv)
 {
@@ -82,42 +85,44 @@ void main_graph_mobilenet(int argc, const char **argv)
     constexpr float mean_g = 116.67f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 104.01f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint target_hint = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
-    graph << hint
+
+    graph << target_hint
           << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           << ConvolutionLayer(
@@ -131,6 +136,7 @@ void main_graph_mobilenet(int argc, const char **argv)
               get_weights_accessor(data_path, "/cnn_data/mobilenet_v1_model/Conv2d_0_BatchNorm_beta.npy"),
               get_weights_accessor(data_path, "/cnn_data/mobilenet_v1_model/Conv2d_0_BatchNorm_gamma.npy"),
               0.001f)
+          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
           << get_dwsc_node(data_path, "Conv2d_1", 64, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0))
           << get_dwsc_node(data_path, "Conv2d_2", 128, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR), PadStrideInfo(1, 1, 0, 0))
@@ -161,7 +167,7 @@ void main_graph_mobilenet(int argc, const char **argv)
 /** Main program for MobileNetV1
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index 1743509256..c8c411aa8b 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -35,6 +35,8 @@
 using namespace arm_compute::graph;
 using namespace arm_compute::graph_utils;
 using namespace arm_compute::logging;
 
+namespace
+{
 BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, unsigned int expand1_filt, unsigned int expand3_filt)
 {
     std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_";
@@ -56,11 +58,12 @@ BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&par
     return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
 }
+} // namespace
 
 /** Example demonstrating how to implement Squeezenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_squeezenet(int argc, const char **argv)
 {
@@ -72,44 +75,45 @@ void main_graph_squeezenet(int argc, const char **argv)
     constexpr float mean_g = 116.67f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 104.01f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint            target_hint      = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+    ConvolutionMethodHint convolution_hint = target_hint == TargetHint::NEON ? ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT;
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        //Do something with argv[1]
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
 
-    graph << hint
+    graph << target_hint
           << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           << ConvolutionLayer(
@@ -117,6 +121,7 @@ void main_graph_squeezenet(int argc, const char **argv)
               get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy"),
               get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"),
               PadStrideInfo(2, 2, 0, 0))
+          << convolution_hint
           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
           << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
           << ConvolutionLayer(
@@ -194,7 +199,7 @@ void main_graph_squeezenet(int argc, const char **argv)
 /** Main program for Squeezenet v1.0
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 44dd1f63e4..cac38d30a7 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -35,7 +35,7 @@ using namespace arm_compute::graph_utils;
 /** Example demonstrating how to implement VGG16's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_vgg16(int argc, const char **argv)
 {
@@ -47,43 +47,46 @@ void main_graph_vgg16(int argc, const char **argv)
     constexpr float mean_g = 116.779f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 103.939f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint            target_hint      = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+    ConvolutionMethodHint convolution_hint = ConvolutionMethodHint::DIRECT;
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
    }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
 
-    graph << hint
+    graph << target_hint
+          << convolution_hint
           << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           << ConvolutionMethodHint::DIRECT
@@ -211,7 +214,7 @@ void main_graph_vgg16(int argc, const char **argv)
 /** Main program for VGG16
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index a39e255ad0..49ae0fe51c 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -35,7 +35,7 @@ using namespace arm_compute::graph_utils;
 /** Example demonstrating how to implement VGG19's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 void main_graph_vgg19(int argc, const char **argv)
 {
@@ -47,43 +47,46 @@ void main_graph_vgg19(int argc, const char **argv)
     constexpr float mean_g = 116.779f; /* Mean value to subtract from green channel */
     constexpr float mean_b = 103.939f; /* Mean value to subtract from blue channel */
 
+    // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
+    TargetHint            target_hint      = set_target_hint(argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0);
+    ConvolutionMethodHint convolution_hint = ConvolutionMethodHint::DIRECT;
+
     // Parse arguments
     if(argc < 2)
     {
         // Print help
-        std::cout << "Usage: " << argv[0] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
         std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 2)
     {
-        data_path = argv[1];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [image] [labels]\n\n";
-        std::cout << "No image provided: using random values\n\n";
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+        std::cout << "No data folder provided: using random values\n\n";
     }
     else if(argc == 3)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [labels]\n\n";
-        std::cout << "No text file with labels provided: skipping output accessor\n\n";
+        data_path = argv[2];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+        std::cout << "No image provided: using random values\n\n";
     }
-    else
+    else if(argc == 4)
     {
-        data_path = argv[1];
-        image     = argv[2];
-        label     = argv[3];
+        data_path = argv[2];
+        image     = argv[3];
+        std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+        std::cout << "No text file with labels provided: skipping output accessor\n\n";
     }
-
-    // Check if OpenCL is available and initialize the scheduler
-    TargetHint hint = TargetHint::NEON;
-    if(Graph::opencl_is_available())
+    else
     {
-        hint = TargetHint::OPENCL;
+        data_path = argv[2];
+        image     = argv[3];
+        label     = argv[4];
     }
 
     Graph graph;
 
-    graph << hint
+    graph << target_hint
+          << convolution_hint
           << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
                     get_input_accessor(image, mean_r, mean_g, mean_b))
           // Layer 1
@@ -220,7 +223,7 @@ void main_graph_vgg19(int argc, const char **argv)
 /** Main program for VGG19
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
  */
 int main(int argc, const char **argv)
 {
diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
index 6deda506ab..9b36df3c39 100644
--- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
@@ -45,6 +45,17 @@ NEDepthwiseVectorToTensorKernel::NEDepthwiseVectorToTensorKernel()
 
 void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+
+    TensorShape output_shape = input->info()->tensor_shape();
+    output_shape.set(0, conv_w);
+    output_shape.set(1, conv_h);
+    output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 66548d19b2..d628bf93ce 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -222,7 +222,10 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     shape_im2col.set(0, mat_input_cols);
     shape_im2col.set(1, mat_input_rows);
     shape_im2col.set(2, 1);
-    _input_im2col_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));
+    // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+    TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position());
+    im2col_reshaped_info.set_quantization_info(input->info()->quantization_info());
+    _input_im2col_reshaped.allocator()->init(im2col_reshaped_info);
     _memory_group.manage(&_input_im2col_reshaped);
 
     // Create tensor (interleave) to prepare input tensor for GEMM
@@ -231,7 +234,10 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
         TensorShape shape_interleaved = shape_im2col;
         shape_interleaved.set(0, shape_interleaved.x() * 4);
         shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f));
-        _input_interleaved_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_interleaved));
+        // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+        TensorInfo interleaved_info(shape_interleaved, 1, dt, input->info()->fixed_point_position());
+        interleaved_info.set_quantization_info(input->info()->quantization_info());
+        _input_interleaved_reshaped.allocator()->init(interleaved_info);
         _memory_group.manage(&_input_interleaved_reshaped);
     }
 
@@ -241,8 +247,9 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig
     shape_gemm.set(1, mat_input_rows);
     const DataType gemm_data_type = _is_quantized ? DataType::S32 : dt;
     // GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
-    TensorInfo info_gemm(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_gemm).set_data_type(gemm_data_type).set_quantization_info(
-                             output->info()->quantization_info()));
+    // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
+    TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
+    info_gemm.set_quantization_info(output->info()->quantization_info());
     _gemm_output.allocator()->init(info_gemm);
     _memory_group.manage(&_gemm_output);
 
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 865672e525..2717bbfabc 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -267,7 +267,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
 
     // Configure matrix multiplication kernel
     if(_is_fully_connected_convolution)
     {
-        _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace, 1.f, 0.f, false, false);
+        _mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace, 1.f, 0.f, true, false);
     }
     else
     {
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index d7d5cd6778..429394d1cc 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -25,6 +25,7 @@
 #define __ARM_COMPUTE_GRAPH_UTILS_H__
 
 #include "arm_compute/core/PixelValue.h"
+#include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/ITensorAccessor.h"
 #include "arm_compute/graph/Types.h"
 
@@ -220,6 +221,26 @@ inline std::unique_ptr get_input_accessor(const std::str
     }
 }
 
+/** Utility function to return the TargetHint
+ *
+ * @param[in] target Integer value which expresses the selected target. Must be 0 for NEON or 1 for OpenCL
+ *
+ * @return the TargetHint
+ */
+inline graph::TargetHint set_target_hint(int target)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(target > 1, "Invalid target. Target must be 0 (NEON) or 1 (OpenCL)");
+    if(target == 1 && graph::Graph::opencl_is_available())
+    {
+        // If type of target is OpenCL, check if OpenCL is available and initialize the scheduler
+        return graph::TargetHint::OPENCL;
+    }
+    else
+    {
+        return graph::TargetHint::NEON;
+    }
+}
+
 /** Generates appropriate output accessor according to the specified labels_path
  *
  * @note If labels_path is empty will generate a DummyAccessor else will generate a TopNPredictionsAccessor
diff --git a/utils/Utils.h b/utils/Utils.h
index 9133fd0ac0..eb4e846e80 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -708,7 +708,7 @@ void save_to_ppm(T &tensor, const std::string &ppm_filename)
 /** Template helper function to save a tensor image to a NPY file.
  *
- * @note Only F32 format supported.
+ * @note Only F32 data type supported.
  * @note Only works with 2D tensors.
  * @note If the input tensor is a CLTensor, the function maps and unmaps the image
  *
@@ -719,7 +719,7 @@ void save_to_ppm(T &tensor, const std::string &ppm_filename)
 template
 void save_to_npy(T &tensor, const std::string &npy_filename, bool fortran_order)
 {
-    ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(&tensor, arm_compute::Format::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(&tensor, arm_compute::DataType::F32);
     ARM_COMPUTE_ERROR_ON(tensor.info()->num_dimensions() > 2);
 
     std::ofstream fs;
@@ -745,9 +745,9 @@ void save_to_npy(T &tensor, const std::string &npy_filename, bool fortran_order)
         // Map buffer if creating a CLTensor
         map(tensor, true);
 
-        switch(tensor.info()->format())
+        switch(tensor.info()->data_type())
         {
-            case arm_compute::Format::F32:
+            case arm_compute::DataType::F32:
             {
                 std::vector tmp; /* Used only to get the typestring */
                 npy::Typestring typestring_o{ tmp };
@@ -851,9 +851,9 @@ void fill_random_tensor(T &tensor, float lower_bound, float upper_bound)
 
     Iterator it(&tensor, window);
 
-    switch(tensor.info()->format())
+    switch(tensor.info()->data_type())
     {
-        case arm_compute::Format::F32:
+        case arm_compute::DataType::F32:
         {
             std::uniform_real_distribution dist(lower_bound, upper_bound);
--
cgit v1.2.1