From 4a6d9e85a9cb2e199d20b06e5450036c3b83b91d Mon Sep 17 00:00:00 2001
From: ramelg01 <ramy.elgammal@arm.com>
Date: Sat, 2 Oct 2021 14:34:36 +0100
Subject: Provide logging for configure functions in all CPP functions

 - Moving impl of CPPSplit template to src/runtime/CPP to allow
   including of Log.h from src/common.
 - Fix logging of vector<ITensor*> to print the contained tensors' info, not their pointers.

Partially-Resolves: COMPMID-4718
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Idec81665b2a7c0cfae5248803109c6e2edc520a1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6362
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 Android.bp                                         |   1 +
 arm_compute/runtime/CPP/functions/CPPSplit.h       | 135 +--------------
 src/common/utils/Log.h                             |   6 +-
 .../CL/functions/CLBatchNormalizationLayer.cpp     |   3 +-
 .../CPPBoxWithNonMaximaSuppressionLimit.cpp        |  10 +-
 .../CPP/functions/CPPDetectionOutputLayer.cpp      |   9 +-
 .../CPP/functions/CPPDetectionPostProcessLayer.cpp |  12 +-
 .../CPP/functions/CPPNonMaximumSuppression.cpp     |   6 +-
 src/runtime/CPP/functions/CPPPermute.cpp           |   6 +-
 src/runtime/CPP/functions/CPPSplit.cpp             | 186 +++++++++++++++++++++
 src/runtime/CPP/functions/CPPTopKV.cpp             |   6 +-
 src/runtime/CPP/functions/CPPUpsample.cpp          |   6 +-
 src/runtime/NEON/functions/NEUnstack.cpp           |   2 +-
 utils/TypePrinter.h                                | 167 +++++++++++++-----
 14 files changed, 363 insertions(+), 192 deletions(-)
 create mode 100644 src/runtime/CPP/functions/CPPSplit.cpp
diff --git a/Android.bp b/Android.bp
index 9b6808eb9a..00e0b39175 100644
--- a/Android.bp
+++ b/Android.bp
@@ -710,6 +710,7 @@ cc_library_static {
         "src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp",
         "src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp",
         "src/runtime/CPP/functions/CPPPermute.cpp",
+        "src/runtime/CPP/functions/CPPSplit.cpp",
         "src/runtime/CPP/functions/CPPTopKV.cpp",
         "src/runtime/CPP/functions/CPPUpsample.cpp",
         "src/runtime/IScheduler.cpp",
diff --git a/arm_compute/runtime/CPP/functions/CPPSplit.h b/arm_compute/runtime/CPP/functions/CPPSplit.h
index b2b4d07c86..b797b26960 100644
--- a/arm_compute/runtime/CPP/functions/CPPSplit.h
+++ b/arm_compute/runtime/CPP/functions/CPPSplit.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,10 +41,8 @@ template <typename SliceType, typename TensorInterfaceType = ITensor>
 class CPPSplit : public IFunction
 {
 public:
-    CPPSplit()
-        : _outputs_vector(), _slice_functions(), _num_outputs(0)
-    {
-    }
+    CPPSplit();
+
     /** Static function to check if given info will lead to a valid configuration of @ref CPPSplit
      *
      * @param[in] input   The input tensor info. Data types supported: All.
@@ -55,72 +53,7 @@ public:
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, unsigned int axis)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
-        ARM_COMPUTE_RETURN_ERROR_ON(axis >= input->num_dimensions());
-        ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2);
-
-        // Get output shape
-        TensorShape  output_shape{};
-        unsigned int total_output_shape_size = 0;
-
-        // Sum the output sizes and fall back to evenly-sized splits if any are zero
-        const bool using_split_shapes = std::none_of(outputs.begin(), outputs.end(), [&total_output_shape_size](ITensorInfo * info)
-        {
-            unsigned int output_shape_size = info->tensor_shape().total_size();
-            total_output_shape_size += output_shape_size;
-            return output_shape_size == 0;
-        });
-
-        if(using_split_shapes)
-        {
-            ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != total_output_shape_size);
-        }
-        else
-        {
-            output_shape = arm_compute::misc::shape_calculator::compute_split_shape(input, axis, outputs.size());
-            ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
-        }
-
-        // Validate output tensors
-        unsigned int axis_offset = 0;
-        for(const auto &output : outputs)
-        {
-            ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-            if(using_split_shapes)
-            {
-                output_shape = output->tensor_shape();
-                ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
-            }
-
-            const size_t axis_split_step = output_shape[axis];
-
-            // Start/End coordinates
-            Coordinates start_coords;
-            Coordinates end_coords;
-            for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
-            {
-                end_coords.set(d, -1);
-            }
-
-            // Output auto inizialitation if not yet initialized
-            TensorInfo tmp_output_info = *output->clone();
-            if(tmp_output_info.tensor_shape().total_size() == 0)
-            {
-                tmp_output_info = input->clone()->set_is_resizable(true).set_tensor_shape(output_shape);
-            }
-
-            // Update coordinate on axis
-            start_coords.set(axis, axis_offset);
-            end_coords.set(axis, axis_offset + axis_split_step);
-
-            ARM_COMPUTE_RETURN_ON_ERROR(SliceType::validate(input, output, start_coords, end_coords));
-            axis_offset += axis_split_step;
-        }
-
-        return Status{};
-    }
+    static Status validate(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, unsigned int axis);
 
     /** Initialise the kernel's input and outputs.
      *
@@ -130,65 +63,7 @@ public:
      *                     from the split dimension.
      * @param[in]  axis    Axis on which to split the input.
      */
-    void configure(const TensorInterfaceType *input, const std::vector<TensorInterfaceType *> &outputs, unsigned int axis)
-    {
-        // Create Slice functions
-        _num_outputs = outputs.size();
-        _slice_functions.resize(_num_outputs);
-
-        // Extract output tensor info
-        std::vector<ITensorInfo *> outputs_info;
-        for(auto &output : outputs)
-        {
-            ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-            outputs_info.emplace_back(output->info());
-        }
-
-        // If any of the outputs have a zero size, fall-back to using evenly-sized output splits
-        const bool outputs_have_sizes = std::none_of(outputs_info.begin(), outputs_info.end(), [](ITensorInfo * info)
-        {
-            return info->tensor_shape().total_size() == 0;
-        });
-
-        // Validate
-        ARM_COMPUTE_ERROR_THROW_ON(CPPSplit::validate(input->info(), outputs_info, axis));
-
-        unsigned int axis_offset = 0;
-        unsigned int i           = 0;
-
-        for(const auto &output_info : outputs_info)
-        {
-            // Get output shape
-            TensorShape output_shape = (outputs_have_sizes ?
-                                        output_info->tensor_shape() :
-                                        arm_compute::misc::shape_calculator::compute_split_shape(input->info(), axis, _num_outputs));
-
-            const size_t axis_split_step = output_shape[axis];
-
-            // Start/End coordinates
-            Coordinates start_coords;
-            Coordinates end_coords;
-
-            for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
-            {
-                end_coords.set(d, -1);
-            }
-
-            // Update coordinate on axis
-            start_coords.set(axis, axis_offset);
-            end_coords.set(axis, axis_offset + axis_split_step);
-
-            // Configure slice function
-            _slice_functions[i].configure(input, outputs[i], start_coords, end_coords);
-
-            // Set valid region from shape
-            outputs[i]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
-
-            // Update axis offset
-            axis_offset += axis_split_step;
-            ++i;
-        }
-    }
+    void configure(const TensorInterfaceType *input, const std::vector<TensorInterfaceType *> &outputs, unsigned int axis);
 
 protected:
     std::vector<TensorInterfaceType *> _outputs_vector;
diff --git a/src/common/utils/Log.h b/src/common/utils/Log.h
index 5b049d0de6..f3ae38a57c 100644
--- a/src/common/utils/Log.h
+++ b/src/common/utils/Log.h
@@ -134,9 +134,9 @@ logParamsImpl(std::vector<std::string> &data_registry, const std::tuple<Tp...> &
 /** Function Template with variable number of inputs to collect all the passed parameters from
  *  the logging macro ARM_COMPUTE_LOG_PARAMS(...)
  *
- * @param[in] ...ins The input parameters in the variadic template, taken by reference, (not by value) to avoid
- *                   detecting T as an abstract data type when passing any of these parameters as L-value reference
- *                   to an abstract type.
+ * @param[in] ...ins The input parameters in the variadic template, taken by universal reference (Ts &&...), not by value,
+ *                   to avoid detecting T as an abstract data type when passing any of these parameters as an L-value
+ *                   reference to an abstract type.
  *
  * @return  Vector of the parameters' data in a string format
  */
diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
index 234a0df2aa..e8affc0853 100644
--- a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
@@ -29,10 +29,11 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/common/utils/Log.h"
 
 #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLBatchNormalizationLayer::CLBatchNormalizationLayer()
diff --git a/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp b/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
index b6803d0d37..dccbe4045d 100644
--- a/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
+++ b/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
 #include "arm_compute/runtime/Scheduler.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace
@@ -130,10 +132,12 @@ CPPBoxWithNonMaximaSuppressionLimit::CPPBoxWithNonMaximaSuppressionLimit(std::sh
 {
 }
 
-void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
-                                                    ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
+void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in,
+                                                    ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out,
+                                                    ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
+    ARM_COMPUTE_LOG_PARAMS(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
 
     _is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8 || scores_in->info()->data_type() == DataType::QASYMM8_SIGNED;
 
diff --git a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
index fdb4c9f0f6..41d875eb97 100644
--- a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
+++ b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,8 @@
 #include "arm_compute/core/Validate.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 #include <list>
 
 namespace arm_compute
@@ -388,9 +390,12 @@ CPPDetectionOutputLayer::CPPDetectionOutputLayer()
 {
 }
 
-void CPPDetectionOutputLayer::configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox, ITensor *output, DetectionOutputLayerInfo info)
+void CPPDetectionOutputLayer::configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox,
+                                        ITensor *output, DetectionOutputLayerInfo info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
+    ARM_COMPUTE_LOG_PARAMS(input_loc, input_conf, input_priorbox, output, info);
+
     // Output auto initialization if not yet initialized
     // Since the number of bboxes to kept is unknown before nms, the shape is set to the maximum
     // The maximum is keep_top_k * input_loc_size[1]
diff --git a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
index 31f1fafd69..ecbc49b3c1 100644
--- a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
+++ b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,8 @@
 #include "arm_compute/core/Validate.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 #include <cstddef>
 #include <ios>
 #include <list>
@@ -213,10 +215,14 @@ CPPDetectionPostProcessLayer::CPPDetectionPostProcessLayer(std::shared_ptr<IMemo
 {
 }
 
-void CPPDetectionPostProcessLayer::configure(const ITensor *input_box_encoding, const ITensor *input_scores, const ITensor *input_anchors,
-                                             ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info)
+void CPPDetectionPostProcessLayer::configure(const ITensor *input_box_encoding, const ITensor *input_scores,
+                                             const ITensor *input_anchors, ITensor *output_boxes, ITensor *output_classes,
+                                             ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input_box_encoding, input_scores, input_anchors, output_boxes, output_classes, output_scores);
+    ARM_COMPUTE_LOG_PARAMS(input_box_encoding, input_scores, input_anchors, output_boxes, output_classes, output_scores,
+                           num_detection, info);
+
     _num_max_detected_boxes = info.max_detections() * info.max_classes_per_detection();
 
     auto_init_if_empty(*output_boxes->info(), TensorInfo(TensorShape(_kNumCoordBox, _num_max_detected_boxes, _kBatchSize), 1, DataType::F32));
diff --git a/src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp b/src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp
index d0d0b1e98b..6d01b127c0 100644
--- a/src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp
+++ b/src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,12 +25,16 @@
 
 #include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CPPNonMaximumSuppression::configure(
     const ITensor *bboxes, const ITensor *scores, ITensor *indices, unsigned int max_output_size,
     const float score_threshold, const float nms_threshold)
 {
+    ARM_COMPUTE_LOG_PARAMS(bboxes, scores, indices, max_output_size, score_threshold, nms_threshold);
+
     auto k = std::make_unique<CPPNonMaximumSuppressionKernel>();
     k->configure(bboxes, scores, indices, max_output_size, score_threshold, nms_threshold);
     _kernel = std::move(k);
diff --git a/src/runtime/CPP/functions/CPPPermute.cpp b/src/runtime/CPP/functions/CPPPermute.cpp
index 76fa09f12b..83941f1dc1 100644
--- a/src/runtime/CPP/functions/CPPPermute.cpp
+++ b/src/runtime/CPP/functions/CPPPermute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,10 +25,14 @@
 
 #include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 void CPPPermute::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, perm);
+
     auto k = std::make_unique<CPPPermuteKernel>();
     k->configure(input, output, perm);
     _kernel = std::move(k);
diff --git a/src/runtime/CPP/functions/CPPSplit.cpp b/src/runtime/CPP/functions/CPPSplit.cpp
new file mode 100644
index 0000000000..98af8ad971
--- /dev/null
+++ b/src/runtime/CPP/functions/CPPSplit.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
+
+#ifdef ARM_COMPUTE_CPU_ENABLED // NEON Build is activated
+#include "arm_compute/runtime/NEON/functions/NESlice.h"
+#endif /* ARM_COMPUTE_CPU_ENABLED */
+
+#ifdef ARM_COMPUTE_OPENCL_ENABLED // OPENCL build is activated
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#endif /* ARM_COMPUTE_OPENCL_ENABLED */
+
+#include "src/common/utils/Log.h"
+
+namespace arm_compute
+{
+/** Out-of-line definitions of the CPPSplit members; CPPSplit is the basic function to split a tensor along a given axis */
+/** Default constructor: value-initialises _outputs_vector and _slice_functions and sets _num_outputs to 0 */
+template <typename SliceType, typename TensorInterfaceType>
+CPPSplit<SliceType, TensorInterfaceType>::CPPSplit()
+    : _outputs_vector(), _slice_functions(), _num_outputs(0)
+{
+}
+
+template <typename SliceType, typename TensorInterfaceType>
+Status CPPSplit<SliceType, TensorInterfaceType>::validate(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, unsigned int axis)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+    ARM_COMPUTE_RETURN_ERROR_ON(axis >= input->num_dimensions());
+    ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2);
+
+    // Get output shape
+    TensorShape  output_shape{};
+    unsigned int total_output_shape_size = 0;
+
+    // Sum the output sizes and fall back to evenly-sized splits if any are zero (a null entry counts as zero here; the loop below rejects it)
+    const bool using_split_shapes = std::none_of(outputs.begin(), outputs.end(), [&total_output_shape_size](ITensorInfo * info)
+    {
+        const unsigned int output_shape_size = (info != nullptr) ? info->tensor_shape().total_size() : 0;
+        total_output_shape_size += output_shape_size;
+        return output_shape_size == 0;
+    });
+
+    if(using_split_shapes)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != total_output_shape_size);
+    }
+    else
+    {
+        output_shape = arm_compute::misc::shape_calculator::compute_split_shape(input, axis, outputs.size());
+        ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
+    }
+
+    // Validate output tensors
+    unsigned int axis_offset = 0;
+    for(const auto &output : outputs)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+        if(using_split_shapes)
+        {
+            output_shape = output->tensor_shape();
+            ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
+        }
+
+        const size_t axis_split_step = output_shape[axis];
+
+        // Start/End coordinates
+        Coordinates start_coords;
+        Coordinates end_coords;
+        for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
+        {
+            end_coords.set(d, -1);
+        }
+
+        // Output auto initialization if not yet initialized
+        TensorInfo tmp_output_info = *output->clone();
+        if(tmp_output_info.tensor_shape().total_size() == 0)
+        {
+            tmp_output_info = input->clone()->set_is_resizable(true).set_tensor_shape(output_shape);
+        }
+        // NOTE(review): tmp_output_info is not used by the SliceType::validate call below, which still receives output - confirm intent
+        // Update coordinate on axis
+        start_coords.set(axis, axis_offset);
+        end_coords.set(axis, axis_offset + axis_split_step);
+
+        ARM_COMPUTE_RETURN_ON_ERROR(SliceType::validate(input, output, start_coords, end_coords));
+        axis_offset += axis_split_step;
+    }
+
+    return Status{};
+}
+
+template <typename SliceType, typename TensorInterfaceType>
+void CPPSplit<SliceType, TensorInterfaceType>::configure(const TensorInterfaceType *input, const std::vector<TensorInterfaceType *> &outputs, unsigned int axis)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input); // input->info() is dereferenced below
+    ARM_COMPUTE_LOG_PARAMS(input, outputs, axis);
+
+    // Create Slice functions
+    _num_outputs = outputs.size();
+    _slice_functions.resize(_num_outputs);
+
+    // Extract output tensor info
+    std::vector<ITensorInfo *> outputs_info;
+    for(auto &output : outputs)
+    {
+        ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+        outputs_info.emplace_back(output->info());
+    }
+
+    // If any of the outputs have a zero size, fall-back to using evenly-sized output splits
+    const bool outputs_have_sizes = std::none_of(outputs_info.begin(), outputs_info.end(), [](ITensorInfo * info)
+    {
+        return info->tensor_shape().total_size() == 0;
+    });
+
+    // Validate
+    ARM_COMPUTE_ERROR_THROW_ON(CPPSplit::validate(input->info(), outputs_info, axis));
+
+    unsigned int axis_offset = 0;
+    unsigned int i           = 0;
+
+    for(const auto &output_info : outputs_info)
+    {
+        // Get output shape
+        TensorShape output_shape = (outputs_have_sizes ?
+                                    output_info->tensor_shape() :
+                                    arm_compute::misc::shape_calculator::compute_split_shape(input->info(), axis, _num_outputs));
+
+        const size_t axis_split_step = output_shape[axis];
+
+        // Start/End coordinates
+        Coordinates start_coords;
+        Coordinates end_coords;
+
+        for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
+        {
+            end_coords.set(d, -1);
+        }
+
+        // Update coordinate on axis
+        start_coords.set(axis, axis_offset);
+        end_coords.set(axis, axis_offset + axis_split_step);
+
+        // Configure slice function
+        _slice_functions[i].configure(input, outputs[i], start_coords, end_coords);
+
+        // Set valid region from shape
+        outputs[i]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
+
+        // Update axis offset
+        axis_offset += axis_split_step;
+        ++i;
+    }
+}
+
+// Instantiate CPPSplit for NESlice and CLSlice types to enable linking to the above templated CPPSplit's methods
+#ifdef ARM_COMPUTE_CPU_ENABLED // NEON Build is activated
+template class CPPSplit<NESlice, ITensor>;
+#endif /* ARM_COMPUTE_CPU_ENABLED */
+
+#ifdef ARM_COMPUTE_OPENCL_ENABLED // OPENCL build is activated
+template class CPPSplit<CLSlice, ICLTensor>;
+#endif /* ARM_COMPUTE_OPENCL_ENABLED */
+} // namespace arm_compute
diff --git a/src/runtime/CPP/functions/CPPTopKV.cpp b/src/runtime/CPP/functions/CPPTopKV.cpp
index 2547e56a1d..62a74735a2 100644
--- a/src/runtime/CPP/functions/CPPTopKV.cpp
+++ b/src/runtime/CPP/functions/CPPTopKV.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,10 +25,14 @@
 
 #include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CPPTopKV::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k)
 {
+    ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k);
+
     auto kernel = std::make_unique<CPPTopKVKernel>();
     kernel->configure(predictions, targets, output, k);
     _kernel = std::move(kernel);
diff --git a/src/runtime/CPP/functions/CPPUpsample.cpp b/src/runtime/CPP/functions/CPPUpsample.cpp
index 3b4ba2ba42..8f72473aeb 100644
--- a/src/runtime/CPP/functions/CPPUpsample.cpp
+++ b/src/runtime/CPP/functions/CPPUpsample.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,10 +25,14 @@
 
 #include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 void CPPUpsample::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, info);
+
     auto k = std::make_unique<CPPUpsampleKernel>();
     k->configure(input, output, info);
     _kernel = std::move(k);
diff --git a/src/runtime/NEON/functions/NEUnstack.cpp b/src/runtime/NEON/functions/NEUnstack.cpp
index 176b17f1f5..0ffab5e92a 100644
--- a/src/runtime/NEON/functions/NEUnstack.cpp
+++ b/src/runtime/NEON/functions/NEUnstack.cpp
@@ -71,7 +71,7 @@ void NEUnstack::configure(const ITensor *input, const std::vector<ITensor *> &ou
 
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
     ARM_COMPUTE_ERROR_THROW_ON(NEUnstack::validate(input->info(), outputs_vector_info, axis));
-    ARM_COMPUTE_LOG_PARAMS(input, output_vector, outputs_vector_info, axis);
+    ARM_COMPUTE_LOG_PARAMS(input, output_vector, axis);
 
     // Wrap around negative values
     const unsigned int axis_u = wrap_axis(axis, input->info());
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 23df2dc20c..5fa92e6360 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -67,7 +67,22 @@ std::string to_string_if_not_null(T *arg)
     }
 }
 
+/** Fallback method: delegates to support::cpp11::to_string()
+ *
+ * @param[in] val Value to convert to string
+ *
+ * @return String representing val.
+ */
+template <typename T>
+inline std::string to_string(const T &val)
+{
+    return support::cpp11::to_string(val);
+}
+
 /** Formatted output of a vector of objects.
+ *
+ * @note: The elements are printed with the overloaded to_string() rather than operator<<(), because to_string() is
+ *        overloaded per type, whereas several types can share one operator<<() (ITensor is an example).
  *
  * @param[out] os   Output stream
  * @param[in]  args Vector of objects to print
@@ -75,7 +90,7 @@ std::string to_string_if_not_null(T *arg)
  * @return Modified output stream.
  */
 template <typename T>
-inline ::std::ostream &operator<<(::std::ostream &os, const std::vector<T> &args)
+::std::ostream &operator<<(::std::ostream &os, const std::vector<T> &args)
 {
     const size_t max_print_size = 5U;
 
@@ -96,7 +111,7 @@ inline ::std::ostream &operator<<(::std::ostream &os, const std::vector<T> &args
         {
             os << ", ";
         }
-        os << args[i];
+        os << to_string(args[i]);
     }
     if(i < args.size())
     {
@@ -106,6 +121,20 @@ inline ::std::ostream &operator<<(::std::ostream &os, const std::vector<T> &args
     return os;
 }
 
+/** Formatted output of a vector of objects, returned as a string.
+ *
+ * @param[in] args Vector of objects to print
+ *
+ * @return String representing args, as produced by streaming args into a std::stringstream.
+ */
+template <typename T>
+std::string to_string(const std::vector<T> &args)
+{
+    std::stringstream str;
+    str << args;
+    return str.str();
+}
+
 /** Formatted output of the Dimensions type.
  *
  * @param[out] os         Output stream.
@@ -1072,7 +1101,7 @@ inline ::std::ostream &operator<<(std::ostream &os, const ITensorInfo *info)
 
     os << "Shape=" << info->tensor_shape() << ","
        << "DataLayout=" << string_from_data_layout(data_layout) << ","
-       << "DataType=" << string_from_data_type(data_type) << ",";
+       << "DataType=" << string_from_data_type(data_type);
 
     if(is_data_type_quantized(data_type))
     {
@@ -1080,7 +1109,7 @@ inline ::std::ostream &operator<<(std::ostream &os, const ITensorInfo *info)
         const auto             scales  = qinfo.scale();
         const auto             offsets = qinfo.offset();
 
-        os << "QuantizationInfo={"
+        os << ", QuantizationInfo={"
            << "scales.size=" << scales.size()
            << ", scale(s)=" << scales << ", ";
 
@@ -2241,20 +2270,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const PriorBoxLayerInfo &i
     return os;
 }
 
-/** Formatted output of a vector of objects.
- *
- * @param[in] args Vector of objects to print
- *
- * @return String representing args.
- */
-template <typename T>
-std::string to_string(const std::vector<T> &args)
-{
-    std::stringstream str;
-    str << args;
-    return str.str();
-}
-
 /** Formatted output of the WinogradInfo type. */
 inline ::std::ostream &operator<<(::std::ostream &os, const WinogradInfo &info)
 {
@@ -2273,18 +2288,6 @@ inline std::string to_string(const WinogradInfo &type)
     return str.str();
 }
 
-/** Fallback method: try to use std::to_string:
- *
- * @param[in] val Value to convert to string
- *
- * @return String representing val.
- */
-template <typename T>
-inline std::string to_string(const T &val)
-{
-    return support::cpp11::to_string(val);
-}
-
 /** Convert a CLTunerMode value to a string
  *
  * @param val CLTunerMode value to be converted
@@ -2782,20 +2785,20 @@ inline std::string to_string(const SoftmaxKernelInfo &info)
  * @return Modified output stream.
  */
 template <typename T>
-inline ::std::ostream &operator<<(::std::ostream &os, const LSTMParams<T> &lstm_params)
-{
-    os << "{input_to_input_weights=" << lstm_params.input_to_input_weights() << ", "
-       << "recurrent_to_input_weights=" << lstm_params.recurrent_to_input_weights() << ", "
-       << "cell_to_input_weights=" << lstm_params.cell_to_input_weights() << ", "
-       << "input_gate_bias=" << lstm_params.input_gate_bias() << ", "
-       << "cell_to_forget_weights=" << lstm_params.cell_to_forget_weights() << ", "
-       << "cell_to_output_weights=" << lstm_params.cell_to_output_weights() << ", "
-       << "projection_weights=" << lstm_params.projection_weights() << ", "
-       << "projection_bias=" << lstm_params.projection_bias() << ", "
-       << "input_layer_norm_weights=" << lstm_params.input_layer_norm_weights() << ", "
-       << "forget_layer_norm_weights=" << lstm_params.forget_layer_norm_weights() << ", "
-       << "cell_layer_norm_weights=" << lstm_params.cell_layer_norm_weights() << ", "
-       << "output_layer_norm_weights=" << lstm_params.output_layer_norm_weights() << ", "
+::std::ostream &operator<<(::std::ostream &os, const LSTMParams<T> &lstm_params)
+{
+    os << "{input_to_input_weights=" << to_string(lstm_params.input_to_input_weights()) << ", "
+       << "recurrent_to_input_weights=" << to_string(lstm_params.recurrent_to_input_weights()) << ", "
+       << "cell_to_input_weights=" << to_string(lstm_params.cell_to_input_weights()) << ", "
+       << "input_gate_bias=" << to_string(lstm_params.input_gate_bias()) << ", "
+       << "cell_to_forget_weights=" << to_string(lstm_params.cell_to_forget_weights()) << ", "
+       << "cell_to_output_weights=" << to_string(lstm_params.cell_to_output_weights()) << ", "
+       << "projection_weights=" << to_string(lstm_params.projection_weights()) << ", "
+       << "projection_bias=" << to_string(lstm_params.projection_bias()) << ", "
+       << "input_layer_norm_weights=" << to_string(lstm_params.input_layer_norm_weights()) << ", "
+       << "forget_layer_norm_weights=" << to_string(lstm_params.forget_layer_norm_weights()) << ", "
+       << "cell_layer_norm_weights=" << to_string(lstm_params.cell_layer_norm_weights()) << ", "
+       << "output_layer_norm_weights=" << to_string(lstm_params.output_layer_norm_weights()) << ", "
        << "cell_clip=" << lstm_params.cell_clip() << ", "
        << "projection_clip=" << lstm_params.projection_clip() << ", "
        << "input_intermediate_scale=" << lstm_params.input_intermediate_scale() << ", "
@@ -2817,7 +2820,7 @@ inline ::std::ostream &operator<<(::std::ostream &os, const LSTMParams<T> &lstm_
  * @return String representing the corresponding LSTMParams
  */
 template <typename T>
-inline std::string to_string(const LSTMParams<T> &lstm_params)
+std::string to_string(const LSTMParams<T> &lstm_params)
 {
     std::stringstream str;
     str << lstm_params;
@@ -2836,6 +2839,80 @@ inline std::string to_string(const uint8_t num)
     return ::std::to_string(static_cast<int>(num));
 }
 
+/** Formatted output of the NMSType type (the available non maxima suppression types):
+ * writes the enumerator name LINEAR, GAUSSIAN or ORIGINAL to the stream.
+ *
+ * @param[out] os       Output stream.
+ * @param[in]  nms_type NMSType to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const NMSType &nms_type)
+{
+    switch(nms_type)
+    {
+        case NMSType::LINEAR:
+            os << "LINEAR";
+            break;
+        case NMSType::GAUSSIAN:
+            os << "GAUSSIAN";
+            break;
+        case NMSType::ORIGINAL:
+            os << "ORIGINAL";
+            break;
+        default: // Any value other than the three enumerators handled above.
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+    return os;
+}
+
+/** Converts a @ref NMSType to string
+ *
+ * @param[in] nms_type NMSType value to be converted
+ *
+ * @return String holding the text that the stream operator<< writes for @p nms_type
+ */
+inline std::string to_string(const NMSType nms_type)
+{
+    std::stringstream str;
+    str << nms_type; // Format through the stream operator so both paths yield the same text.
+    return str.str();
+}
+
+/** Formatted output of the BoxNMSLimitInfo type as a brace-enclosed list of `name = value` pairs.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  info BoxNMSLimitInfo whose getters are read and printed.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const BoxNMSLimitInfo &info)
+{
+    os << "{score_thresh = " << info.score_thresh() << ", "
+       << "nms = " << info.nms() << ", "
+       << "detections_per_im = " << info.detections_per_im() << ", "
+       << "soft_nms_enabled = " << info.soft_nms_enabled() << ", "
+       << "soft_nms_min_score_thres = " << info.soft_nms_min_score_thres() << ", "
+       << "suppress_size = " << info.suppress_size() << ", "
+       << "min_size = " << info.min_size() << ", "
+       << "im_width = " << info.im_width() << ", "
+       << "im_height = " << info.im_height() << "}"; // Last pair closes the brace opened by the first.
+    return os;
+}
+
+/** Converts a @ref BoxNMSLimitInfo to string
+ *
+ * @param[in] info BoxNMSLimitInfo value to be converted
+ *
+ * @return String holding the text that the stream operator<< writes for @p info
+ */
+inline std::string to_string(const BoxNMSLimitInfo &info)
+{
+    std::stringstream str;
+    str << info; // Format through the stream operator so both paths yield the same text.
+    return str.str();
+}
+
 } // namespace arm_compute
 
 #endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */
-- 
cgit v1.2.1