From 0499dff9293a86d3d53f72fed0a38b2823563674 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 31 Jul 2020 22:21:38 +0100 Subject: COMPMID-3392: Collapse TensorMaps into a single TensorPack Collapse InputTensorMap and OutputTensorMap to a single TensorPack mechanism. Signed-off-by: Georgios Pinitas Change-Id: Ie2fdfc6b07d84ad589169ec99ca64fcf45a00bec Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/253783 Tested-by: bsgcomp Reviewed-by: Michalis Spyrou Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3641 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Sheri Zhang --- Android.bp | 1 + arm_compute/core/CL/ICLKernel.h | 7 +- .../core/CL/kernels/CLActivationLayerKernel.h | 3 +- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.h | 3 +- .../core/CL/kernels/CLElementwiseOperationKernel.h | 3 +- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 2 +- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 3 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 4 +- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h | 3 +- arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 3 +- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 3 +- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 3 +- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 3 +- arm_compute/core/CPP/ICPPKernel.h | 7 +- arm_compute/core/ITensorPack.h | 100 ++++++++++++++++ .../core/NEON/kernels/NEActivationLayerKernel.h | 3 +- .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 2 +- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 2 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEElementwiseOperationKernel.h | 3 +- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 3 +- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 4 +- .../core/NEON/kernels/NEReshapeLayerKernel.h | 3 +- .../core/NEON/kernels/NEStridedSliceKernel.h | 3 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 3 +- arm_compute/core/experimental/Types.h | 7 +- arm_compute/runtime/CL/CLScheduler.h | 7 +- arm_compute/runtime/CL/CLTuner.h | 7 +- arm_compute/runtime/CL/ICLOperator.h | 4 +- arm_compute/runtime/CL/ICLTuner.h | 5 +- .../runtime/CL/functions/CLConcatenateLayer.h | 2 +- .../runtime/CL/functions/CLElementwiseOperations.h | 14 +-- arm_compute/runtime/CL/functions/CLPReluLayer.h | 2 +- .../CL/functions/CLPixelWiseMultiplication.h | 4 +- arm_compute/runtime/CL/tuners/BifrostTuner.h | 2 +- arm_compute/runtime/CL/tuners/MidgardTuner.h | 2 +- arm_compute/runtime/CPP/CPPScheduler.h | 4 +- arm_compute/runtime/IOperator.h | 9 +- arm_compute/runtime/IScheduler.h | 5 +- arm_compute/runtime/NEON/INEOperator.h | 5 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 2 +- arm_compute/runtime/OMP/OMPScheduler.h | 5 +- arm_compute/runtime/SingleThreadScheduler.h | 5 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 6 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 6 +- .../CL/kernels/CLElementwiseOperationKernel.cpp | 8 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 6 +- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 7 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 16 +-- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 6 +- 
src/core/CL/kernels/CLStridedSliceKernel.cpp | 6 +- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 9 +- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 13 +- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 7 +- src/core/ITensor.cpp | 10 +- src/core/ITensorPack.cpp | 65 ++++++++++ src/core/NEON/kernels/NEActivationLayerKernel.cpp | 10 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 8 +- .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 8 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 8 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 8 +- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 6 +- .../kernels/NEHeightConcatenateLayerKernel.cpp | 7 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 16 +-- src/core/NEON/kernels/NEReshapeLayerKernel.cpp | 6 +- src/core/NEON/kernels/NEStridedSliceKernel.cpp | 6 +- .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 7 +- src/runtime/CL/CLOperator.cpp | 10 +- src/runtime/CL/CLScheduler.cpp | 15 +-- src/runtime/CL/CLTuner.cpp | 15 +-- src/runtime/CL/functions/CLActivationLayer.cpp | 8 +- src/runtime/CL/functions/CLConcatenateLayer.cpp | 24 ++-- .../CL/functions/CLElementWiseUnaryLayer.cpp | 56 ++++----- .../CL/functions/CLElementwiseOperations.cpp | 131 +++++++++++---------- src/runtime/CL/functions/CLPReluLayer.cpp | 30 +++-- .../CL/functions/CLPixelWiseMultiplication.cpp | 46 ++++---- src/runtime/CL/functions/CLReshapeLayer.cpp | 8 +- src/runtime/CL/functions/CLSlice.cpp | 7 +- src/runtime/CL/functions/CLStridedSlice.cpp | 8 +- src/runtime/CL/tuners/BifrostTuner.cpp | 4 +- src/runtime/CL/tuners/MidgardTuner.cpp | 4 +- src/runtime/CPP/CPPScheduler.cpp | 21 ++-- src/runtime/CPP/SingleThreadScheduler.cpp | 4 +- src/runtime/NEON/INEOperator.cpp | 10 +- src/runtime/NEON/functions/NEActivationLayer.cpp | 8 +- .../NEON/functions/NEArithmeticAddition.cpp | 8 +- .../NEON/functions/NEArithmeticSubtraction.cpp | 8 +- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 22 ++-- .../NEON/functions/NEElementwiseOperators.cpp | 56 +++++---- src/runtime/NEON/functions/NEPReluLayer.cpp | 8 +- .../NEON/functions/NEPixelWiseMultiplication.cpp | 16 ++- src/runtime/NEON/functions/NEReshapeLayer.cpp | 8 +- src/runtime/NEON/functions/NESlice.cpp | 7 +- src/runtime/NEON/functions/NEStridedSlice.cpp | 7 +- src/runtime/OMP/OMPScheduler.cpp | 8 +- tests/framework/instruments/SchedulerTimer.cpp | 4 +- 101 files changed, 646 insertions(+), 461 deletions(-) create mode 100644 arm_compute/core/ITensorPack.h create mode 100644 src/core/ITensorPack.cpp diff --git a/Android.bp b/Android.bp index 45d96e7e9b..3b59a7d283 100644 --- a/Android.bp +++ b/Android.bp @@ -229,6 +229,7 @@ cc_library_static { "src/core/IDistribution1D.cpp", "src/core/IKernel.cpp", "src/core/ITensor.cpp", + "src/core/ITensorPack.cpp", "src/core/MultiImageInfo.cpp", "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp", "src/core/NEON/kernels/NEAccumulateKernel.cpp", diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index 6e606ff857..d4990a1dee 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -225,14 +225,13 @@ public: * * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. * - * @param[in] inputs A vector containing the input tensors. - * @param[in] outputs A vector containing the output tensors. + * @param[in] tensors A vector containing the tensors to operate on. * @param[in] window Region on which to execute the kernel.
(Must be a valid region of the window returned by window()). * @param[in,out] queue Command queue on which to enqueue the kernel. */ - virtual void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) + virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { - ARM_COMPUTE_UNUSED(inputs, outputs, window, queue); + ARM_COMPUTE_UNUSED(tensors, window, queue); } /** Add the passed parameters to the object's kernel's arguments starting from the index idx. * diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index cf62ca600b..81d4ccb065 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: bool _run_in_place; diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h index 890fecf0c9..bb8968ca83 100644 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -73,8 +73,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _batch_offset; diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h index ad365d655b..d8493bc5d8 100644 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -71,8 +71,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _depth_offset; diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h index f8f158c5bc..82cd953b68 100644 --- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h @@ -60,8 +60,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git 
a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h index 76bc879638..b459292161 100644 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h @@ -54,8 +54,7 @@ public: ~CLElementwiseOperationKernel() = default; // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; - + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; protected: diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index 8cad68dc1a..5323af4c0e 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -85,7 +85,7 @@ public: void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; void run(const Window &window, cl::CommandQueue &queue) override; bool is_parallelisable() const override; diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h index 0563fad414..4fa2b40881 100644 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _height_offset; diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index 86159fc915..6b5bd11bde 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -132,7 +132,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: @@ -184,7 +184,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h index 
11c06d1cab..6e3f255c52 100644 --- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h @@ -53,8 +53,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h index 28a665b113..74311b71fa 100644 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h @@ -73,8 +73,7 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h index aaedaa90ee..a379b5f0b8 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -67,8 +67,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h index b1d6e890a0..6b0e8ee21d 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -71,8 +71,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h index cb8777d5fd..32e90af404 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -68,8 +68,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) override; + void 
run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: unsigned int _width_offset; diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index 3f784cf6d9..ab369ffe1d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -79,14 +79,13 @@ public: * * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). * - * @param[in] inputs A vector containing the input tensors. - * @param[in] outputs A vector containing the output tensors. + * @param[in] tensors A vector containing the tensors to operate on. * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) * @param[in] info Info about executing thread and CPU. */ - virtual void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) + virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(inputs, outputs, window, info); + ARM_COMPUTE_UNUSED(tensors, window, info); } /** Name of the kernel diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h new file mode 100644 index 0000000000..36b6aea490 --- /dev/null +++ b/arm_compute/core/ITensorPack.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ #ifndef ARM_COMPUTE_ITENSORPACK_H +#define ARM_COMPUTE_ITENSORPACK_H + +#include <cstdint> +#include <map> + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Tensor packing service */ +class ITensorPack +{ +private: + struct PackElement + { + PackElement() = default; + PackElement(ITensor *tensor) + : tensor(tensor), ctensor(nullptr) + { + } + PackElement(const ITensor *ctensor) + : tensor(nullptr), ctensor(ctensor) + { + } + + ITensor *tensor{ nullptr }; + const ITensor *ctensor{ nullptr }; + }; + +public: + /** Default Constructor */ + ITensorPack() = default; + /** Add tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, ITensor *tensor); + + /** Add const tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, const ITensor *tensor); + /** Get tensor of a given id from the pack + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if it exists and is non-const, else nullptr + */ + ITensor *get_tensor(int id); + /** Get constant tensor of a given id + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if it exists and is const, else nullptr + */ + const ITensor *get_const_tensor(int id) const; + /** Pack size accessor + * + * @return Number of tensors registered to the pack + */ + size_t size() const; + /** Checks if pack is empty + * + * @return True if empty else false + */ + bool empty() const; + +private: + std::map<unsigned int, PackElement> _pack{}; /**< Container with the packed tensors */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ITENSORPACK_H */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 960e6fa3e5..325647bd66 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using ActivationFunction = ActivationLayerInfo::ActivationFunction; diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h index 764596d7dc..eece5708e8 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -86,7 +86,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised add functions diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index 86c3dd8e88..e3a41a2b1c 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -98,7 +98,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised sub functions diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index f397a29b48..478890925b 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index e1aaa59f25..3b2b9a1b79 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -76,8 +76,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h index cf43aca4b0..47b8c3b7c8 100644 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h @@ -67,8 +67,7 @@ public: using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; protected: /** Validate the argument passed to the kernel diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index d463b53e2c..8a5e86acc4 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -74,8 +74,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const 
Window &window, const ThreadInfo &info) override; private: unsigned int _height_offset; diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index 8876d04c62..c530d78c42 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -111,7 +111,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); // Inherited methods overridden - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised multiplication functions with integer scaling factor @@ -178,7 +178,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 97e91ed03d..a4b8426e41 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -57,8 +57,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index 59caeecb0d..be55fd75de 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -91,8 +91,7 @@ public: int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: Coordinates _starts_abs; /**< Absolute start coordinates */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index b5336ad026..64d741deab 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -73,8 +73,7 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) override; + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; private: unsigned int _width_offset; diff --git 
a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index eca833e708..4dee5ff70d 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -24,13 +24,14 @@ #ifndef ARM_COMPUTE_EXPERIMENTAL_TYPES_H #define ARM_COMPUTE_EXPERIMENTAL_TYPES_H +#include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/TensorShape.h" -#include <map> #include <vector> namespace arm_compute { +// Forward declaration class ITensor; /** Memory type */ @@ -51,10 +52,6 @@ enum TensorType : int32_t ACL_SRC_VEC = 256, }; -using InputTensorMap = std::map<TensorType, const ITensor *>; -using OutputTensorMap = std::map<TensorType, ITensor *>; -using OperatorTensorMap = OutputTensorMap; - namespace experimental { struct MemoryInfo diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 93595c65c7..8a22832792 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -76,11 +76,10 @@ public: /** Schedule the execution of the passed kernel if possible. * * @param[in] kernel Kernel to execute. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. */ - void enqueue_op(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush = true); + void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true); /** Initialises the context and command queue to be used by the scheduler. * @@ -152,7 +151,7 @@ public: bool is_initialised() const; private: - void enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush); + void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush); /** Flag to ensure symbols initialisation is happening before Scheduler creation */ static std::once_flag _initialize_symbols; diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index aa31181d2d..3b45a2177e 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -116,7 +116,7 @@ public: // Inherited methods overridden: void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; /** Is the kernel_event set ?
* @@ -127,11 +127,12 @@ public: private: /** Find optimal LWS using brute-force approach * - * @param[in] kernel OpenCL kernel to be tuned with LWS + * @param[in] kernel OpenCL kernel to be tuned with LWS + * @param[in,out] tensors Tensors for the kernel to operate on * * @return The optimal LWS to use */ - cl::NDRange find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs); + cl::NDRange find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors); std::unordered_map<std::string, cl::NDRange> _lws_table; cl::Event _kernel_event; diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h index 2d6c96e815..526b7e93e9 100644 --- a/arm_compute/runtime/CL/ICLOperator.h +++ b/arm_compute/runtime/CL/ICLOperator.h @@ -54,8 +54,8 @@ public: ICLOperator &operator=(ICLOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; - void prepare(OperatorTensorMap constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; MemoryRequirements workspace() const override; protected: diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h index 4bc8ddf632..0f951c384e 100644 --- a/arm_compute/runtime/CL/ICLTuner.h +++ b/arm_compute/runtime/CL/ICLTuner.h @@ -54,10 +54,9 @@ public: /** Tune OpenCL kernel dynamically * * @param[in] kernel Kernel to tune - * @param[in] inputs Inputs for the kernel to use - * @param[in, out] outputs Outputs for the kernel to use + * @param[in, out] tensors Tensors for the kernel to use */ - virtual void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; + virtual void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) = 0; }; } // namespace arm_compute #endif /*ARM_COMPUTE_ICLTUNER_H */ diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index 99a2053a5a..f535c8ea97 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -144,7 +144,7 @@ public: static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: std::vector<std::unique_ptr<ICLKernel>> _concat_kernels; diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h index 5af24c90ac..2d9d43863d 100644 --- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -98,7 +98,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -168,7 +168,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap
outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -207,7 +207,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -246,7 +246,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -285,7 +285,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -324,7 +324,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -363,7 +363,7 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h index 08567cccfb..84743508df 100644 --- a/arm_compute/runtime/CL/functions/CLPReluLayer.h +++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h @@ -64,7 +64,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index ca8d77e6b7..2066012306 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -103,7 +103,7 @@ public: ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; @@ -136,7 +136,7 @@ public: static Status validate(const ITensorInfo 
*input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: CLFillBorderKernel _border_handler; diff --git a/arm_compute/runtime/CL/tuners/BifrostTuner.h b/arm_compute/runtime/CL/tuners/BifrostTuner.h index 830f7d9067..237693fb88 100644 --- a/arm_compute/runtime/CL/tuners/BifrostTuner.h +++ b/arm_compute/runtime/CL/tuners/BifrostTuner.h @@ -37,7 +37,7 @@ public: // Inherited overridden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CL/tuners/MidgardTuner.h b/arm_compute/runtime/CL/tuners/MidgardTuner.h index c702e7a2aa..86d46044c2 100644 --- a/arm_compute/runtime/CL/tuners/MidgardTuner.h +++ b/arm_compute/runtime/CL/tuners/MidgardTuner.h @@ -37,7 +37,7 @@ public: // Inherited overridden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; - void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 9d55ed448e..e8ad427eba 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -52,7 +52,7 @@ public: void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func) override; unsigned int num_threads() const override; void schedule(ICPPKernel *kernel, const Hints &hints) override; - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Will run the workloads in parallel using num_threads * * @param[in] workloads */ void run_workloads(std::vector<Workload> &workloads) override; private: - void schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs); + void schedule_common(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors); struct Impl; std::unique_ptr<Impl> _impl; }; diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index d72fca4399..e7952bb748 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -40,13 +40,10 @@ public: virtual ~IOperator() = default; /** Run the kernels contained in the function * - * - * @param[in] inputs Vector that contains the input tensors. - * @param[in] outputs Vector that contains the output tensors. - * @param[in] workspace Vector that contains the workspace tensors. + * @param[in] tensors Vector that contains the tensors to operate on.
* */ - virtual void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) = 0; + virtual void run(ITensorPack &tensors) = 0; /** Prepare the function for executing * * Any one-off pre-processing step required by the function is handled here * * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute */ - virtual void prepare(OperatorTensorMap constants) = 0; + virtual void prepare(ITensorPack &constants) = 0; /** Return the memory requirements required by the workspace */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index fff77274bd..98627538e8 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -168,10 +168,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. */ - virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; + virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) = 0; /** Execute all the passed workloads * diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index f91305543f..415e767eec 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_INEOPERATOR_H #define ARM_COMPUTE_INEOPERATOR_H +#include "../../core/ITensor.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -54,8 +55,8 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; - void prepare(OperatorTensorMap constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; MemoryRequirements workspace() const override; protected: diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 73c62330c5..1d703ae729 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -132,7 +132,7 @@ public: static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; + void run(ITensorPack &tensors) override; private: std::vector<std::unique_ptr<INEKernel>> _concat_kernels; diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index 56bd6baaa6..56a31cc076 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -63,10 +63,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on.
*/ - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Execute all the passed workloads diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 42fc742062..d45730e499 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -54,10 +54,9 @@ public: * * @param[in] kernel Kernel to execute. * @param[in] hints Hints for the scheduler. - * @param[in] inputs Vector containing the input tensors. - * @param[in] outputs Vector containing the output tensors. + * @param[in] tensors Vector containing the tensors to operate on. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override; protected: /** Will run the workloads sequentially and in order. diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 66751f7dd3..62cafc5ad1 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -248,13 +248,13 @@ Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void CLActivationLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); ARM_COMPUTE_ERROR_ON(_run_in_place && src != dst); Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp index b3496f7229..feebe01cdb 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -139,14 +139,13 @@ Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void CLBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_3D(); diff --git
a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 0bae901baa..5978a0223f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -123,14 +123,13 @@ Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void CLDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index 87fafd340c..c8c7fb03b8 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -119,7 +119,7 @@ Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const I return Status{}; } -void CLElementWiseUnaryLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLElementWiseUnaryLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -127,8 +127,8 @@ void CLElementWiseUnaryLayerKernel::run_op(const InputTensorMap &inputs, const O Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); do { diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 7cc6fb38b1..ec33500f20 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -280,14 +280,14 @@ void CLElementwiseOperationKernel::configure_common(const CLCompileContext &comp _config_id = generate_id_for_tuning(kernel_name, *input1, *output); } -void CLElementwiseOperationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 =
utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 1fca646129..1ea654b5cc 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -170,17 +170,15 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen _config_id += lower_string(string_from_border_mode(border_mode)); } -void CLFillBorderKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { - ARM_COMPUTE_UNUSED(outputs); - // Border mode undefined or border width == 0 if(_kernel() == nullptr) { return; } - const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); + const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 5ab270cc01..22b2cfcbc5 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -123,14 +123,13 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); } -void CLHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); unsigned int idx = 0; add_4D_tensor_argument(idx, src, window); diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 95869f7e96..229937ef31 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -267,14 +267,14 @@ Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, cons return Status{}; } -void CLPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); @@ -420,14 +420,14 @@ Status CLComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input return Status{}; } -void CLComplexPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLComplexPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_0)); - const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); + const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); const TensorShape &in_shape1 = src_0->info()->tensor_shape(); const TensorShape &in_shape2 = src_1->info()->tensor_shape(); diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index 05b6dd5675..3daf21a9a7 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -100,7 +100,7 @@ Status CLReshapeLayerKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); @@ -108,8 +108,8 @@ void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTens Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_3D(); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); // Set inputs unsigned int idx = 0; diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index
94cbd43bb1..f7b7290a3f 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -172,13 +172,13 @@ Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void CLStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) +void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 88b69c7cce..76100c2a63 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -147,17 +147,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile _config_id += support::cpp11::to_string(input2->dimension(1)); } -void CLWidthConcatenate2TensorsKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); Window slice = window.first_slice_window_4D(); - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC + 1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(outputs.at(TensorType::ACL_DST)); + const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); do { diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index e49bb1c8ce..0377eb76b1 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -202,17 +202,16 @@ void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile _config_id += support::cpp11::to_string(input4->dimension(1)); } -void CLWidthConcatenate4TensorsKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(inputs.at(TensorType::ACL_SRC_VEC)); - const
auto src1 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 1)); - const auto src2 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 2)); - const auto src3 = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC_VEC + 3)); - auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + const auto src2 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); + const auto src3 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_4D(); diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 591c26f877..d40597fbb5 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -122,14 +122,13 @@ void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_co output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); } -void CLWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, cl::CommandQueue &queue) +void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); unsigned int idx = 0; add_4D_tensor_argument(idx, src, window); diff --git a/src/core/ITensor.cpp b/src/core/ITensor.cpp index 226e8d5714..e263596333 100644 --- a/src/core/ITensor.cpp +++ b/src/core/ITensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,8 +30,8 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ void ITensor::copy_from(const ITensor &src) { if(&src == this) @@ -64,7 +64,8 @@ void ITensor::copy_from(const ITensor &src) const size_t line_size = src_info->element_size() * src_info->dimension(0); - execute_window_loop(win_src, [&](const Coordinates &) + execute_window_loop( + win_src, [&](const Coordinates &) { memcpy(dst_it.ptr(), src_it.ptr(), line_size); }, @@ -168,3 +169,4 @@ void ITensor::mark_as_unused() const { _is_used = false; } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/ITensorPack.cpp b/src/core/ITensorPack.cpp new file mode 100644 index 0000000000..7a54a8bc6b --- /dev/null +++ b/src/core/ITensorPack.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/ITensorPack.h" + +#include "arm_compute/core/ITensor.h" + +namespace arm_compute +{ +void ITensorPack::add_tensor(int id, ITensor *tensor) +{ + _pack[id] = PackElement(tensor); +} + +void ITensorPack::add_tensor(int id, const ITensor *tensor) +{ + _pack[id] = PackElement(tensor); +} + +const ITensor *ITensorPack::get_const_tensor(int id) const +{ + auto it = _pack.find(id); + if(it != _pack.end()) + { + return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor; + } + return nullptr; +} + +ITensor *ITensorPack::get_tensor(int id) +{ + auto it = _pack.find(id); + return it != _pack.end() ? it->second.tensor : nullptr; +} + +size_t ITensorPack::size() const +{ + return _pack.size(); +} + +bool ITensorPack::empty() const +{ + return _pack.empty(); +} +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 7023d59763..b15df311cc 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -850,9 +850,7 @@ Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void NEActivationLayerKernel::run_op(const InputTensorMap &inputs, - const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEActivationLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { // Early exit on disabled activation if(!_act_info.enabled()) @@ -865,8 +863,10 @@ void NEActivationLayerKernel::run_op(const InputTensorMap &inputs, ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - ARM_COMPUTE_ERROR_ON(inputs.empty() || outputs.empty()); + ARM_COMPUTE_ERROR_ON(tensors.empty()); - (this->*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), window); + (this->*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 0ad4b3f12e..5f5a3e5b37 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -984,12 +984,16 @@ Status 
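// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] Minimal standalone use of the
// ITensorPack API introduced above. Tensor/TensorInfo/TensorShape are the
// public runtime types; the function name is hypothetical. Assumed includes:
// "arm_compute/core/ITensorPack.h", "arm_compute/core/TensorInfo.h",
// "arm_compute/core/Error.h", "arm_compute/core/experimental/Types.h",
// "arm_compute/runtime/Tensor.h".
//
// using namespace arm_compute;
//
// void tensor_pack_smoke_test()
// {
//     Tensor src{}, dst{};
//     src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
//     dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
//     src.allocator()->allocate();
//     dst.allocator()->allocate();
//
//     ITensorPack pack;
//     pack.add_tensor(TensorType::ACL_SRC, static_cast<const ITensor *>(&src)); // const overload
//     pack.add_tensor(TensorType::ACL_DST, &dst);                               // mutable overload
//
//     // get_const_tensor() prefers the const pointer and falls back to the
//     // mutable one, so kernels can read any packed element through a single
//     // accessor; get_tensor() only returns elements added as mutable.
//     const ITensor *in  = pack.get_const_tensor(TensorType::ACL_SRC);
//     ITensor       *out = pack.get_tensor(TensorType::ACL_DST);
//     ARM_COMPUTE_ERROR_ON(in == nullptr || out == nullptr || pack.size() != 2);
// }
// ---------------------------------------------------------------------------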
NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITe return Status{}; } -void NEArithmeticAdditionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEArithmeticAdditionKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), _policy, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), + _policy, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 572f09eab2..92371936fa 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -807,12 +807,16 @@ Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const return Status{}; } -void NEArithmeticSubtractionKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEArithmeticSubtractionKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - (*_func)(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window, (_policy == ConvertPolicy::SATURATE)); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), + window, + (_policy == ConvertPolicy::SATURATE)); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index c597afd804..0ee6d0efcf 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -191,14 +191,16 @@ Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _batch_offset, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + _batch_offset, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 49e10de94e..6926ec1aac 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -189,14 +189,16 @@ Status 
NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _depth_offset, window); + (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC), + tensors.get_tensor(TensorType::ACL_DST), + _depth_offset, + window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 213103a830..014a564bf1 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -1157,13 +1157,15 @@ void NEElementwiseOperationKernel::configure_common(const ITensorInfo *input1, c INEKernel::configure(win); } -void NEElementwiseOperationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info, window); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_function == nullptr); - _function(inputs.at(TensorType::ACL_SRC_0), inputs.at(TensorType::ACL_SRC_1), outputs.at(TensorType::ACL_DST), window); + _function(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_const_tensor(TensorType::ACL_SRC_1), + tensors.get_tensor(TensorType::ACL_DST), window); } /** Arithmetic operators (min, max, squared_diff) */ diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index d4043e02b7..8a671bfa23 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -84,15 +84,14 @@ Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign return Status{}; } -void NEHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); // Offset output pointer to the correct position uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY]; diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index b5b4f841b4..907a7f197b 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp 
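// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] Every NEON hunk above follows the
// same shape: pull typed pointers out of the single pack, validate them, then
// iterate the window. A free-function rendering of that pattern, assuming F32
// data with unit window steps and the usual ACL headers
// ("arm_compute/core/ITensorPack.h", "arm_compute/core/Helpers.h",
// "arm_compute/core/Validate.h"); the helper name is hypothetical.
//
// using namespace arm_compute;
//
// void run_pack_aware(ITensorPack &tensors, const Window &window)
// {
//     const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC);
//     ITensor       *dst = tensors.get_tensor(TensorType::ACL_DST);
//     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
//
//     Iterator in(src, window);
//     Iterator out(dst, window);
//     execute_window_loop(window, [&](const Coordinates &)
//     {
//         // Element-wise copy stands in for the real per-kernel math.
//         *reinterpret_cast<float *>(out.ptr()) = *reinterpret_cast<const float *>(in.ptr());
//     },
//     in, out);
// }
// ---------------------------------------------------------------------------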
@@ -1265,15 +1265,15 @@ Status NEPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, cons return Status{}; } -void NEPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - auto input1 = inputs.at(TensorType::ACL_SRC_0); - auto input2 = inputs.at(TensorType::ACL_SRC_1); - auto output = outputs.at(TensorType::ACL_DST); + auto input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0); + auto input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto output = tensors.get_tensor(TensorType::ACL_DST); if(_func_quantized != nullptr) { @@ -1363,15 +1363,15 @@ Status NEComplexPixelWiseMultiplicationKernel::validate(const ITensorInfo *input return Status{}; } -void NEComplexPixelWiseMultiplicationKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEComplexPixelWiseMultiplicationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - auto input1 = inputs.at(TensorType::ACL_SRC_0); - auto input2 = inputs.at(TensorType::ACL_SRC_1); - auto output = outputs.at(TensorType::ACL_DST); + auto input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0); + auto input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1); + auto output = tensors.get_tensor(TensorType::ACL_DST); Iterator input1_it(input1, window.broadcast_if_dimension_le_one(input1->info()->tensor_shape())); Iterator input2_it(input2, window.broadcast_if_dimension_le_one(input2->info()->tensor_shape())); diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index f4aeed5e9f..23b349b443 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -86,14 +86,14 @@ void NEReshapeLayerKernel::configure(const ITensorInfo *input, ITensorInfo *outp INEKernel::configure(win); } -void NEReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void NEReshapeLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); switch(src->info()->data_type()) { diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 3472f7fe15..243a60f249 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -166,13 +166,15 @@ Status NEStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInf return Status{}; } -void NEStridedSliceKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info) +void 
NEStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); // Dispatch kernel - strided_slice_generic(inputs.at(TensorType::ACL_SRC_0), outputs.at(TensorType::ACL_DST), _starts_abs, _final_strides, _shrink_mask, window); + strided_slice_generic(tensors.get_const_tensor(TensorType::ACL_SRC_0), + tensors.get_tensor(TensorType::ACL_DST), + _starts_abs, _final_strides, _shrink_mask, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index 1b32e3614e..171f5965a5 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -84,15 +84,14 @@ Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne return Status{}; } -void NEWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, - const Window &window, const ThreadInfo &info) +void NEWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const auto src = inputs.at(TensorType::ACL_SRC); - auto dst = outputs.at(TensorType::ACL_DST); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); + auto dst = tensors.get_tensor(TensorType::ACL_DST); // Offset output pointer to the correct position uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0]; diff --git a/src/runtime/CL/CLOperator.cpp b/src/runtime/CL/CLOperator.cpp index c41454e933..57a4d0ec57 100644 --- a/src/runtime/CL/CLOperator.cpp +++ b/src/runtime/CL/CLOperator.cpp @@ -33,19 +33,17 @@ ICLOperator::ICLOperator(IRuntimeContext *ctx) { } -void ICLOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void ICLOperator::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - CLScheduler::get().enqueue_op(*_kernel.get(), inputs, outputs, false); + CLScheduler::get().enqueue_op(*_kernel.get(), tensors, false); } -void ICLOperator::prepare(OperatorTensorMap constants) +void ICLOperator::prepare(ITensorPack &constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 5ef66f456a..ccef5cbd1b 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -151,22 +151,22 @@ void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::De _cl_tuner = cl_tuner; } -void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush) { ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised, "The CLScheduler is not initialised yet! 
Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); - const bool inject_memory = !inputs.empty(); + const bool inject_memory = !tensors.empty(); // Tune the kernel if the CLTuner has been provided if(_cl_tuner != nullptr) { - inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel); + inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, tensors) : _cl_tuner->tune_kernel_dynamic(kernel); } // Run kernel - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); + inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); if(flush) { @@ -176,11 +176,12 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { - enqueue_common(kernel, {}, {}, flush); + ITensorPack pack; + enqueue_common(kernel, pack, flush); } -void CLScheduler::enqueue_op(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush) { - enqueue_common(kernel, inputs, outputs, flush); + enqueue_common(kernel, tensors, flush); } } // namespace arm_compute diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index b2e3476e20..adfe67fb11 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -77,10 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel) void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) { - tune_kernel_dynamic(kernel, {}, {}); + ITensorPack pack; + tune_kernel_dynamic(kernel, pack); } -void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { // Get the configuration ID from the kernel and append GPU target name and number of available compute units const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units()); @@ -95,7 +96,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &input if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs); + cl::NDRange opt_lws = find_optimal_lws(kernel, tensors); // Insert the optimal LWS in the table add_lws_to_table(config_id, opt_lws); @@ -117,7 +118,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal _lws_table.emplace(kernel_id, optimal_lws); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) { // Profiling queue cl::CommandQueue queue_profiler; @@ -172,8 +173,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); // Run the kernel with default lws to be used as baseline - const bool inject_memory = !inputs.empty(); - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + const bool inject_memory = !tensors.empty(); + inject_memory ? 
kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); @@ -203,7 +204,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i kernel.set_lws_hint(lws_test); // Run the kernel - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + inject_memory ? kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish();
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 784473d426..5ddf227382 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -89,9 +89,9 @@ Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void CLActivationLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 1ddda021bc..4214813446 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -220,16 +220,14 @@ Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_ return Status{}; } -void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLConcatenation::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - if(inputs.size() != _num_inputs) + if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) { ARM_COMPUTE_ERROR("Configured with different number of inputs"); } @@ -237,15 +235,17 @@ void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, Operat if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) { ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), inputs, outputs, true); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); } else { int i = 0; for(auto &k : _concat_kernels) { - const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; - CLScheduler::get().enqueue_op(*k, input, outputs, true); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); ++i; } } @@ -303,13 +303,13 @@ Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inpu void CLConcatenateLayer::run() { - InputTensorMap srcs; + ITensorPack pack; for(unsigned i = 0; i < _impl->num_inputs; ++i) { - srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(srcs, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp index f8e9694b1c..de94255b48 100644 --- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp +++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp @@ -153,10 +153,10 @@ Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRsqrtLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLExpLayer::Impl @@ -195,10 +195,10 @@ Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLExpLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLNegLayer::Impl @@ -236,10 +236,10 @@ Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLNegLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLSinLayer::Impl @@ -277,10 +277,10 @@ Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLSinLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLAbsLayer::Impl @@ -318,10 +318,10 @@ Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLAbsLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLLogLayer::Impl @@ -359,10 +359,10 @@ Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLLogLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLRoundLayer::Impl @@ -400,9 +400,9 @@ Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRoundLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git 
a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp index e66e4bf526..6f664725c5 100644 --- a/src/runtime/CL/functions/CLElementwiseOperations.cpp +++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp @@ -47,19 +47,21 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -83,12 +85,11 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, input1, input2, output, policy, act_info); } -void CLArithmeticAddition::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticAddition::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticSubtraction::CLArithmeticSubtraction() @@ -110,12 +111,11 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info); } -void CLArithmeticSubtraction::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticSubtraction::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticDivision::CLArithmeticDivision() @@ -136,12 +136,11 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn return CLArithmeticOperationKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info); } -void CLArithmeticDivision::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticDivision::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMax::CLElementwiseMax() @@ -162,12 +161,11 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const 
ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info); } -void CLElementwiseMax::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMax::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMin::CLElementwiseMin() @@ -188,12 +186,11 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info); } -void CLElementwiseMin::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMin::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseSquaredDiff::CLElementwiseSquaredDiff() @@ -214,12 +211,11 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens return CLArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info); } -void CLElementwiseSquaredDiff::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseSquaredDiff::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwisePower::CLElementwisePower() @@ -240,12 +236,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return CLArithmeticOperationKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info); } -void CLElementwisePower::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwisePower::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -287,10 +282,12 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticAddition::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLArithmeticSubtraction::Impl @@ -331,10 +328,12 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso void 
CLArithmeticSubtraction::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLArithmeticDivision::Impl @@ -374,10 +373,12 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticDivision::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMax::Impl @@ -417,10 +418,12 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMax::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMin::Impl @@ -460,10 +463,12 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMin::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseSquaredDiff::Impl @@ -504,10 +509,12 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens void CLElementwiseSquaredDiff::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwisePower::Impl @@ -547,9 +554,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo void CLElementwisePower::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp index 
fbb466acc8..e03bd13284 100644 --- a/src/runtime/CL/functions/CLPReluLayer.cpp +++ b/src/runtime/CL/functions/CLPReluLayer.cpp @@ -44,19 +44,22 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) + +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -80,12 +83,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha return CLArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output); } -void CLPReluLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPReluLayer::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -126,9 +128,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha void CLPReluLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp index 34e06a3d03..883ce68536 100644 --- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp +++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp @@ -34,19 +34,21 @@ namespace arm_compute { namespace { -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -81,12 +83,11 @@ Status 
CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen return CLPixelWiseMultiplicationKernel::validate(input1, input2, output, scale, overflow_policy, rounding_policy, act_info); } -void CLPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication() @@ -116,12 +117,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con return CLComplexPixelWiseMultiplicationKernel::validate(input1, input2, output, act_info); } -void CLComplexPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -165,10 +165,12 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen void CLPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLComplexPixelWiseMultiplication::Impl @@ -208,9 +210,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con void CLComplexPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLReshapeLayer.cpp b/src/runtime/CL/functions/CLReshapeLayer.cpp index ac8b176963..273a761a0a 100644 --- a/src/runtime/CL/functions/CLReshapeLayer.cpp +++ b/src/runtime/CL/functions/CLReshapeLayer.cpp @@ -84,10 +84,10 @@ Status CLReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void CLReshapeLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute /** [CLReshapeLayer snippet] **/ diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index 
3689707bd0..f36550ba91 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -97,8 +97,9 @@ void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor void CLSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index bdef0785ec..b78073dd67 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -96,9 +96,9 @@ Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *out void CLStridedSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 1797c2ceb1..52644bf192 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -316,9 +316,9 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index 68c98cebe7..e49e15508b 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -74,9 +74,9 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index b07aa8ce18..55f62c1387 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -405,7 +405,7 @@ void CPPScheduler::run_workloads(std::vector &workloads) } #endif /* DOXYGEN_SKIP_THIS */ -void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); @@ -464,13 +464,13 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const { ThreadInfo info; info.cpu_info = &_cpu_info; - if(inputs.empty()) + if(tensors.empty()) { kernel->run(max_window, info); } else { - kernel->run_op(inputs, outputs, 
max_window, info); + kernel->run_op(tensors, max_window, info); } } else @@ -495,18 +495,18 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const for(unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &tensors](const ThreadInfo & info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - if(inputs.empty()) + if(tensors.empty()) { kernel->run(win, info); } else { - kernel->run_op(inputs, outputs, win, info); + kernel->run_op(tensors, win, info); } }; } @@ -515,15 +515,14 @@ void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const } } -void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { - schedule_common(kernel, hints, inputs, outputs); + schedule_common(kernel, hints, tensors); } void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { - const InputTensorMap inputs; - OutputTensorMap outputs; - schedule_common(kernel, hints, inputs, outputs); + ITensorPack tensors; + schedule_common(kernel, hints, tensors); } } // namespace arm_compute diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index a672656419..6f67bc005f 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,12 +49,12 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) kernel->run(kernel->window(), info); } -void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_UNUSED(hints); ThreadInfo info; info.cpu_info = &_cpu_info; - kernel->run_op(inputs, outputs, kernel->window(), info); + kernel->run_op(tensors, kernel->window(), info); } void SingleThreadScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads) diff --git a/src/runtime/NEON/INEOperator.cpp b/src/runtime/NEON/INEOperator.cpp index 1d819977c8..75068b15c9 100644 --- a/src/runtime/NEON/INEOperator.cpp +++ b/src/runtime/NEON/INEOperator.cpp @@ -33,19 +33,17 @@ INEOperator::INEOperator(IRuntimeContext *ctx) { } -void INEOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void INEOperator::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, tensors); } -void INEOperator::prepare(OperatorTensorMap constants) +void INEOperator::prepare(ITensorPack &constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp index 0e75e58b3b..7f55edbf70 100644 --- a/src/runtime/NEON/functions/NEActivationLayer.cpp +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -85,9 +85,9 @@ Status NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void 
NEActivationLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp index b18309ef1d..4453a015e8 100644 --- a/src/runtime/NEON/functions/NEArithmeticAddition.cpp +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -79,8 +79,10 @@ void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input void NEArithmeticAddition::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp index c7f492bcbc..1c95bbfae8 100644 --- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp +++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp @@ -80,8 +80,10 @@ void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *in void NEArithmeticSubtraction::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 9f8a2a1b8e..8df4f4cb62 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -146,16 +146,14 @@ Status NEConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_ return Status{}; } -void NEConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void NEConcatenation::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - if(inputs.size() != _num_inputs) + if(static_cast<int>(tensors.size() - 1) != static_cast<int>(_num_inputs)) { ARM_COMPUTE_ERROR("Configured with different number of inputs"); } @@ -163,8 +161,10 @@ void NEConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, Operat int i = 0; for(auto &k : _concat_kernels) { - const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; - NEScheduler::get().schedule_op(k.get(), Window::DimY, input, outputs); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + NEScheduler::get().schedule_op(k.get(), Window::DimY, pack); ++i; } } @@ -216,13 +216,13 @@ Status NEConcatenateLayer::validate(const 
std::vector<const ITensorInfo *> &inpu void NEConcatenateLayer::run() { - InputTensorMap srcs; + ITensorPack pack; for(unsigned i = 0; i < _impl->num_inputs; ++i) { - srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(srcs, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp index 9340cc09d4..d1f60c71e1 100644 --- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp +++ b/src/runtime/NEON/functions/NEElementwiseOperators.cpp @@ -163,9 +163,11 @@ Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * void NEElementwiseMax::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseMin::Impl @@ -202,9 +204,11 @@ Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * void NEElementwiseMin::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseSquaredDiff::Impl @@ -241,9 +245,11 @@ Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens void NEElementwiseSquaredDiff::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseDivision::Impl @@ -280,9 +286,11 @@ Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorI void NEElementwiseDivision::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwisePower::Impl @@ -319,9 +327,11 @@ Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo void NEElementwisePower::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, 
_impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } template <ComparisonOperation COP> @@ -364,9 +374,11 @@ Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, c template <ComparisonOperation COP> void NEElementwiseComparisonStatic<COP>::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEElementwiseComparison::Impl @@ -401,9 +413,11 @@ Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITenso void NEElementwiseComparison::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } // Supported Specializations diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp index 15d9fd9959..f9393a4d92 100644 --- a/src/runtime/NEON/functions/NEPReluLayer.cpp +++ b/src/runtime/NEON/functions/NEPReluLayer.cpp @@ -71,9 +71,11 @@ void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor void NEPReluLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp index ba5dd7cdee..4208878b75 100644 --- a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp +++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp @@ -97,9 +97,11 @@ void NEPixelWiseMultiplication::configure(const ITensor *input1, const ITensor * void NEPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct NEComplexPixelWiseMultiplication::Impl @@ -134,8 +136,10 @@ void NEComplexPixelWiseMultiplication::configure(ITensor *input1, ITensor *input void NEComplexPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + 
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index 47d5519274..c1c88c1c7a 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -84,9 +84,9 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void NEReshapeLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESlice.cpp b/src/runtime/NEON/functions/NESlice.cpp index 7c3252178b..2bacf2ee2a 100644 --- a/src/runtime/NEON/functions/NESlice.cpp +++ b/src/runtime/NEON/functions/NESlice.cpp @@ -94,9 +94,10 @@ void NESlice::configure(const ITensor *input, ITensor *output, const Coordinates void NESlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEStridedSlice.cpp b/src/runtime/NEON/functions/NEStridedSlice.cpp index 37e3590446..8bf81e8270 100644 --- a/src/runtime/NEON/functions/NEStridedSlice.cpp +++ b/src/runtime/NEON/functions/NEStridedSlice.cpp @@ -76,9 +76,10 @@ void NEStridedSlice::configure(const ITensor *input, ITensor *output, void NEStridedSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } Status NEStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *output, diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 101601868a..11448e595c 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -83,7 +83,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } } -void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, @@ -97,7 +97,7 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Inp { ThreadInfo info; info.cpu_info = &_cpu_info; - kernel->run_op(inputs, outputs, max_window, info); + kernel->run_op(tensors, max_window, info); } else { @@ -106,11 +106,11 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Inp for(unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, 
&num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &tensors](const ThreadInfo & info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - kernel->run_op(inputs, outputs, win, info); + kernel->run_op(tensors, win, info); }; } run_workloads(workloads); diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 75b128591a..aa69bc297d 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -91,10 +91,10 @@ public: _kernels.push_back(std::move(info)); } - void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override + void schedule_op(ICPPKernel *kernel, const Hints &hints, ITensorPack &tensors) override { _timer.start(); - _real_scheduler.schedule_op(kernel, hints, inputs, outputs); + _real_scheduler.schedule_op(kernel, hints, tensors); _timer.stop(); typename SchedulerClock<output_timestamps>::kernel_info info; -- cgit v1.2.1
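
Usage sketch (not part of the patch): with the two maps collapsed, a caller now builds a single ITensorPack and hands the same pack to operators and schedulers. This is a minimal sketch assuming only the API visible in the hunks above (ITensorPack::add_tensor, the TensorType slots, experimental::INEOperator::run); the run_binary_op helper is hypothetical, and tensor allocation and operator configuration are elided.

#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/NEON/INEOperator.h"

using namespace arm_compute;

// Hypothetical helper: run any already-configured two-input NEON operator.
// Inputs and output travel in one pack, keyed by TensorType slots, instead
// of separate InputTensorMap/OutputTensorMap/OperatorTensorMap arguments.
void run_binary_op(experimental::INEOperator &op, const ITensor *src0, const ITensor *src1, ITensor *dst)
{
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, src0); // first input slot
    pack.add_tensor(TensorType::ACL_SRC_1, src1); // second input slot
    pack.add_tensor(TensorType::ACL_DST, dst);    // output slot
    op.run(pack);                                 // one pack replaces (inputs, outputs, workspace)
}

The same pack flows down unchanged: the operator forwards it to IScheduler::schedule_op(kernel, hints, pack), which in turn calls ICPPKernel::run_op(pack, window, info); an empty pack is the scheduler's cue to fall back to the legacy kernel->run() path, as the schedule_common hunks above show.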