From 0499dff9293a86d3d53f72fed0a38b2823563674 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 31 Jul 2020 22:21:38 +0100 Subject: COMPMID-3392: Collapse TensorMaps into a single TensorPack Collapse InputTensorMap and OutputTensorMap to a single TensorPack mechanism. Signed-off-by: Georgios Pinitas Change-Id: Ie2fdfc6b07d84ad589169ec99ca64fcf45a00bec Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/253783 Tested-by: bsgcomp Reviewed-by: Michalis Spyrou Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3641 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Sheri Zhang --- src/runtime/CL/CLOperator.cpp | 10 +- src/runtime/CL/CLScheduler.cpp | 15 +-- src/runtime/CL/CLTuner.cpp | 15 +-- src/runtime/CL/functions/CLActivationLayer.cpp | 8 +- src/runtime/CL/functions/CLConcatenateLayer.cpp | 24 ++-- .../CL/functions/CLElementWiseUnaryLayer.cpp | 56 ++++----- .../CL/functions/CLElementwiseOperations.cpp | 131 +++++++++++---------- src/runtime/CL/functions/CLPReluLayer.cpp | 30 +++-- .../CL/functions/CLPixelWiseMultiplication.cpp | 46 ++++---- src/runtime/CL/functions/CLReshapeLayer.cpp | 8 +- src/runtime/CL/functions/CLSlice.cpp | 7 +- src/runtime/CL/functions/CLStridedSlice.cpp | 8 +- src/runtime/CL/tuners/BifrostTuner.cpp | 4 +- src/runtime/CL/tuners/MidgardTuner.cpp | 4 +- 14 files changed, 192 insertions(+), 174 deletions(-) (limited to 'src/runtime/CL') diff --git a/src/runtime/CL/CLOperator.cpp b/src/runtime/CL/CLOperator.cpp index c41454e933..57a4d0ec57 100644 --- a/src/runtime/CL/CLOperator.cpp +++ b/src/runtime/CL/CLOperator.cpp @@ -33,19 +33,17 @@ ICLOperator::ICLOperator(IRuntimeContext *ctx) { } -void ICLOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void ICLOperator::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - CLScheduler::get().enqueue_op(*_kernel.get(), inputs, outputs, false); + CLScheduler::get().enqueue_op(*_kernel.get(), tensors, false); } -void ICLOperator::prepare(OperatorTensorMap constants) +void ICLOperator::prepare(ITensorPack &constants) { ARM_COMPUTE_UNUSED(constants); } diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 5ef66f456a..ccef5cbd1b 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -151,22 +151,22 @@ void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::De _cl_tuner = cl_tuner; } -void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush) { ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised, "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); - const bool inject_memory = !inputs.empty(); + const bool inject_memory = !tensors.empty(); // Tune the kernel if the CLTuner has been provided if(_cl_tuner != nullptr) { - inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel); + inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, tensors) : _cl_tuner->tune_kernel_dynamic(kernel); } // Run kernel - inject_memory ? 
kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); + inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); if(flush) { @@ -176,11 +176,12 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { - enqueue_common(kernel, {}, {}, flush); + ITensorPack pack; + enqueue_common(kernel, pack, flush); } -void CLScheduler::enqueue_op(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs, bool flush) +void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush) { - enqueue_common(kernel, inputs, outputs, flush); + enqueue_common(kernel, tensors, flush); } } // namespace arm_compute diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index b2e3476e20..adfe67fb11 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -77,10 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel) void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) { - tune_kernel_dynamic(kernel, {}, {}); + ITensorPack pack; + tune_kernel_dynamic(kernel, pack); } -void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { // Get the configuration ID from the kernel and append GPU target name and number of available compute units const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units()); @@ -95,7 +96,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &input if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs); + cl::NDRange opt_lws = find_optimal_lws(kernel, tensors); // Insert the optimal LWS in the table add_lws_to_table(config_id, opt_lws); @@ -117,7 +118,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal _lws_table.emplace(kernel_id, optimal_lws); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) { // Profiling queue cl::CommandQueue queue_profiler; @@ -172,8 +173,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); // Run the kernel with default lws to be used as baseline - const bool inject_memory = !inputs.empty(); - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + const bool inject_memory = !tensors.empty(); + inject_memory ? kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); @@ -203,7 +204,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &i kernel.set_lws_hint(lws_test); // Run the kernel - inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); + inject_memory ? 
kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 784473d426..5ddf227382 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -89,9 +89,9 @@ Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo * void CLActivationLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 1ddda021bc..4214813446 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -220,16 +220,14 @@ Status CLConcatenation::validate(const std::vector &inputs_ return Status{}; } -void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLConcatenation::run(ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(workspace); - - if(inputs.empty() || outputs.empty()) + if(tensors.empty()) { ARM_COMPUTE_ERROR("No inputs provided"); } - if(inputs.size() != _num_inputs) + if(static_cast(tensors.size()) - 1 != static_cast(_num_inputs)) { ARM_COMPUTE_ERROR("Configured with different number of inputs"); } @@ -237,15 +235,17 @@ void CLConcatenation::run(InputTensorMap inputs, OutputTensorMap outputs, Operat if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) { ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), inputs, outputs, true); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); } else { int i = 0; for(auto &k : _concat_kernels) { - const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; - CLScheduler::get().enqueue_op(*k, input, outputs, true); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); ++i; } } @@ -303,13 +303,13 @@ Status CLConcatenateLayer::validate(const std::vector &inpu void CLConcatenateLayer::run() { - InputTensorMap srcs; + ITensorPack pack; for(unsigned i = 0; i < _impl->num_inputs; ++i) { - srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(srcs, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp index f8e9694b1c..de94255b48 100644 --- a/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp +++ b/src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp @@ -153,10 +153,10 @@ Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRsqrtLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { 
TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLExpLayer::Impl @@ -195,10 +195,10 @@ Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLExpLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLNegLayer::Impl @@ -236,10 +236,10 @@ Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLNegLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLSinLayer::Impl @@ -277,10 +277,10 @@ Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLSinLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLAbsLayer::Impl @@ -318,10 +318,10 @@ Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLAbsLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLLogLayer::Impl @@ -359,10 +359,10 @@ Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output) void CLLogLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } struct CLRoundLayer::Impl @@ -400,9 +400,9 @@ Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu void CLRoundLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp index e66e4bf526..6f664725c5 100644 --- a/src/runtime/CL/functions/CLElementwiseOperations.cpp +++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp @@ -47,19 +47,21 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap 
&outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -83,12 +85,11 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, input1, input2, output, policy, act_info); } -void CLArithmeticAddition::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticAddition::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticSubtraction::CLArithmeticSubtraction() @@ -110,12 +111,11 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info); } -void CLArithmeticSubtraction::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticSubtraction::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLArithmeticDivision::CLArithmeticDivision() @@ -136,12 +136,11 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn return CLArithmeticOperationKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info); } -void CLArithmeticDivision::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLArithmeticDivision::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMax::CLElementwiseMax() @@ -162,12 +161,11 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info); } -void CLElementwiseMax::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMax::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack 
= select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseMin::CLElementwiseMin() @@ -188,12 +186,11 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * return CLArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info); } -void CLElementwiseMin::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseMin::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwiseSquaredDiff::CLElementwiseSquaredDiff() @@ -214,12 +211,11 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens return CLArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info); } -void CLElementwiseSquaredDiff::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwiseSquaredDiff::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLElementwisePower::CLElementwisePower() @@ -240,12 +236,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo return CLArithmeticOperationKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info); } -void CLElementwisePower::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLElementwisePower::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -287,10 +282,12 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticAddition::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLArithmeticSubtraction::Impl @@ -331,10 +328,12 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso void CLArithmeticSubtraction::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct 
CLArithmeticDivision::Impl @@ -374,10 +373,12 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn void CLArithmeticDivision::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMax::Impl @@ -417,10 +418,12 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMax::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseMin::Impl @@ -460,10 +463,12 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo * void CLElementwiseMin::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwiseSquaredDiff::Impl @@ -504,10 +509,12 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens void CLElementwiseSquaredDiff::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLElementwisePower::Impl @@ -547,9 +554,11 @@ Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo void CLElementwisePower::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp index fbb466acc8..e03bd13284 100644 --- a/src/runtime/CL/functions/CLPReluLayer.cpp +++ b/src/runtime/CL/functions/CLPReluLayer.cpp @@ -44,19 +44,22 @@ void configure_border_handler(const CLCompileContext &compile_context, CLFillBor } } } -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) + +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + 
ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -80,12 +83,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha return CLArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output); } -void CLPReluLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPReluLayer::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -126,9 +128,11 @@ Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha void CLPReluLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp index 34e06a3d03..883ce68536 100644 --- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp +++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp @@ -34,19 +34,21 @@ namespace arm_compute { namespace { -void select_border_input(InputTensorMap &tensor_map, InputTensorMap &inputs, OutputTensorMap &outputs) +ITensorPack select_border_input(ITensorPack &tensors) { - if(outputs.at(TensorType::ACL_DST)->info()->dimension(0) > 1) + ITensorPack pack; + if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1) { - if(inputs.at(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) + if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1) { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_1); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1)); } else { - tensor_map[TensorType::ACL_SRC] = inputs.at(TensorType::ACL_SRC_0); + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0)); } } + return pack; } } // namespace @@ -81,12 +83,11 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen return CLPixelWiseMultiplicationKernel::validate(input1, input2, output, scale, overflow_policy, rounding_policy, act_info); } -void CLPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - 
CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication() @@ -116,12 +117,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con return CLComplexPixelWiseMultiplicationKernel::validate(input1, input2, output, act_info); } -void CLComplexPixelWiseMultiplication::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors) { - InputTensorMap src; - select_border_input(src, inputs, outputs); - CLScheduler::get().enqueue_op(_border_handler, src, {}); - ICLOperator::run(inputs, outputs, workspace); + auto border_pack = select_border_input(tensors); + CLScheduler::get().enqueue_op(_border_handler, border_pack); + ICLOperator::run(tensors); } } // namespace experimental @@ -165,10 +165,12 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen void CLPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } struct CLComplexPixelWiseMultiplication::Impl @@ -208,9 +210,11 @@ Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, con void CLComplexPixelWiseMultiplication::run() { - const InputTensorMap src{ { TensorType::ACL_SRC_0, _impl->src_0 }, { TensorType::ACL_SRC_1, _impl->src_1 } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); - _impl->op->run(src, dst, {}); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLReshapeLayer.cpp b/src/runtime/CL/functions/CLReshapeLayer.cpp index ac8b176963..273a761a0a 100644 --- a/src/runtime/CL/functions/CLReshapeLayer.cpp +++ b/src/runtime/CL/functions/CLReshapeLayer.cpp @@ -84,10 +84,10 @@ Status CLReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out void CLReshapeLayer::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute /** [CLReshapeLayer snippet] **/ diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp index 3689707bd0..f36550ba91 100644 --- a/src/runtime/CL/functions/CLSlice.cpp +++ b/src/runtime/CL/functions/CLSlice.cpp @@ -97,8 +97,9 @@ void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor void CLSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - _impl->op->run(src, dst, {}); + ITensorPack pack; + 
pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp index bdef0785ec..b78073dd67 100644 --- a/src/runtime/CL/functions/CLStridedSlice.cpp +++ b/src/runtime/CL/functions/CLStridedSlice.cpp @@ -96,9 +96,9 @@ Status CLStridedSlice::validate(const ITensorInfo *input, const ITensorInfo *out void CLStridedSlice::run() { - const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; - const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; - - _impl->op->run(src, dst, {}); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 1797c2ceb1..52644bf192 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -316,9 +316,9 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index 68c98cebe7..e49e15508b 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -74,9 +74,9 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel) ARM_COMPUTE_UNUSED(kernel); } -void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) { - ARM_COMPUTE_UNUSED(kernel, inputs, outputs); + ARM_COMPUTE_UNUSED(kernel, tensors); } } // namespace tuners } // namespace arm_compute -- cgit v1.2.1
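
Editor's illustration, for readers following the API change rather than the individual hunks: every rewritten run() above follows the same pattern, so a caller-side sketch is given here. The ITensorPack, add_tensor(), TensorType::ACL_* and run(pack) calls are the ones introduced by this patch; the helper name, the template parameter, the header paths and the tensor arguments are illustrative assumptions, not part of the change.

    // Caller-side sketch of the single-pack interface (editor illustration only).
    // Header locations are assumed; the pack/run calls mirror this patch.
    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/experimental/Types.h" // ITensorPack, TensorType (assumed location)

    template <typename OperatorType>
    void run_with_injected_memory(OperatorType &op, arm_compute::ICLTensor *src, arm_compute::ICLTensor *dst)
    {
        using namespace arm_compute;

        // One ITensorPack replaces the former InputTensorMap/OutputTensorMap pair.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, dst);

        // ICLOperator::run() forwards the non-empty pack to CLScheduler::enqueue_op(),
        // which injects the packed tensors into the kernel via run_op(); an empty pack
        // makes the scheduler fall back to kernel.run() on the kernel's own tensors.
        op.run(pack);
    }

This is also why CLScheduler::enqueue() and CLTuner::tune_kernel_dynamic() above now forward a default-constructed ITensorPack to their common/overloaded counterparts instead of passing two empty maps.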