aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h27
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h17
-rw-r--r--src/core/TensorInfo.cpp4
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp51
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h50
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp2
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp2
-rw-r--r--src/dynamic_fusion/utils/Utils.h12
10 files changed, 102 insertions, 92 deletions
diff --git a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
index 25023ff0a1..3daedd4efb 100644
--- a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
+++ b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,12 +32,31 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** Type of memory used by a workload tensor */
+/** Type of memory used by a workload tensor
+ *
+ * We can classify tensors in 2 dimensions: Topology (where they are in a workload) and Memory allocation:
+ * Topology:
+ * Argument tensors: "Outer" tensors exposed to the users as inputs and outputs (arguments)
+ * Intermediate tensors: "Inner" tensors hidden from the users as links between operators
+ * Memory allocation:
+ * Alloc: Tensors that need to be allocated real backing memory
+ * No-Alloc: Tensors that don't need to be allocated real backing memory
+ *
+ * We end up with 3 MemoryTypes based on the product of these two classifications
+ * | Argument | Intermediate |
+ * ---------*----------------*-------------------*
+ * Alloc | User | Auxiliary |
+ * ---------*----------------*-------------------*
+ * No-Alloc | N/A | Virtual |
+ * ---------*----------------*-------------------*
+ */
enum class MemoryType
{
+ /** Both User and Auxiliary types are of Alloc type, since they require memory allocation */
User = 0, /**< Memory coming directly from users, e.g. for argument tensors */
- Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for temporary tensors */
- NoAlloc = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory */
+ Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for tensors holding temporary results between kernels */
+ /** Virtual type is of No-Alloc type, since it doesn't require memory allocation */
+ Virtual = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory. It is mainly used at sketch time to link operators; there should be no Virtual tensors at runtime */
};
/** Memory information for tensors with @ref MemoryType::Auxiliary.
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
index f19ad6dfc5..422edb35f1 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
@@ -70,18 +70,9 @@ public:
TensorInfo create_tensor_info(Args &&... args)
{
auto tensor_info = TensorInfo(std::forward<Args>(args)...);
- tensor_info.set_id(allocate_new_tensor_id());
+ register_new_tensor(tensor_info);
return tensor_info;
}
- /** Create a @ref TensorInfo associated with the workload sketch by copying from an existing tensor info
- * @note The newly copied tensor will have a different identity within the workload than the one copied from
- * To copy the identity of @p tensor_info as well, use @ref TensorInfo 's copy constructors instead
- *
- * @param[in] tensor_info @ref ITensorInfo to copy from
- *
- * @return TensorInfo Newly created tensor info
- */
- TensorInfo create_tensor_info(const ITensorInfo &tensor_info);
/** Create a default @ref TensorInfo associated with the workload sketch
* It is usually used by user input or output tensors
*
@@ -90,7 +81,11 @@ public:
TensorInfo create_tensor_info();
private:
- ITensorInfo::Id allocate_new_tensor_id();
+ /** Register a new tensor by setting a new id to it and register its memory descriptor in the sketch
+ *
+ * @param[in,out] tensor_info @ref ITensorInfo that will be registered
+ */
+ void register_new_tensor(ITensorInfo &tensor_info);
std::unique_ptr<Implementation> _impl; /**< Internal opaque implementation*/
};
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 954c6c5f1a..5905ba5215 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -56,7 +56,7 @@ TensorInfo::TensorInfo(const ITensorInfo &info)
_quantization_info = info.quantization_info();
_data_layout = info.data_layout();
_are_values_constant = info.are_values_constant();
- _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+ _id = info.id();
_lock_paddings = info.lock_paddings();
}
@@ -77,7 +77,7 @@ TensorInfo::TensorInfo(const TensorInfo &info)
_quantization_info = info.quantization_info();
_data_layout = info.data_layout();
_are_values_constant = info.are_values_constant();
- _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+ _id = info.id();
_lock_paddings = false;
}
TensorInfo::TensorInfo(Format format)
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 669913ce30..4cf7a7fece 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,46 +31,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-namespace
-{
-/** Automatically create memory descriptors for all tensors in the graph
- *
- * @param[in] tensors @ref ITensorInfo map
- * @param[in] graph @ref DependencyGraph of which the @p tensors are a part
- *
- * @return MemoryDescriptorMap An assignment map of @ref MemoryDescriptors for each ITensorInfo in the graph
- */
-MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph)
-{
- const auto all_tensors = graph.all_tensors();
- const auto src_tensors = graph.global_src_tensors();
- const auto dst_tensors = graph.global_dst_tensors();
- const auto interm_tensors = graph.intermediate_tensors();
-
- MemoryDescriptorMap mem_map{};
- for(auto t_id : all_tensors)
- {
- const auto &tensor = tensors.at(t_id);
- // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories
- if(is_in(t_id, src_tensors) || is_in(t_id, dst_tensors))
- {
- mem_map[t_id] = MemoryDescriptor{ MemoryType::User };
- }
- else if(is_in(t_id, interm_tensors))
- {
- mem_map[t_id] = MemoryDescriptor { MemoryType::NoAlloc };
- }
- else
- {
- AuxMemoryInfo aux_mem_info{ tensor->total_size() };
- mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info };
- }
- }
- return mem_map;
-}
-
-} // namespace
-
std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
@@ -89,19 +49,16 @@ GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
{
}
-GpuKernelComponentStream GpuKernelComponentGraph::fuse() const
+GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- // Obtain memory descriptor map
- const auto mem_map = assign_memory_descriptors(_tensors, _dependency_graph);
-
GpuKernelComponentStream stream{ _services, mem_map };
- const auto op_seq = _dependency_graph.build_operators_sequence();
+ const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
for(auto op : op_seq)
{
const auto component = _components.at(op.op).get();
- const auto success = stream.add_component(component);
+ const auto success = stream.add_component(component);
ARM_COMPUTE_ERROR_ON(!success);
ARM_COMPUTE_UNUSED(success);
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index e4f498b130..8314ea0a50 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,8 +89,12 @@ public:
}
}
/** Perform component fusion and serialize the graph into a stream of component groups
+ *
+ * @param[in] mem_map MemoryDescriptorMap for all the tensors in the component graph
+ *
+ * @return GpuKernelComponentStream
*/
- GpuKernelComponentStream fuse() const;
+ GpuKernelComponentStream fuse(const MemoryDescriptorMap &mem_map) const;
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index ce7cf1e908..33f672071d 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,23 +43,18 @@ const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
return _impl->context();
}
-TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info)
+void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info)
{
- TensorInfo tensor{ tensor_info };
- tensor.set_id(allocate_new_tensor_id());
- return tensor;
+ tensor_info.set_id(_impl->allocate_new_tensor_id());
+ // All input output tensors are User tensors that need real backing memory
+ _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User });
}
TensorInfo GpuWorkloadSketch::create_tensor_info()
{
- TensorInfo tensor{};
- tensor.set_id(allocate_new_tensor_id());
- return tensor;
-}
-
-ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id()
-{
- return _impl->allocate_new_tensor_id();
+ TensorInfo tensor_info{};
+ register_new_tensor(tensor_info);
+ return tensor_info;
}
GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index 08796b607b..d5075d5c94 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -24,6 +24,7 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
@@ -52,7 +53,8 @@ public:
_comp_services{},
_component_graph{ &_comp_services },
_operator_group{},
- _interm_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
+ _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() },
+ _mem_map{}
{
}
/** Prevent instances of this class from being copy constructed */
@@ -99,18 +101,47 @@ public:
*/
GpuWorkloadSourceCode generate_source_code() const
{
- return component_graph().fuse().write_workload_code();
+ return component_graph().fuse(_mem_map).write_workload_code();
}
- /** Create an intermediate tensor info and save it
+ /** Create a virtual (see @ref MemoryType) tensor info and save it
*
- * @return ITensorInfo The created intermediate tensor info object pointer
+ * @return ITensorInfo* The created virtual tensor info object pointer
*/
- ITensorInfo *create_intermediate_tensor()
+ ITensorInfo *create_virtual_tensor()
{
auto uptr = std::make_unique<TensorInfo>();
- uptr->set_id(-allocate_new_tensor_id()); // intermediate tensors must have negative id
- _interm_tensor_info_list.emplace_back(std::move(uptr));
- return _interm_tensor_info_list.back().get();
+ uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id
+ register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual });
+ _managed_tensor_info_list.emplace_back(std::move(uptr));
+ return _managed_tensor_info_list.back().get();
+ }
+ /** Create an auxiliary (see @ref MemoryType) tensor info and save it
+ *
+ * @param[in] tensor_info @ref ITensorInfo to copy from
+ *
+ * @return ITensorInfo* The created auxiliary tensor info object pointer
+ */
+ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
+ {
+ auto uptr = std::make_unique<TensorInfo>(tensor_info);
+ uptr->set_id(allocate_new_tensor_id());
+ register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } });
+ _managed_tensor_info_list.emplace_back(std::move(uptr));
+ return _managed_tensor_info_list.back().get();
+ }
+ /** Register memory descriptor of a tensor info
+ *
+ * @param[in] info @ref ITensorInfo to be registered
+ * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info
+ */
+ void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc)
+ {
+ _mem_map[info.id()] = mem_desc;
}
private:
@@ -119,7 +150,8 @@ private:
GpuKernelComponentGraph _component_graph;
GpuOperatorGroup _operator_group;
ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
- std::vector<std::unique_ptr<TensorInfo>> _interm_tensor_info_list;
+ std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
+ MemoryDescriptorMap _mem_map;
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 00fbb730b9..7a8b97957e 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -239,7 +239,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
// Initialize the direct convolution descriptor
const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
- ITensorInfo *dst = sketch.implementation().create_intermediate_tensor();
+ ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
// Assert validation
ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, attributes));
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
index cd5487c10b..c906da8199 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
@@ -66,7 +66,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
- ARM_COMPUTE_RETURN_ERROR_ON(!is_user_tensor(dst));
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_alloc_tensor(dst));
// Initialize the destination tensor info.
TensorInfo dst_to_validate = *dst;
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index d317ec7fd6..c9fc2c610f 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -33,21 +33,29 @@ namespace experimental
{
namespace dynamic_fusion
{
-inline bool is_user_tensor(const ITensorInfo *tensor_info)
+/** Tensor should have backing memory. @ref MemoryType
+ */
+inline bool is_alloc_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() > ITensorInfo::invalid_tensor_id;
}
-inline bool is_intermediate_tensor(const ITensorInfo *tensor_info)
+/** Tensor should not have backing memory. @ref MemoryType
+ */
+inline bool is_noalloc_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() < ITensorInfo::invalid_tensor_id;
}
+/** @ref ITensorInfo has valid id
+ */
inline bool is_valid_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->has_valid_id();
}
+/** @ref ITensorInfo has invalid id
+ */
inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
{
return !is_valid_tensor(tensor_info);