aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h27
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h17
-rw-r--r--src/core/TensorInfo.cpp4
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp51
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h50
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp2
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp2
-rw-r--r--src/dynamic_fusion/utils/Utils.h12
10 files changed, 102 insertions, 92 deletions
diff --git a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
index 25023ff0a1..3daedd4efb 100644
--- a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
+++ b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,12 +32,31 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** Type of memory used by a workload tensor */
+/** Type of memory used by a workload tensor
+ *
+ * We can classify tensors in 2 dimensions: Topology (where they are in a workload) and Memory allocation:
+ * Topology:
+ * Argument tensors: "Outer" tensors exposed to the users as inputs and outputs (arguments)
+ * Intermediate tensors: "Inner" tensors hidden from the users as links between operators
+ * Memory allocation:
+ * Alloc: Tensors that need to be allocated real backing memory
+ * No-Alloc: Tensors that don't need to be allocated real backing memory
+ *
+ * We end up with 3 MemoryTypes based on the product of these two classifications
+ * | Argument | Intermediate |
+ * ---------*----------------*-------------------*
+ * Alloc | User | Auxiliary |
+ * ---------*----------------*-------------------*
+ * No-Alloc | N/A | Virtual |
+ * ---------*----------------*-------------------*
+ */
enum class MemoryType
{
+ /** Both User and Auxiliary types are of Alloc type, since they require memory allocation */
User = 0, /**< Memory coming directly from users, e.g. for argument tensors */
- Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for temporary tensors */
- NoAlloc = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory */
+ Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for tensors holding temporary results between kernels */
+ /** Virtual type is of No-Alloc type, since it doesn't require memory allocation */
+ Virtual = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory. It is mainly used at sketch time to link operators; there should be no Virtual tensors at runtime */
};
/** Memory information for tensors with @ref MemoryType::Auxiliary.
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
index f19ad6dfc5..422edb35f1 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
@@ -70,18 +70,9 @@ public:
TensorInfo create_tensor_info(Args &&... args)
{
auto tensor_info = TensorInfo(std::forward<Args>(args)...);
- tensor_info.set_id(allocate_new_tensor_id());
+ register_new_tensor(tensor_info);
return tensor_info;
}
- /** Create a @ref TensorInfo associated with the workload sketch by copying from an existing tensor info
- * @note The newly copied tensor will have a different identity within the workload than the one copied from
- * To copy the identity of @p tensor_info as well, use @ref TensorInfo 's copy constructors instead
- *
- * @param[in] tensor_info @ref ITensorInfo to copy from
- *
- * @return TensorInfo Newly created tensor info
- */
- TensorInfo create_tensor_info(const ITensorInfo &tensor_info);
/** Create a default @ref TensorInfo associated with the workload sketch
* It is usually used by user input or output tensors
*
@@ -90,7 +81,11 @@ public:
TensorInfo create_tensor_info();
private:
- ITensorInfo::Id allocate_new_tensor_id();
+ /** Register a new tensor by setting a new id to it and register its memory descriptor in the sketch
+ *
+ * @param[in,out] tensor_info @ref ITensorInfo that will be registered
+ */
+ void register_new_tensor(ITensorInfo &tensor_info);
std::unique_ptr<Implementation> _impl; /**< Internal opaque implementation*/
};
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 954c6c5f1a..5905ba5215 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -56,7 +56,7 @@ TensorInfo::TensorInfo(const ITensorInfo &info)
_quantization_info = info.quantization_info();
_data_layout = info.data_layout();
_are_values_constant = info.are_values_constant();
- _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+ _id = info.id();
_lock_paddings = info.lock_paddings();
}
@@ -77,7 +77,7 @@ TensorInfo::TensorInfo(const TensorInfo &info)
_quantization_info = info.quantization_info();
_data_layout = info.data_layout();
_are_values_constant = info.are_values_constant();
- _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+ _id = info.id();
_lock_paddings = false;
}
TensorInfo::TensorInfo(Format format)
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 669913ce30..4cf7a7fece 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,46 +31,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-namespace
-{
-/** Automatically create memory descriptors for all tensors in the graph
- *
- * @param[in] tensors @ref ITensorInfo map
- * @param[in] graph @ref DependencyGraph of which the @p tensors are a part
- *
- * @return MemoryDescriptorMap An assignment map of @ref MemoryDescriptors for each ITensorInfo in the graph
- */
-MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph)
-{
- const auto all_tensors = graph.all_tensors();
- const auto src_tensors = graph.global_src_tensors();
- const auto dst_tensors = graph.global_dst_tensors();
- const auto interm_tensors = graph.intermediate_tensors();
-
- MemoryDescriptorMap mem_map{};
- for(auto t_id : all_tensors)
- {
- const auto &tensor = tensors.at(t_id);
- // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories
- if(is_in(t_id, src_tensors) || is_in(t_id, dst_tensors))
- {
- mem_map[t_id] = MemoryDescriptor{ MemoryType::User };
- }
- else if(is_in(t_id, interm_tensors))
- {
- mem_map[t_id] = MemoryDescriptor { MemoryType::NoAlloc };
- }
- else
- {
- AuxMemoryInfo aux_mem_info{ tensor->total_size() };
- mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info };
- }
- }
- return mem_map;
-}
-
-} // namespace
-
std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
@@ -89,19 +49,16 @@ GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
{
}
-GpuKernelComponentStream GpuKernelComponentGraph::fuse() const
+GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- // Obtain memory descriptor map
- const auto mem_map = assign_memory_descriptors(_tensors, _dependency_graph);
-
GpuKernelComponentStream stream{ _services, mem_map };
- const auto op_seq = _dependency_graph.build_operators_sequence();
+ const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
for(auto op : op_seq)
{
const auto component = _components.at(op.op).get();
- const auto success = stream.add_component(component);
+ const auto success = stream.add_component(component);
ARM_COMPUTE_ERROR_ON(!success);
ARM_COMPUTE_UNUSED(success);
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index e4f498b130..8314ea0a50 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,8 +89,12 @@ public:
}
}
/** Perform component fusion and serialize the graph into a stream of component groups
+ *
+ * @param[in] mem_map MemoryDescriptorMap for all the tensors in the component graph
+ *
+ * @return GpuKernelComponentStream
*/
- GpuKernelComponentStream fuse() const;
+ GpuKernelComponentStream fuse(const MemoryDescriptorMap &mem_map) const;
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index ce7cf1e908..33f672071d 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,23 +43,18 @@ const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
return _impl->context();
}
-TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info)
+void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info)
{
- TensorInfo tensor{ tensor_info };
- tensor.set_id(allocate_new_tensor_id());
- return tensor;
+ tensor_info.set_id(_impl->allocate_new_tensor_id());
+ // All input output tensors are User tensors that need real backing memory
+ _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User });
}
TensorInfo GpuWorkloadSketch::create_tensor_info()
{
- TensorInfo tensor{};
- tensor.set_id(allocate_new_tensor_id());
- return tensor;
-}
-
-ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id()
-{
- return _impl->allocate_new_tensor_id();
+ TensorInfo tensor_info{};
+ register_new_tensor(tensor_info);
+ return tensor_info;
}
GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index 08796b607b..d5075d5c94 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -24,6 +24,7 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
@@ -52,7 +53,8 @@ public:
_comp_services{},
_component_graph{ &_comp_services },
_operator_group{},
- _interm_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
+ _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() },
+ _mem_map{}
{
}
/** Prevent instances of this class from being copy constructed */
@@ -99,18 +101,47 @@ public:
*/
GpuWorkloadSourceCode generate_source_code() const
{
- return component_graph().fuse().write_workload_code();
+ return component_graph().fuse(_mem_map).write_workload_code();
}
- /** Create an intermediate tensor info and save it
+ /** Create a virtual (see @ref MemoryType) tensor info and save it
*
- * @return ITensorInfo The created intermediate tensor info object pointer
+ * @return ITensorInfo* The created virtual tensor info object pointer
*/
- ITensorInfo *create_intermediate_tensor()
+ ITensorInfo *create_virtual_tensor()
{
auto uptr = std::make_unique<TensorInfo>();
- uptr->set_id(-allocate_new_tensor_id()); // intermediate tensors must have negative id
- _interm_tensor_info_list.emplace_back(std::move(uptr));
- return _interm_tensor_info_list.back().get();
+ uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id
+ register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual });
+ _managed_tensor_info_list.emplace_back(std::move(uptr));
+ return _managed_tensor_info_list.back().get();
+ }
+ /** Create an auxiliary (see @ref MemoryType) tensor info and save it
+ *
+ * @param[in] tensor_info @ref ITensorInfo to copy from
+ *
+ * @return ITensorInfo* The created auxiliary tensor info object pointer
+ */
+ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
+ {
+ auto uptr = std::make_unique<TensorInfo>(tensor_info);
+ uptr->set_id(allocate_new_tensor_id());
+ register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } });
+ _managed_tensor_info_list.emplace_back(std::move(uptr));
+ return _managed_tensor_info_list.back().get();
+ }
+ /** Register memory descriptor of a tensor info
+ *
+ * @param[in] info @ref ITensorInfo to be registered
+ * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info
+ */
+ void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc)
+ {
+ _mem_map[info.id()] = mem_desc;
}
private:
@@ -119,7 +150,8 @@ private:
GpuKernelComponentGraph _component_graph;
GpuOperatorGroup _operator_group;
ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
- std::vector<std::unique_ptr<TensorInfo>> _interm_tensor_info_list;
+ std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
+ MemoryDescriptorMap _mem_map;
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 00fbb730b9..7a8b97957e 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -239,7 +239,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
// Initialize the direct convolution descriptor
const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
- ITensorInfo *dst = sketch.implementation().create_intermediate_tensor();
+ ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
// Assert validation
ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, attributes));
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
index cd5487c10b..c906da8199 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
@@ -66,7 +66,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
- ARM_COMPUTE_RETURN_ERROR_ON(!is_user_tensor(dst));
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_alloc_tensor(dst));
// Initialize the destination tensor info.
TensorInfo dst_to_validate = *dst;
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index d317ec7fd6..c9fc2c610f 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -33,21 +33,29 @@ namespace experimental
{
namespace dynamic_fusion
{
-inline bool is_user_tensor(const ITensorInfo *tensor_info)
+/** Tensor should have backing memory. @ref MemoryType
+ */
+inline bool is_alloc_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() > ITensorInfo::invalid_tensor_id;
}
-inline bool is_intermediate_tensor(const ITensorInfo *tensor_info)
+/** Tensor should not have backing memory. @ref MemoryType
+ */
+inline bool is_noalloc_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() < ITensorInfo::invalid_tensor_id;
}
+/** @ref ITensorInfo has valid id
+ */
inline bool is_valid_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->has_valid_id();
}
+/** @ref ITensorInfo has invalid id
+ */
inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
{
return !is_valid_tensor(tensor_info);