diff options
-rw-r--r-- | arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h | 27 | ||||
-rw-r--r-- | arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h | 17 | ||||
-rw-r--r-- | src/core/TensorInfo.cpp | 4 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp | 51 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h | 8 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp | 21 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h | 50 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp | 2 | ||||
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp | 2 | ||||
-rw-r--r-- | src/dynamic_fusion/utils/Utils.h | 12 |
10 files changed, 102 insertions, 92 deletions
diff --git a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h index 25023ff0a1..3daedd4efb 100644 --- a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h +++ b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,12 +32,31 @@ namespace experimental { namespace dynamic_fusion { -/** Type of memory used by a workload tensor */ +/** Type of memory used by a workload tensor + * + * We can classify tensors in 2 dimensions: Topology (where they are in a workload) and Memory allocation: + * Topology: + * Argument tensors: "Outer" tensors exposed to the users as inputs and outputs (arguments) + * Intermediate tensors: "Inner" tensors hidden from the users as links between operators + * Memory allocation: + * Alloc: Tensors that need to be allocated real backing memory + * No-Alloc: Tensors that don't need to be allocated real backing memory + * + * We end up with 3 MemoryType based on the product of these two classifications + * | Argument | Intermediate | + * ---------*----------------*-------------------* + * Alloc | User | Auxiliary | + * ---------*----------------*-------------------* + * No-Alloc | N/A | Virtual | + * ---------*----------------*-------------------* + */ enum class MemoryType { + /** Both User and Auxiliary types are of Alloc type. Since they require memory allocation */ User = 0, /**< Memory coming directly from users, e.g. for argument tensors */ - Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for temporary tensors */ - NoAlloc = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory */ + Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for tensors holding temporary results between kernels */ + /** Virtual type is of No-Alloc type. 
Since it doesn't require memory allocation */ + Virtual = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory. It is mainly used at sketch time to link operators; there should be no Virtual tensors at runtime */ }; /** Memory information for tensors with @ref MemoryType::Auxiliary. diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h index f19ad6dfc5..422edb35f1 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h @@ -70,18 +70,9 @@ public: TensorInfo create_tensor_info(Args &&... args) { auto tensor_info = TensorInfo(std::forward<Args>(args)...); - tensor_info.set_id(allocate_new_tensor_id()); + register_new_tensor(tensor_info); return tensor_info; } - /** Create a @ref TensorInfo associated with the workload sketch by copying from an existing tensor info - * @note The newly copied tensor will have a different identity within the workload than the one copied from - * To copy the identity of @p tensor_info as well, use @ref TensorInfo 's copy constructors instead - * - * @param[in] tensor_info @ref ITensorInfo to copy from - * - * @return TensorInfo Newly created tensor info - */ - TensorInfo create_tensor_info(const ITensorInfo &tensor_info); /** Create a default @ref TensorInfo associated with the workload sketch * It is usually used by user input or output tensors * @@ -90,7 +81,11 @@ public: TensorInfo create_tensor_info(); private: - ITensorInfo::Id allocate_new_tensor_id(); + /** Register a new tensor by setting a new id to it and register its memory descriptor in the sketch + * + * @param[in,out] tensor_info @ref ITensorInfo that will be registered + */ + void register_new_tensor(ITensorInfo &tensor_info); std::unique_ptr<Implementation> _impl; /**< Internal opaque implementation*/ }; diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp index 954c6c5f1a..5905ba5215 
100644 --- a/src/core/TensorInfo.cpp +++ b/src/core/TensorInfo.cpp @@ -56,7 +56,7 @@ TensorInfo::TensorInfo(const ITensorInfo &info) _quantization_info = info.quantization_info(); _data_layout = info.data_layout(); _are_values_constant = info.are_values_constant(); - _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied + _id = info.id(); _lock_paddings = info.lock_paddings(); } @@ -77,7 +77,7 @@ TensorInfo::TensorInfo(const TensorInfo &info) _quantization_info = info.quantization_info(); _data_layout = info.data_layout(); _are_values_constant = info.are_values_constant(); - _id = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied + _id = info.id(); _lock_paddings = false; } TensorInfo::TensorInfo(Format format) diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp index 669913ce30..4cf7a7fece 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,46 +31,6 @@ namespace experimental { namespace dynamic_fusion { -namespace -{ -/** Automatically create memory descriptors for all tensors in the graph - * - * @param[in] tensors @ref ITensorInfo map - * @param[in] graph @ref DependencyGraph of which the @p tensors are a part - * - * @return MemoryDescriptorMap An assignment map of @ref MemoryDescriptors for each ITensorInfo in the graph - */ -MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph) -{ - const auto all_tensors = graph.all_tensors(); - const auto src_tensors = graph.global_src_tensors(); - const auto dst_tensors = graph.global_dst_tensors(); - const auto interm_tensors = graph.intermediate_tensors(); - - MemoryDescriptorMap mem_map{}; - for(auto t_id : all_tensors) - { - const auto &tensor = tensors.at(t_id); - // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories - if(is_in(t_id, src_tensors) || is_in(t_id, dst_tensors)) - { - mem_map[t_id] = MemoryDescriptor{ MemoryType::User }; - } - else if(is_in(t_id, interm_tensors)) - { - mem_map[t_id] = MemoryDescriptor { MemoryType::NoAlloc }; - } - else - { - AuxMemoryInfo aux_mem_info{ tensor->total_size() }; - mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info }; - } - } - return mem_map; -} - -} // namespace - std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors) { std::vector<DependencyGraph::TensorId> tensor_ids{}; @@ -89,19 +49,16 @@ GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services) { } -GpuKernelComponentStream GpuKernelComponentGraph::fuse() const +GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const { - // Obtain memory descriptor map - const auto mem_map = assign_memory_descriptors(_tensors, 
_dependency_graph); - GpuKernelComponentStream stream{ _services, mem_map }; - const auto op_seq = _dependency_graph.build_operators_sequence(); + const auto op_seq = _dependency_graph.build_operators_sequence(); stream.new_component_group(); for(auto op : op_seq) { const auto component = _components.at(op.op).get(); - const auto success = stream.add_component(component); + const auto success = stream.add_component(component); ARM_COMPUTE_ERROR_ON(!success); ARM_COMPUTE_UNUSED(success); } diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h index e4f498b130..8314ea0a50 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -89,8 +89,12 @@ public: } } /** Perform component fusion and serialize the graph into a stream of component groups + * + * @param[in] mem_map MemoryDescriptorMap for all the tensors in the component graph + * + * @return GpuKernelComponentStream */ - GpuKernelComponentStream fuse() const; + GpuKernelComponentStream fuse(const MemoryDescriptorMap &mem_map) const; private: static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors); diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp index ce7cf1e908..33f672071d 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,23 +43,18 @@ const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const return _impl->context(); } -TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info) +void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info) { - TensorInfo tensor{ tensor_info }; - tensor.set_id(allocate_new_tensor_id()); - return tensor; + tensor_info.set_id(_impl->allocate_new_tensor_id()); + // All input output tensors are User tensors that need real backing memory + _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User }); } TensorInfo GpuWorkloadSketch::create_tensor_info() { - TensorInfo tensor{}; - tensor.set_id(allocate_new_tensor_id()); - return tensor; -} - -ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id() -{ - return _impl->allocate_new_tensor_id(); + TensorInfo tensor_info{}; + register_new_tensor(tensor_info); + return tensor_info; } GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation() diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h index 08796b607b..d5075d5c94 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h @@ -24,6 +24,7 @@ #ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL #define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL +#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h" @@ -52,7 +53,8 @@ public: _comp_services{}, _component_graph{ &_comp_services }, _operator_group{}, - _interm_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() } + _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }, + _mem_map{} { } /** Prevent 
instances of this class from being copy constructed */ @@ -99,18 +101,47 @@ public: */ GpuWorkloadSourceCode generate_source_code() const { - return component_graph().fuse().write_workload_code(); + return component_graph().fuse(_mem_map).write_workload_code(); } - /** Create an intermediate tensor info and save it + /** Create a virtual (see @ref MemoryType) tensor info and save it * - * @return ITensorInfo The created intermediate tensor info object pointer + * @return ITensorInfo* The created virtual tensor info object pointer */ - ITensorInfo *create_intermediate_tensor() + ITensorInfo *create_virtual_tensor() { auto uptr = std::make_unique<TensorInfo>(); - uptr->set_id(-allocate_new_tensor_id()); // intermediate tensors must have negative id - _interm_tensor_info_list.emplace_back(std::move(uptr)); - return _interm_tensor_info_list.back().get(); + uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id + register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual }); + _managed_tensor_info_list.emplace_back(std::move(uptr)); + return _managed_tensor_info_list.back().get(); + } + /** Create an auxiliary (see @ref MemoryType) tensor info and save it + * + * @return ITensorInfo* The created auxiliary tensor info object pointer + */ + + /** Create an auxiliary (see @ref MemoryType) tensor info and save it + * + * @param[in] tensor_info @ref ITensorInfo to copy from + * + * @return ITensorInfo* The created auxiliary tensor info object pointer + */ + ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info) + { + auto uptr = std::make_unique<TensorInfo>(tensor_info); + uptr->set_id(allocate_new_tensor_id()); + register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } }); + _managed_tensor_info_list.emplace_back(std::move(uptr)); + return _managed_tensor_info_list.back().get(); + } + /** Register memory descriptor of a tensor info + * + * @param[in] info @ref 
ITensorInfo to be registered + * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info + */ + void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc) + { + _mem_map[info.id()] = mem_desc; } private: @@ -119,7 +150,8 @@ private: GpuKernelComponentGraph _component_graph; GpuOperatorGroup _operator_group; ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id }; - std::vector<std::unique_ptr<TensorInfo>> _interm_tensor_info_list; + std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list; + MemoryDescriptorMap _mem_map; }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp index 00fbb730b9..7a8b97957e 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp @@ -239,7 +239,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch, // Initialize the direct convolution descriptor const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info); - ITensorInfo *dst = sketch.implementation().create_intermediate_tensor(); + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); // Assert validation ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, attributes)); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp index cd5487c10b..c906da8199 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp @@ -66,7 +66,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); - ARM_COMPUTE_RETURN_ERROR_ON(!is_user_tensor(dst)); + ARM_COMPUTE_RETURN_ERROR_ON(!is_alloc_tensor(dst)); // Initialize the 
destination tensor info. TensorInfo dst_to_validate = *dst; diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h index d317ec7fd6..c9fc2c610f 100644 --- a/src/dynamic_fusion/utils/Utils.h +++ b/src/dynamic_fusion/utils/Utils.h @@ -33,21 +33,29 @@ namespace experimental { namespace dynamic_fusion { -inline bool is_user_tensor(const ITensorInfo *tensor_info) +/** Tensor should have backing memory. @ref MemoryType + */ +inline bool is_alloc_tensor(const ITensorInfo *tensor_info) { return tensor_info->id() > ITensorInfo::invalid_tensor_id; } -inline bool is_intermediate_tensor(const ITensorInfo *tensor_info) +/** Tensor should not have backing memory. @ref MemoryType + */ +inline bool is_noalloc_tensor(const ITensorInfo *tensor_info) { return tensor_info->id() < ITensorInfo::invalid_tensor_id; } +/** @ref ITensorInfo has valid id + */ inline bool is_valid_tensor(const ITensorInfo *tensor_info) { return tensor_info->has_valid_id(); } +/** @ref ITensorInfo has invalid id + */ inline bool is_invalid_tensor(const ITensorInfo *tensor_info) { return !is_valid_tensor(tensor_info); |