diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 146 |
1 files changed, 52 insertions, 94 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
index 3af4c1429d..0d2574957f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -27,6 +27,8 @@
 #include "arm_compute/core/Validate.h"
 #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
 
+#include <algorithm>
+
 namespace arm_compute
 {
 namespace experimental
@@ -35,6 +37,9 @@ namespace dynamic_fusion
 {
 bool GpuKernelComponentGroup::add_component(ComponentPtr component)
 {
+    ARM_COMPUTE_ERROR_ON_MSG(
+        _finalized, "The component group has been finalized and cannot be altered.");
+
     // note: Constraint 1 is guaranteed as a precondition
     // Constraint 2
     if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
@@ -51,11 +56,6 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
     {
         return false;
     }
-    // Constraint 3.3: Disallow multiple output components
-    if(!_components.empty() && get_last_component()->type() == GpuComponentType::Output && component->type() == GpuComponentType::Output)
-    {
-        return false;
-    }
     // Constraint 4
     if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
     {
@@ -124,55 +124,68 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
     return true;
 }
 
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_src_tensors() const
+void GpuKernelComponentGroup::finalize()
 {
-    if(_components.empty())
+    if(_finalized)
     {
-        return {};
+        return;
     }
-    auto src_tensors = _components[0]->tensors().get_const_src_tensors();
-    auto prev_dst_tensor = _components[0]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
-    for(unsigned int i = 1; i < _components.size(); ++i)
+
+    _finalized = true;
+
+    std::set<const ITensorInfo *> input_tensors;
+    std::set<const ITensorInfo *> output_tensors;
+
+    for(auto component : _components)
     {
-        auto cur_src_tensors = _components[i]->tensors().get_const_src_tensors();
-        for(const auto src_tensor : cur_src_tensors)
+        const auto tensors = component->tensors();
+        const auto src_tensors = tensors.get_const_src_tensors();
+        const auto dst_tensors = tensors.get_const_dst_tensors();
+
+        // Detect input, output and intermediate tensors.
+        for(auto tensor : src_tensors)
         {
-            if(src_tensor->id() == prev_dst_tensor->id())
+            const auto output_tensors_it = output_tensors.find(tensor);
+
+            if(output_tensors_it != output_tensors.end())
             {
-                continue; // Skip "intermediate" tensors. I.e. tensors that are used to link between two components
+                // This tensor is the output of another operator.
+                // It must be marked as intermediate tensor.
+                output_tensors.erase(output_tensors_it);
+                _interm_tensors.insert(tensor);
+            }
+            else if(_interm_tensors.find(tensor) == _interm_tensors.end())
+            {
+                input_tensors.insert(tensor);
             }
-            src_tensors.push_back(src_tensor);
         }
-        prev_dst_tensor = _components[i]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+
+        for(auto tensor : dst_tensors)
+        {
+            ARM_COMPUTE_ERROR_ON(input_tensors.find(tensor) != input_tensors.end());
+            ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
+            ARM_COMPUTE_ERROR_ON(_interm_tensors.find(tensor) != _interm_tensors.end());
+            output_tensors.insert(tensor);
+        }
     }
 
-    return src_tensors;
+    std::set_union(
+        input_tensors.begin(), input_tensors.end(),
+        output_tensors.begin(), output_tensors.end(),
+        std::back_inserter(_argument_tensors));
+    _any_output_tensor = *output_tensors.begin();
 }
 
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_dst_tensors() const
+const ITensorInfo *GpuKernelComponentGroup::get_any_dst_tensor() const
 {
-    if(_components.empty())
-    {
-        return {};
-    }
-    const auto dst_tensor_ptrs = _components[_components.size() - 1]->tensors().get_const_dst_tensors();
-    std::vector<const ITensorInfo *> dst_tensors;
-    for(auto tensor_ptr : dst_tensor_ptrs)
-    {
-        dst_tensors.push_back(tensor_ptr);
-    }
-    return dst_tensors;
+    ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+    return _any_output_tensor;
 }
 
 std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
 {
-    std::vector<const ITensorInfo *> arguments;
-    const auto src_tensors = get_src_tensors();
-    const auto dst_tensors = get_dst_tensors();
-    arguments.reserve(src_tensors.size() + dst_tensors.size());
-    arguments.insert(arguments.end(), src_tensors.begin(), src_tensors.end());
-    arguments.insert(arguments.end(), dst_tensors.begin(), dst_tensors.end());
-    return arguments;
+    ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+    return _argument_tensors;
 }
 
 GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
@@ -184,41 +197,10 @@ GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_componen
     return _components[0];
 }
 
-GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_last_component() const
-{
-    if(empty())
-    {
-        return nullptr;
-    }
-    return _components[_components.size() - 1];
-}
-
-GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_previous_component(ComponentId id) const
-{
-    if(empty())
-    {
-        return nullptr;
-    }
-    // Get the index of the requested component
-    size_t ind = 0;
-    for(const auto c : _components)
-    {
-        if(c->id() == id)
-        {
-            break;
-        }
-        ind++;
-    }
-    if(ind == 0 || ind >= _components.size())
-    {
-        return nullptr;
-    }
-    return _components[ind - 1];
-}
-
 bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
 {
-    return is_tensor_in(tensor, get_interm_tensors());
+    ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+    return _interm_tensors.find(tensor) != _interm_tensors.end();
 }
 
 size_t GpuKernelComponentGroup::size() const
@@ -262,30 +244,6 @@ typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuK
     return _components.cend();
 }
 
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_interm_tensors() const
-{
-    std::vector<const ITensorInfo *> interm_tensors{};
-    for(unsigned int i = 0; i + 1 < _components.size(); ++i)
-    {
-        auto interm_tensor = _components[i]->tensors().get_const_dst_tensors()[0];
-        interm_tensors.push_back(interm_tensor); // PRE: Only one dst tensor per component
-    }
-
-    return interm_tensors;
-}
-
-bool GpuKernelComponentGroup::is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors)
-{
-    for(auto t : tensors)
-    {
-        if(tensor->id() == t->id())
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute