about summary refs log tree commit diff
path: root/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp 146
1 files changed, 52 insertions, 94 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
index 3af4c1429d..0d2574957f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -27,6 +27,8 @@
#include "arm_compute/core/Validate.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include <algorithm>
+
namespace arm_compute
{
namespace experimental
@@ -35,6 +37,9 @@ namespace dynamic_fusion
{
bool GpuKernelComponentGroup::add_component(ComponentPtr component)
{
+ ARM_COMPUTE_ERROR_ON_MSG(
+ _finalized, "The component group has been finalized and cannot be altered.");
+
// note: Constraint 1 is guaranteed as a precondition
// Constraint 2
if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
@@ -51,11 +56,6 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
{
return false;
}
- // Constraint 3.3: Disallow multiple output components
- if(!_components.empty() && get_last_component()->type() == GpuComponentType::Output && component->type() == GpuComponentType::Output)
- {
- return false;
- }
// Constraint 4
if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
{
@@ -124,55 +124,68 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
return true;
}
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_src_tensors() const
+void GpuKernelComponentGroup::finalize()
{
- if(_components.empty())
+ if(_finalized)
{
- return {};
+ return;
}
- auto src_tensors = _components[0]->tensors().get_const_src_tensors();
- auto prev_dst_tensor = _components[0]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
- for(unsigned int i = 1; i < _components.size(); ++i)
+
+ _finalized = true;
+
+ std::set<const ITensorInfo *> input_tensors;
+ std::set<const ITensorInfo *> output_tensors;
+
+ for(auto component : _components)
{
- auto cur_src_tensors = _components[i]->tensors().get_const_src_tensors();
- for(const auto src_tensor : cur_src_tensors)
+ const auto tensors = component->tensors();
+ const auto src_tensors = tensors.get_const_src_tensors();
+ const auto dst_tensors = tensors.get_const_dst_tensors();
+
+ // Detect input, output and intermediate tensors.
+ for(auto tensor : src_tensors)
{
- if(src_tensor->id() == prev_dst_tensor->id())
+ const auto output_tensors_it = output_tensors.find(tensor);
+
+ if(output_tensors_it != output_tensors.end())
{
- continue; // Skip "intermediate" tensors. I.e. tensors that are used to link between two components
+ // This tensor is the output of another operator.
+ // It must be marked as intermediate tensor.
+ output_tensors.erase(output_tensors_it);
+ _interm_tensors.insert(tensor);
+ }
+ else if(_interm_tensors.find(tensor) == _interm_tensors.end())
+ {
+ input_tensors.insert(tensor);
}
- src_tensors.push_back(src_tensor);
}
- prev_dst_tensor = _components[i]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+
+ for(auto tensor : dst_tensors)
+ {
+ ARM_COMPUTE_ERROR_ON(input_tensors.find(tensor) != input_tensors.end());
+ ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
+ ARM_COMPUTE_ERROR_ON(_interm_tensors.find(tensor) != _interm_tensors.end());
+ output_tensors.insert(tensor);
+ }
}
- return src_tensors;
+ std::set_union(
+ input_tensors.begin(), input_tensors.end(),
+ output_tensors.begin(), output_tensors.end(),
+ std::back_inserter(_argument_tensors));
+ _any_output_tensor = *output_tensors.begin();
}
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_dst_tensors() const
+const ITensorInfo *GpuKernelComponentGroup::get_any_dst_tensor() const
{
- if(_components.empty())
- {
- return {};
- }
- const auto dst_tensor_ptrs = _components[_components.size() - 1]->tensors().get_const_dst_tensors();
- std::vector<const ITensorInfo *> dst_tensors;
- for(auto tensor_ptr : dst_tensor_ptrs)
- {
- dst_tensors.push_back(tensor_ptr);
- }
- return dst_tensors;
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _any_output_tensor;
}
std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
{
- std::vector<const ITensorInfo *> arguments;
- const auto src_tensors = get_src_tensors();
- const auto dst_tensors = get_dst_tensors();
- arguments.reserve(src_tensors.size() + dst_tensors.size());
- arguments.insert(arguments.end(), src_tensors.begin(), src_tensors.end());
- arguments.insert(arguments.end(), dst_tensors.begin(), dst_tensors.end());
- return arguments;
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _argument_tensors;
}
GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
@@ -184,41 +197,10 @@ GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_componen
return _components[0];
}
-GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_last_component() const
-{
- if(empty())
- {
- return nullptr;
- }
- return _components[_components.size() - 1];
-}
-
-GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_previous_component(ComponentId id) const
-{
- if(empty())
- {
- return nullptr;
- }
- // Get the index of the requested component
- size_t ind = 0;
- for(const auto c : _components)
- {
- if(c->id() == id)
- {
- break;
- }
- ind++;
- }
- if(ind == 0 || ind >= _components.size())
- {
- return nullptr;
- }
- return _components[ind - 1];
-}
-
bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
{
- return is_tensor_in(tensor, get_interm_tensors());
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _interm_tensors.find(tensor) != _interm_tensors.end();
}
size_t GpuKernelComponentGroup::size() const
@@ -262,30 +244,6 @@ typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuK
return _components.cend();
}
-std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_interm_tensors() const
-{
- std::vector<const ITensorInfo *> interm_tensors{};
- for(unsigned int i = 0; i + 1 < _components.size(); ++i)
- {
- auto interm_tensor = _components[i]->tensors().get_const_dst_tensors()[0];
- interm_tensors.push_back(interm_tensor); // PRE: Only one dst tensor per component
- }
-
- return interm_tensors;
-}
-
-bool GpuKernelComponentGroup::is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors)
-{
- for(auto t : tensors)
- {
- if(tensor->id() == t->id())
- {
- return true;
- }
- }
- return false;
-}
-
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute