about | summary | refs | log | tree | commit | diff
path: root/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp 105
1 files changed, 52 insertions, 53 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
index 81c3f0c800..5a6d125d96 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Validate.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include <algorithm>
@@ -37,86 +38,87 @@ namespace dynamic_fusion
{
bool GpuKernelComponentGroup::add_component(ComponentPtr component)
{
- ARM_COMPUTE_ERROR_ON_MSG(
- _finalized, "The component group has been finalized and cannot be altered.");
+ ARM_COMPUTE_ERROR_ON_MSG(_finalized, "The component group has been finalized and cannot be altered.");
// note: Constraint 1 is guaranteed as a precondition
// Constraint 2
- if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ if (component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
{
return false;
}
// Constraint 3.1: Pattern: (Unfusable + Output)
- if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
+ if (!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable &&
+ component->type() != GpuComponentType::Output)
{
return false;
}
// Constraint 3.2
- if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ if (!_components.empty() &&
+ (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
{
return false;
}
// Constraint 4
- if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ if (component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
{
return false;
}
// Constraint 5
- if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ if (!_components.empty() && !(get_root_component()->properties() == component->properties()))
{
return false;
}
// Constraint 7
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor = root_dst_tensors[0];
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
}
// Constraint 8
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
}
// Constraint 9
- if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ if (component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
{
return false;
}
// Constraint 9 corollary
- if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ if (component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
{
return false;
}
@@ -126,36 +128,36 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
void GpuKernelComponentGroup::finalize()
{
- if(_finalized)
+ if (_finalized)
{
return;
}
_finalized = true;
- std::set<const ITensorInfo *> output_tensors;
+ std::set<const ITensorInfo *> output_tensors;
std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
- std::map<const ITensorInfo *, int32_t> tile_usages;
+ std::map<const ITensorInfo *, int32_t> tile_usages;
- for(auto component : _components)
+ for (auto component : _components)
{
- const auto tensors = component->tensors();
+ const auto tensors = component->tensors();
const auto src_tensors = tensors.get_const_src_tensors();
const auto dst_tensors = tensors.get_const_dst_tensors();
// Detect input, output and intermediate tensors.
- for(auto tensor : src_tensors)
+ for (auto tensor : src_tensors)
{
const auto output_tensors_it = output_tensors.find(tensor);
- if(output_tensors_it != output_tensors.end())
+ if (output_tensors_it != output_tensors.end())
{
// This tensor is the output of another operator.
// It must be marked as intermediate tensor.
output_tensors.erase(output_tensors_it);
_interm_tensors.insert(tensor);
}
- else if(_interm_tensors.find(tensor) == _interm_tensors.end())
+ else if (_interm_tensors.find(tensor) == _interm_tensors.end())
{
_input_tensors.insert(tensor);
@@ -164,7 +166,7 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
@@ -177,27 +179,27 @@ void GpuKernelComponentGroup::finalize()
// Check if the output can overwrite the input tile.
const auto component_type = component->type();
- if(component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
+ if (component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
{
ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
- const auto dst_tensor = dst_tensors[0];
- const auto &dst_shape = dst_tensor->tensor_shape();
- const auto &dst_type = dst_tensor->data_type();
+ const auto dst_tensor = dst_tensors[0];
+ const auto &dst_shape = dst_tensor->tensor_shape();
+ const auto &dst_type = dst_tensor->data_type();
tile_usages[dst_tensor] = 0;
- for(auto src_tensor : src_tensors)
+ for (auto src_tensor : src_tensors)
{
const auto &src_shape = src_tensor->tensor_shape();
- const auto &src_type = src_tensor->data_type();
+ const auto &src_type = src_tensor->data_type();
- if(src_shape == dst_shape && src_type == dst_type)
+ if (src_shape == dst_shape && src_type == dst_type)
{
const auto tile_usages_it = tile_usages.find(src_tensor);
ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
- if(component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
+ if (component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
{
// Increase the number of tile usages unless this component is an output
// and the tile has not been shared with any component.
@@ -212,7 +214,7 @@ void GpuKernelComponentGroup::finalize()
else
{
// Outputs of complex and unfusable components need dedicated tile.
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
tile_usages[tensor] = 0;
}
@@ -220,25 +222,25 @@ void GpuKernelComponentGroup::finalize()
}
// Find the smallest list of tiles that the intermediate tensors need to write to.
- for(auto tensor : _input_tensors)
+ for (auto tensor : _input_tensors)
{
_tile_map[tensor] = tensor;
}
- for(auto component : _components)
+ for (auto component : _components)
{
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
const auto target_tiles = possible_tile_map.at(tensor);
- _tile_map[tensor] = tensor;
+ _tile_map[tensor] = tensor;
- for(auto target : target_tiles)
+ for (auto target : target_tiles)
{
const auto num_usage = tile_usages[target];
- if(num_usage <= 1)
+ if (num_usage <= 1)
{
// The target tile is consumed by only this operator, so we can reuse it
// for the destination tensor data.
@@ -249,26 +251,23 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : output_tensors)
+ for (auto tensor : output_tensors)
{
_tile_map[tensor] = tensor;
}
// All intermediate tensors that cannot be shared with any previous tensor
// will need to be declared as tile variable.
- for(auto tensor_tile : _tile_map)
+ for (auto tensor_tile : _tile_map)
{
- if(tensor_tile.first == tensor_tile.second &&
- _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
+ if (tensor_tile.first == tensor_tile.second && _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
{
_tiles.push_back(tensor_tile.first);
}
}
- std::set_union(
- _input_tensors.begin(), _input_tensors.end(),
- output_tensors.begin(), output_tensors.end(),
- std::back_inserter(_argument_tensors));
+ std::set_union(_input_tensors.begin(), _input_tensors.end(), output_tensors.begin(), output_tensors.end(),
+ std::back_inserter(_argument_tensors));
_any_output_tensor = *output_tensors.begin();
}
@@ -282,7 +281,7 @@ const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInf
{
ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
- if(_tile_map.find(tensor) != _tile_map.end())
+ if (_tile_map.find(tensor) != _tile_map.end())
{
return _tile_map.at(tensor);
}
@@ -304,7 +303,7 @@ std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors()
GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
{
- if(empty())
+ if (empty())
{
return nullptr;
}