Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp')
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 368
1 file changed, 368 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
new file mode 100644
index 0000000000..5a6d125d96
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentGroup.h"
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <set>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
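+// Try to add a component to the group, checking it against the fusion constraints
+// below. On success the component is appended and true is returned; if any
+// constraint is violated, the group is left unchanged and false is returned.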
+bool GpuKernelComponentGroup::add_component(ComponentPtr component)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_finalized, "The component group has been finalized and cannot be altered.");
+
+    // Note: Constraint 1 is guaranteed as a precondition.
+    // Constraint 2: the group cannot hold more than max_fused_components fused components
+    // (Output components are exempt from this limit).
+ if (component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ {
+ return false;
+ }
+    // Constraint 3.1: pattern (Unfusable + Output): once the root component is Unfusable,
+    // only Output components may follow it.
+ if (!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable &&
+ component->type() != GpuComponentType::Output)
+ {
+ return false;
+ }
+    // Constraint 3.2: any component other than the root must be of type Simple or Output.
+ if (!_components.empty() &&
+ (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ {
+ return false;
+ }
+    // Constraint 4: only Unfusable components may have more than one destination tensor.
+ if (component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ {
+ return false;
+ }
+    // Constraint 5: every component must share the kernel properties of the root component.
+ if (!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ {
+ return false;
+ }
+    // Constraint 7: all destination tensors of the root component and of the new component
+    // must have the same shape.
+ if (!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor = root_dst_tensors[0];
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for (const auto &t : root_dst_tensors)
+ {
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ for (const auto &t : dst_tensors)
+ {
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ }
+    // Constraint 8: all destination tensors of the root component and of the new component
+    // must have the same data layout.
+ if (!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for (const auto &t : root_dst_tensors)
+ {
+ if (t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ for (const auto &t : dst_tensors)
+ {
+ if (t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ }
+    // Constraint 9: a component must have fewer than max_dst_tensors destination tensors.
+ if (component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ {
+ return false;
+ }
+    // Constraint 9 corollary: with at most max_dst_tensors Output components on top of the
+    // fused components, the group cannot grow beyond max_fused_components + max_dst_tensors.
+ if (component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ {
+ return false;
+ }
+ _components.push_back(component);
+ return true;
+}
+
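+// Classify every tensor used by the group as an input, output or intermediate
+// tensor, then build the tensor-to-tile mapping so that intermediate results can
+// reuse existing tiles wherever shapes, data types and usage counts allow it.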
+void GpuKernelComponentGroup::finalize()
+{
+ if (_finalized)
+ {
+ return;
+ }
+
+ _finalized = true;
+
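+    // output_tensors:     tensors written by a component and not (yet) consumed by a later one.
+    // possible_tile_map:  for each tensor, the candidate source tiles its data could reuse.
+    // tile_usages:        for each tensor's tile, the number of components sharing it so far.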
+ std::set<const ITensorInfo *> output_tensors;
+ std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
+ std::map<const ITensorInfo *, int32_t> tile_usages;
+
+ for (auto component : _components)
+ {
+ const auto tensors = component->tensors();
+ const auto src_tensors = tensors.get_const_src_tensors();
+ const auto dst_tensors = tensors.get_const_dst_tensors();
+
+ // Detect input, output and intermediate tensors.
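+        // A tensor first seen as a source is an input of the group; a tensor that was
+        // produced by an earlier component and is consumed here is reclassified from
+        // output to intermediate.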
+ for (auto tensor : src_tensors)
+ {
+ const auto output_tensors_it = output_tensors.find(tensor);
+
+ if (output_tensors_it != output_tensors.end())
+ {
+                // This tensor is the output of another operator.
+                // It must be marked as an intermediate tensor.
+ output_tensors.erase(output_tensors_it);
+ _interm_tensors.insert(tensor);
+ }
+ else if (_interm_tensors.find(tensor) == _interm_tensors.end())
+ {
+ _input_tensors.insert(tensor);
+
+ tile_usages[tensor] = 0;
+ possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
+ }
+ }
+
+ for (auto tensor : dst_tensors)
+ {
+ ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
+ ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
+ ARM_COMPUTE_ERROR_ON(_interm_tensors.find(tensor) != _interm_tensors.end());
+ output_tensors.insert(tensor);
+
+ tile_usages[tensor] = 0;
+ possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
+ }
+
+ // Check if the output can overwrite the input tile.
+ const auto component_type = component->type();
+ if (component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
+ {
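+            // Only Simple and Output components may alias a source tile: per the else
+            // branch below, Complex and Unfusable components always get dedicated tiles.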
+ ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
+
+ const auto dst_tensor = dst_tensors[0];
+ const auto &dst_shape = dst_tensor->tensor_shape();
+ const auto &dst_type = dst_tensor->data_type();
+
+ tile_usages[dst_tensor] = 0;
+
+ for (auto src_tensor : src_tensors)
+ {
+ const auto &src_shape = src_tensor->tensor_shape();
+ const auto &src_type = src_tensor->data_type();
+
+ if (src_shape == dst_shape && src_type == dst_type)
+ {
+ const auto tile_usages_it = tile_usages.find(src_tensor);
+ ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
+
+ if (component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
+ {
+                        // Increase the number of tile usages unless this component is an Output
+                        // component and the tile has not been shared with any other component.
+                        // (Reason: an Output component doesn't change the content of the tile.)
+ ++tile_usages_it->second;
+ }
+
+ possible_tile_map[dst_tensor].push_back(src_tensor);
+ }
+ }
+ }
+ else
+ {
+            // Outputs of Complex and Unfusable components need dedicated tiles.
+ for (auto tensor : dst_tensors)
+ {
+ tile_usages[tensor] = 0;
+ }
+ }
+ }
+
+    // Map each tensor onto a tile, sharing tiles where possible so that intermediate
+    // tensors need as few dedicated tiles as possible.
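+    // For example (hypothetical shapes): in a chain in -> Simple -> t0 -> Simple -> out
+    // where all tensors share the same shape and data type, the intermediate t0 is
+    // mapped onto in's tile, so no tile variable has to be declared for it, while the
+    // output tensor out always keeps a tile of its own.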
+ for (auto tensor : _input_tensors)
+ {
+ _tile_map[tensor] = tensor;
+ }
+
+ for (auto component : _components)
+ {
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+
+ for (auto tensor : dst_tensors)
+ {
+ const auto target_tiles = possible_tile_map.at(tensor);
+ _tile_map[tensor] = tensor;
+
+ for (auto target : target_tiles)
+ {
+ const auto num_usage = tile_usages[target];
+
+ if (num_usage <= 1)
+ {
+ // The target tile is consumed by only this operator, so we can reuse it
+ // for the destination tensor data.
+ _tile_map[tensor] = _tile_map.at(target);
+ break;
+ }
+ }
+ }
+ }
+
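+    // Output tensors always keep a tile of their own, overriding any mapping made above.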
+ for (auto tensor : output_tensors)
+ {
+ _tile_map[tensor] = tensor;
+ }
+
+    // All intermediate tensors that cannot share a tile with any previous tensor
+    // will need to be declared as tile variables.
+ for (auto tensor_tile : _tile_map)
+ {
+ if (tensor_tile.first == tensor_tile.second && _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
+ {
+ _tiles.push_back(tensor_tile.first);
+ }
+ }
+
+    std::set_union(_input_tensors.begin(), _input_tensors.end(), output_tensors.begin(), output_tensors.end(),
+                   std::back_inserter(_argument_tensors));
+    ARM_COMPUTE_ERROR_ON_MSG(output_tensors.empty(), "The component group must have at least one output tensor.");
+    _any_output_tensor = *output_tensors.begin();
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_tiles() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _tiles;
+}
+
+const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInfo *tensor) const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+
+ if (_tile_map.find(tensor) != _tile_map.end())
+ {
+ return _tile_map.at(tensor);
+ }
+
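+    // A tensor without a mapping (i.e. one not seen during finalization) backs its own tile.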
+ return tensor;
+}
+
+const ITensorInfo *GpuKernelComponentGroup::get_any_dst_tensor() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _any_output_tensor;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _argument_tensors;
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
+{
+ if (empty())
+ {
+ return nullptr;
+ }
+ return _components[0];
+}
+
+bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _interm_tensors.find(tensor) != _interm_tensors.end();
+}
+
+bool GpuKernelComponentGroup::is_input_tensor(const ITensorInfo *tensor) const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
+ return _input_tensors.find(tensor) != _input_tensors.end();
+}
+
+size_t GpuKernelComponentGroup::size() const
+{
+ return _components.size();
+}
+bool GpuKernelComponentGroup::empty() const
+{
+ return _components.empty();
+}
+GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index)
+{
+ return _components[index];
+}
+const GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index) const
+{
+ return _components[index];
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::begin()
+{
+ return _components.begin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::end()
+{
+ return _components.end();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::begin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::end() const
+{
+ return _components.cend();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cbegin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cend() const
+{
+ return _components.cend();
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute