From f9d96e5a04810b4f1182b4c1b0f27601f6deb0dd Mon Sep 17 00:00:00 2001 From: Finn Williams Date: Wed, 27 Oct 2021 11:25:02 +0100 Subject: IVGCVSW-6303 Create a SingleAxisPacking strategy * add fsrcnn and mobilebert memory profiles to the strategy benchmark Signed-off-by: Finn Williams Change-Id: Ibd8b26f2153c561e5c5bec477f6246d0e8ffa4af --- .../memoryOptimizerStrategyLibrary/CMakeLists.txt | 2 + .../MemoryOptimizerStrategyLibrary.hpp | 2 + .../strategies/SingleAxisPriorityList.cpp | 261 +++++++++++++++++++++ .../strategies/SingleAxisPriorityList.hpp | 50 ++++ .../test/CMakeLists.txt | 1 + .../test/SingleAxisPriorityListTests.cpp | 42 ++++ .../test/TestMemBlocks.hpp | 43 ++++ 7 files changed, 401 insertions(+) create mode 100644 src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp create mode 100644 src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp create mode 100644 src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/SingleAxisPriorityListTests.cpp create mode 100644 src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/TestMemBlocks.hpp (limited to 'src/backends/backendsCommon') diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/CMakeLists.txt b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/CMakeLists.txt index 43ec9db670..9d2f7aa9a4 100644 --- a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/CMakeLists.txt +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/CMakeLists.txt @@ -10,6 +10,8 @@ list(APPEND armnnMemoryOptimizationStrategies_sources strategies/ConstantMemoryStrategy.cpp strategies/StrategyValidator.hpp strategies/StrategyValidator.cpp + strategies/SingleAxisPriorityList.hpp + strategies/SingleAxisPriorityList.cpp ) if(BUILD_UNIT_TESTS) diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp index 5e20a9f218..5fa151560b 100644 --- a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp @@ -10,6 +10,7 @@ #include "strategies/ConstantMemoryStrategy.hpp" #include "strategies/StrategyValidator.hpp" +#include "strategies/SingleAxisPriorityList.hpp" namespace { @@ -17,6 +18,7 @@ namespace static const std::vector memoryOptimizationStrategies( { "ConstantMemoryStrategy", + "SingleAxisPriorityList" "StrategyValidator" }); diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp new file mode 100644 index 0000000000..3afa061681 --- /dev/null +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp @@ -0,0 +1,261 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "SingleAxisPriorityList.hpp" + +#include +#include + + + +namespace armnn +{ + +// This strategy uses a vector of size_ts/words to represent occupancy of a memBlock in a memBin. +// Where each bit represents occupancy of a single time-step in that lifetime. +// We can then use bit masks to check for overlaps of memBlocks along the lifetime + +// For more information on the algorithm itself see: https://arxiv.org/pdf/2001.03288.pdf +// This strategy is an implementation of 4.3 Greedy by Size +constexpr size_t wordSize = sizeof(size_t) * 8; + +std::string SingleAxisPriorityList::GetName() const { + return m_Name; +} + +MemBlockStrategyType SingleAxisPriorityList::GetMemBlockStrategyType() const { + return m_MemBlockStrategyType; +} + +struct SingleAxisPriorityList::BinTracker +{ + // maxLifeTime is the number of operators in the model + // We then divide that by wordSize to get the number of words we need to store all the lifetimes + BinTracker(unsigned int maxLifeTime) + : m_OccupiedSpaces(1 + maxLifeTime/wordSize, 0) + {} + + // Add a block of a single word size to the bin + void AddBlock(MemBlock* block, const size_t word, const size_t index) + { + m_OccupiedSpaces[index] = m_OccupiedSpaces[index] | word; + + m_PlacedBlocks.push_back(block); + m_MemSize = std::max(m_MemSize, block->m_MemSize); + } + + // Add a block with a word size of two or more to the bin + void AddBlock(MemBlock* block, + const size_t startIndex, + const size_t endIndex, + const size_t firstWord, + const size_t lastWord) + { + m_OccupiedSpaces[startIndex] = m_OccupiedSpaces[startIndex] | firstWord; + m_OccupiedSpaces[endIndex] = m_OccupiedSpaces[endIndex] | lastWord; + + for (size_t i = startIndex +1; i <= endIndex -1; ++i) + { + m_OccupiedSpaces[i] = std::numeric_limits::max(); + } + + m_PlacedBlocks.push_back(block); + m_MemSize = std::max(m_MemSize, block->m_MemSize); + } + + size_t m_MemSize = 0; + std::vector m_OccupiedSpaces; + std::vector m_PlacedBlocks; +}; + +void SingleAxisPriorityList::PlaceBlocks(const std::list& priorityList, + std::vector& placedBlocks, + const unsigned int maxLifetime) +{ + // This function is used when the given block start and end lifetimes fit within a single word. + auto singleWordLoop = [&](MemBlock* curBlock, const size_t firstWord, const size_t index) + { + bool placed = false; + // loop through all existing bins + for (auto& blockList : placedBlocks) + { + // Check if the lifetimes at the given index overlap with the lifetimes of the block + if ((blockList.m_OccupiedSpaces[index] & firstWord) == 0) + { + // If the binary AND is 0 there is no overlap between the words and the block will fit + blockList.AddBlock(curBlock, firstWord, index); + placed = true; + break; + } + } + // No suitable bin was found, create a new bin/BinTracker and add it to the placedBlocks vector + if (!placed) + { + placedBlocks.emplace_back(BinTracker{maxLifetime}); + placedBlocks.back().AddBlock(curBlock, firstWord, index); + } + }; + + // This function is used when the given block start and end lifetimes fit within two words. + auto doubleWordLoop =[&](MemBlock* curBlock, + const size_t firstWord, + const size_t firstIndex, + const size_t lastWord, + const size_t lastIndex) + { + bool placed = false; + for (auto& blockList : placedBlocks) + { + // Check if the lifetimes at the given indexes overlap with the lifetimes of the block + if ((blockList.m_OccupiedSpaces[firstIndex] & firstWord) == 0 && + (blockList.m_OccupiedSpaces[lastIndex] & lastWord) == 0) + { + blockList.AddBlock(curBlock, firstIndex, lastIndex, firstWord, lastWord); + placed = true; + break; + } + } + // No suitable bin was found, create a new bin/BinTracker and add it to the placedBlocks vector + if (!placed) + { + placedBlocks.emplace_back(BinTracker{maxLifetime}); + placedBlocks.back().AddBlock(curBlock, firstIndex, lastIndex, firstWord, lastWord); + } + }; + + // Loop through the blocks to place + for(const auto curBlock : priorityList) + { + // The lifetimes of both the block and bin are represented by single bits on a word/s + // Each bin has maxLifetime/wordSize words + // The number of words for a block depends on the size of the blocks lifetime + // and the alignment of the block's lifetimes + // This allows for checking sizeof(size_t) * 8 lifetimes at once with a binary AND + const size_t firstWordIndex = curBlock->m_StartOfLife/wordSize; + const size_t lastWordIndex = curBlock->m_EndOfLife/wordSize; + + // Align and right shift the first word + // This sets the bits before curBlock->m_StartOfLife to 0 + size_t remainder = (curBlock->m_StartOfLife - firstWordIndex * wordSize); + size_t firstWord = std::numeric_limits::max() >> remainder; + + // If the indexes match the block can fit into a single word + if(firstWordIndex == lastWordIndex) + { + // We then need to zero the bits to the right of curBlock->m_EndOfLife + remainder += curBlock->m_EndOfLife + 1 - curBlock->m_StartOfLife; + firstWord = firstWord >> (wordSize - remainder); + firstWord = firstWord << (wordSize - remainder); + + singleWordLoop(curBlock, firstWord, firstWordIndex); + continue; + } + + // The indexes don't match we need at least two words + // Zero the bits to the right of curBlock->m_EndOfLife + remainder = (curBlock->m_EndOfLife +1 - lastWordIndex * wordSize); + + size_t lastWord = (1u << remainder) - 1; + lastWord = lastWord << (wordSize - remainder); + + if(firstWordIndex + 1 == lastWordIndex) + { + doubleWordLoop(curBlock, firstWord, firstWordIndex, lastWord, lastWordIndex); + continue; + } + + // The block cannot fit into two words + // We don't need to create any more words to represent this, + // as any word between the first and last block would always equal the maximum value of size_t, + // all the lifetimes would be occupied + + // Instead, we just check that the corresponding word in the bin is completely empty + + bool placed = false; + for (auto& blockList : placedBlocks) + { + // Check the first and last word + if ((blockList.m_OccupiedSpaces[firstWordIndex] & firstWord) != 0 || + (blockList.m_OccupiedSpaces[lastWordIndex] & lastWord) != 0) + { + continue; + } + + bool fits = true; + // Check that all spaces in between are clear + for (size_t i = firstWordIndex +1; i <= lastWordIndex -1; ++i) + { + if (blockList.m_OccupiedSpaces[i] != 0) + { + fits = false; + break; + } + } + + if (!fits) + { + continue; + } + + blockList.AddBlock(curBlock, firstWordIndex, lastWordIndex, firstWord, lastWord); + placed = true; + break; + } + + // No suitable bin was found, create a new bin/BinTracker and add it to the placedBlocks vector + if (!placed) + { + placedBlocks.emplace_back(BinTracker{maxLifetime}); + placedBlocks.back().AddBlock(curBlock, firstWordIndex, lastWordIndex, firstWord, lastWord); + } + } +} + +std::vector SingleAxisPriorityList::Optimize(std::vector& blocks) +{ + unsigned int maxLifetime = 0; + std::list priorityList; + for (auto& block: blocks) + { + maxLifetime = std::max(maxLifetime, block.m_EndOfLife); + priorityList.emplace_back(&block); + } + maxLifetime++; + + // From testing ordering by m_MemSize in non-descending order gives the best results overall + priorityList.sort([](const MemBlock* lhs, const MemBlock* rhs) + { + return lhs->m_MemSize > rhs->m_MemSize ; + }); + + + std::vector placedBlocks; + placedBlocks.reserve(maxLifetime); + PlaceBlocks(priorityList, placedBlocks, maxLifetime); + + std::vector bins; + bins.reserve(placedBlocks.size()); + for (auto blockList: placedBlocks) + { + MemBin bin; + bin.m_MemBlocks.reserve(blockList.m_PlacedBlocks.size()); + bin.m_MemSize = blockList.m_MemSize; + + for (auto block : blockList.m_PlacedBlocks) + { + bin.m_MemBlocks.emplace_back(MemBlock{block->m_StartOfLife, + block->m_EndOfLife, + block->m_MemSize, + 0, + block->m_Index,}); + } + bins.push_back(std::move(bin)); + } + + return bins; +} + +} // namespace armnn + diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp new file mode 100644 index 0000000000..c765c31a18 --- /dev/null +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp @@ -0,0 +1,50 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + + /// SingleAxisPriorityList sorts the MemBlocks according to some priority, + /// then trys to place them into as few bins as possible + class SingleAxisPriorityList : public IMemoryOptimizerStrategy + { + public: + SingleAxisPriorityList() + : m_Name(std::string("SingleAxisPriorityList")) + , m_MemBlockStrategyType(MemBlockStrategyType::SingleAxisPacking) {} + + std::string GetName() const override; + + MemBlockStrategyType GetMemBlockStrategyType() const override; + + std::vector Optimize(std::vector& memBlocks) override; + + private: + + // Tracks all memBlocks and their positions in a bin as well as their maximum memSize + struct BinTracker; + + // PlaceBlocks takes a list of MemBlock* and fits them into n bins. + // A block can only fit into an existing bin if it's lifetime does not overlap with the lifetime of the + // blocks already in a bin. + // If no appropriate bin is available a new one is created. + void PlaceBlocks(const std::list& priorityList, + std::vector& placedBlocks, + const unsigned int maxLifetime); + + std::string m_Name; + MemBlockStrategyType m_MemBlockStrategyType; + }; + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/CMakeLists.txt b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/CMakeLists.txt index b96782a84d..3068b609f6 100644 --- a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/CMakeLists.txt +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/CMakeLists.txt @@ -6,6 +6,7 @@ list(APPEND armnnMemoryOptimizationStrategiesUnitTests_sources ConstMemoryStrategyTests.cpp ValidatorStrategyTests.cpp + SingleAxisPriorityListTests.cpp ) add_library(armnnMemoryOptimizationStrategiesUnitTests OBJECT ${armnnMemoryOptimizationStrategiesUnitTests_sources}) diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/SingleAxisPriorityListTests.cpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/SingleAxisPriorityListTests.cpp new file mode 100644 index 0000000000..516f6f3771 --- /dev/null +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/SingleAxisPriorityListTests.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include "TestMemBlocks.hpp" + +#include +#include + +using namespace armnn; + +TEST_SUITE("SingleAxisPriorityListTestSuite") +{ + TEST_CASE("SingleAxisPriorityListTest") + { + std::vector memBlocks = fsrcnn; + + auto singleAxisPriorityList = std::make_shared(); + + CHECK_EQ(singleAxisPriorityList->GetName(), std::string("SingleAxisPriorityList")); + CHECK_EQ(singleAxisPriorityList->GetMemBlockStrategyType(), MemBlockStrategyType::SingleAxisPacking); + + StrategyValidator validator; + validator.SetStrategy(singleAxisPriorityList); + + std::vector memBins; + + CHECK_NOTHROW(memBins = validator.Optimize(memBlocks)); + + size_t minMemSize = GetMinPossibleMemorySize(memBlocks); + size_t actualSize = 0; + for (auto memBin : memBins) + { + actualSize += memBin.m_MemSize; + } + + CHECK(minMemSize == actualSize); + } +} diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/TestMemBlocks.hpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/TestMemBlocks.hpp new file mode 100644 index 0000000000..09369d1efa --- /dev/null +++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/test/TestMemBlocks.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +size_t GetMinPossibleMemorySize(const std::vector& blocks) +{ + unsigned int maxLifetime = 0; + for (auto& block: blocks) + { + maxLifetime = std::max(maxLifetime, block.m_EndOfLife); + } + maxLifetime++; + + std::vector lifetimes(maxLifetime); + for (const auto& block : blocks) + { + for (auto lifetime = block.m_StartOfLife; lifetime <= block.m_EndOfLife; ++lifetime) + { + lifetimes[lifetime] += block.m_MemSize; + } + } + return *std::max_element(lifetimes.begin(), lifetimes.end()); +} + +// Generated from fsrcnn_720p.tflite +std::vector fsrcnn +{ + { 0, 1, 691200, 0, 0 }, + { 1, 3, 7372800, 0, 1 }, + { 2, 5, 7372800, 0, 2 }, + { 3, 7, 1843200, 0, 3 }, + { 4, 9, 1843200, 0, 4 }, + { 5, 11, 1843200, 0, 5 }, + { 6, 13, 1843200, 0, 6 }, + { 7, 15, 1843200, 0, 7 }, + { 8, 17, 1843200, 0, 8 }, + { 9, 19, 7372800, 0, 9 }, + { 10, 21, 7372800, 0, 10 }, + { 11, 23, 2764800, 0, 11 }, + { 12, 25, 2764800, 0, 12 }, + { 13, 27, 2764800, 0, 13 } +}; \ No newline at end of file -- cgit v1.2.1