From 17b12307edeaf488cfdf0cc3fa00b8f08293c93e Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 18 Jun 2018 18:13:51 +0100
Subject: COMPMID-1293: Handle aligned allocations

Change-Id: I6e642c8cd968240f883c327464519e57e5d0c3e3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/136088
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 arm_compute/core/utils/misc/Utility.h          | 24 +++++++++++++++++++++---
 arm_compute/runtime/ITensorAllocator.h         | 15 +++++++++++----
 arm_compute/runtime/MemoryRegion.h             | 29 +++++++++++++++++++++--------
 arm_compute/runtime/NEON/AssemblyHelper.h      |  9 ++-------
 src/runtime/ITensorAllocator.cpp               | 14 ++++++++++----
 src/runtime/TensorAllocator.cpp                |  2 +-
 tests/validation/NEON/UNIT/TensorAllocator.cpp | 19 +++++++++++++++++++
 7 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h
index f30a417a09..a2784a2fc0 100644
--- a/arm_compute/core/utils/misc/Utility.h
+++ b/arm_compute/core/utils/misc/Utility.h
@@ -165,15 +165,33 @@ std::vector<size_t> sort_indices(const std::vector<T> &v)
     return idx;
 }
 
-inline bool endswith(const std::string &filename, const std::string &suffix)
+/** Checks if a string contains a given suffix
+ *
+ * @param[in] str    Input string
+ * @param[in] suffix Suffix to check for
+ *
+ * @return True if the string ends with the given suffix else false
+ */
+inline bool endswith(const std::string &str, const std::string &suffix)
 {
-    if(filename.size() < suffix.size())
+    if(str.size() < suffix.size())
     {
         return false;
     }
-    return std::equal(suffix.rbegin(), suffix.rend(), filename.rbegin());
+    return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
 }
 
+/** Checks if a pointer complies with a given alignment
+ *
+ * @param[in] ptr       Pointer to check
+ * @param[in] alignment Alignment value
+ *
+ * @return True if the pointer is aligned else false
+ */
+inline bool check_aligned(void *ptr, const size_t alignment)
+{
+    return (reinterpret_cast<std::uintptr_t>(ptr) % alignment) == 0;
+}
 } // namespace utility
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_MISC_UTILITY_H__ */
diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h
index 6103e436bc..bb708f0b97 100644
--- a/arm_compute/runtime/ITensorAllocator.h
+++ b/arm_compute/runtime/ITensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,9 +50,10 @@ public:
 
     /** Initialize a tensor based on the passed @ref TensorInfo.
      *
-     * @param[in] input TensorInfo object containing the description of the tensor to initialize.
+     * @param[in] input     TensorInfo object containing the description of the tensor to initialize.
+     * @param[in] alignment Alignment in bytes that the underlying base pointer should comply with.
      */
-    void init(const TensorInfo &input);
+    void init(const TensorInfo &input, size_t alignment = 0);
     /** Return a reference to the tensor's metadata
      *
      * @return Reference to the tensor's metadata.
@@ -63,6 +64,11 @@ public:
      * @return Constant reference to the tensor's metadata.
      */
     const TensorInfo &info() const;
+    /** Return the underlying tensor buffer alignment
+     *
+     * @return Tensor buffer alignment
+     */
+    size_t alignment() const;
 
     /** Interface to be implemented by the child class to allocate the tensor.
      *
@@ -87,7 +93,8 @@ protected:
     virtual void unlock() = 0;
 
 private:
-    TensorInfo _info; /**< Tensor's metadata. */
+    TensorInfo _info;      /**< Tensor's metadata. */
+    size_t     _alignment; /**< Tensor's alignment in bytes */
 };
 }
 #endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */
diff --git a/arm_compute/runtime/MemoryRegion.h b/arm_compute/runtime/MemoryRegion.h
index bf4e1719de..481b20d375 100644
--- a/arm_compute/runtime/MemoryRegion.h
+++ b/arm_compute/runtime/MemoryRegion.h
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/IMemoryRegion.h"
 
 #include "arm_compute/core/Error.h"
+#include "support/ToolchainSupport.h"
 
 #include <cstddef>
 
@@ -38,18 +39,28 @@ class MemoryRegion final : public IMemoryRegion
 {
 public:
     /** Default constructor
      *
-     * @param[in] size Region size
+     * @param[in] size      Region size
+     * @param[in] alignment Alignment in bytes of the base pointer. Defaults to 0
      */
-    MemoryRegion(size_t size)
-        : IMemoryRegion(size), _mem(nullptr), _ptr(nullptr)
+    MemoryRegion(size_t size, size_t alignment = 0)
+        : IMemoryRegion(size), _mem(nullptr), _alignment(alignment), _offset(0)
     {
         if(size != 0)
         {
-            _mem = std::shared_ptr<uint8_t>(new uint8_t[size](), [](uint8_t *ptr)
+            // Allocate backing memory
+            size_t space = size + alignment;
+            _mem         = std::shared_ptr<uint8_t>(new uint8_t[space](), [](uint8_t *ptr)
             {
                 delete[] ptr;
             });
-            _ptr = _mem.get();
+
+            // Calculate alignment offset
+            if(alignment != 0)
+            {
+                void *aligned_ptr = _mem.get();
+                support::cpp11::align(alignment, size, aligned_ptr, space);
+                _offset = reinterpret_cast<uintptr_t>(aligned_ptr) - reinterpret_cast<uintptr_t>(_mem.get());
+            }
         }
     }
     /** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -64,11 +75,12 @@ public:
     // Inherited methods overridden :
     void *buffer() final
     {
-        return _mem.get();
+        return reinterpret_cast<void *>(_mem.get() + _offset);
     }
     void *buffer() const final
     {
-        return _mem.get();
+        // FIXME (COMPMID-1088) : Remove handle() and _offset when done
+        return reinterpret_cast<void *>(_mem.get() + _offset);
     }
     void **handle() final
     {
@@ -77,7 +89,8 @@ public:
 
 protected:
     std::shared_ptr<uint8_t> _mem;
-    uint8_t                 *_ptr;
+    size_t                   _alignment;
+    size_t                   _offset;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_RUNTIME_MEMORY_REGION_H__ */
diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h
index c4ba1a584e..5801c42684 100644
--- a/arm_compute/runtime/NEON/AssemblyHelper.h
+++ b/arm_compute/runtime/NEON/AssemblyHelper.h
@@ -89,13 +89,8 @@ public:
         const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
 
         const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
-        // Forcing 128-byte alignment (required by 32-bit kernels)
-        const unsigned int alignment   = 128;
-        void              *raw_ptr     = reinterpret_cast<void *>(_pretranspose->buffer());
-        size_t             space       = _pretranspose->info()->total_size();
-        void              *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space);
         ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr);
-        _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b);
+        _gemm_kernel_asm->pretranspose_B_array(_pretranspose->buffer(), in1_ptr, ldb, multi_stride_b);
         _b->mark_as_unused();
     }
 
@@ -169,7 +164,7 @@ using AssemblyKernelGlueS8S32 = AssemblyKernelGlue<int8_t, int32_t>;
 inline void allocate_workspace(size_t workspace_size, Tensor &workspace, MemoryGroup *memory_group, size_t alignment, unsigned int num_threads)
 {
     ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
-    workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment - 1) * num_threads }, 1, DataType::S8));
+    workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment) * num_threads }, 1, DataType::S8), alignment);
     if(memory_group != nullptr)
     {
         memory_group->manage(&workspace);
diff --git a/src/runtime/ITensorAllocator.cpp b/src/runtime/ITensorAllocator.cpp
index 8294201384..087f324922 100644
--- a/src/runtime/ITensorAllocator.cpp
+++ b/src/runtime/ITensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
 *
@@ -31,13 +31,14 @@
 using namespace arm_compute;
 
 ITensorAllocator::ITensorAllocator()
-    : _info()
+    : _info(), _alignment(0)
 {
 }
 
-void ITensorAllocator::init(const TensorInfo &input)
+void ITensorAllocator::init(const TensorInfo &input, size_t alignment)
 {
-    _info = input;
+    _info      = input;
+    _alignment = alignment;
 }
 
 TensorInfo &ITensorAllocator::info()
@@ -49,3 +50,8 @@ const TensorInfo &ITensorAllocator::info() const
 {
     return _info;
 }
+
+size_t ITensorAllocator::alignment() const
+{
+    return _alignment;
+}
diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp
index 993a95b6c3..c84a2719d8 100644
--- a/src/runtime/TensorAllocator.cpp
+++ b/src/runtime/TensorAllocator.cpp
@@ -138,7 +138,7 @@ void TensorAllocator::allocate()
 
     if(_associated_memory_group == nullptr)
     {
-        _memory = Memory(std::make_shared<MemoryRegion>(info().total_size()));
+        _memory = Memory(std::make_shared<MemoryRegion>(info().total_size(), alignment()));
     }
     else
     {
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
index 872054f3d1..7781107210 100644
--- a/tests/validation/NEON/UNIT/TensorAllocator.cpp
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -23,9 +23,12 @@
  */
 #include "arm_compute/runtime/TensorAllocator.h"
 
+#include "arm_compute/core/utils/misc/Utility.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryRegion.h"
+
 #include "support/ToolchainSupport.h"
+
 #include "tests/Utils.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -82,6 +85,22 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS);
 }
 
+TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL)
+{
+    // Init tensor info
+    TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
+    const size_t requested_alignment = 1024;
+
+    Tensor t;
+    t.allocator()->init(info, requested_alignment);
+    t.allocator()->allocate();
+
+    ARM_COMPUTE_EXPECT(t.buffer() != nullptr, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t.allocator()->alignment() == requested_alignment, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(arm_compute::utility::check_aligned(reinterpret_cast<void *>(t.buffer()), requested_alignment),
+                       framework::LogLevel::ERRORS);
+}
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 TEST_SUITE_END()
-- 
cgit v1.2.1
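
For readers unfamiliar with the scheme the MemoryRegion constructor above applies, the snippet below is a minimal standalone sketch of the same over-allocate-then-align idea: allocate size + alignment bytes, let the alignment helper bump the pointer forward to the next aligned address, and keep the resulting offset from the base pointer. It uses std::align from <memory> rather than support::cpp11::align (assumed here to behave the same way), and every name in it is illustrative rather than part of the library.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>

int main()
{
    const std::size_t size      = 1000; // bytes the user asked for
    const std::size_t alignment = 1024; // requested alignment (power of two)

    // Over-allocate so that an aligned address is guaranteed to fit.
    std::size_t                space = size + alignment;
    std::unique_ptr<uint8_t[]> mem(new uint8_t[space]());

    // std::align moves aligned_ptr forward to the next multiple of alignment
    // (and shrinks space accordingly); it leaves it untouched if already aligned.
    void *aligned_ptr = mem.get();
    std::align(alignment, size, aligned_ptr, space);

    // The offset is what MemoryRegion stores; buffer() would return base + offset.
    const std::size_t offset = reinterpret_cast<std::uintptr_t>(aligned_ptr) -
                               reinterpret_cast<std::uintptr_t>(mem.get());

    assert(reinterpret_cast<std::uintptr_t>(aligned_ptr) % alignment == 0);
    assert(offset < alignment); // the extra alignment bytes always cover the shift
    return 0;
}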