diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/utils/misc/Utility.h | 24 |
-rw-r--r-- | arm_compute/runtime/ITensorAllocator.h | 15 |
-rw-r--r-- | arm_compute/runtime/MemoryRegion.h | 29 |
-rw-r--r-- | arm_compute/runtime/NEON/AssemblyHelper.h | 9 |
4 files changed, 55 insertions, 22 deletions
diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index f30a417a09..a2784a2fc0 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -165,15 +165,33 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) return idx; } -inline bool endswith(const std::string &filename, const std::string &suffix) +/** Checks if a string ends with a given suffix + * + * @param[in] str Input string + * @param[in] suffix Suffix to check for + * + * @return True if the string ends with the given suffix else false + */ +inline bool endswith(const std::string &str, const std::string &suffix) { - if(filename.size() < suffix.size()) + if(str.size() < suffix.size()) { return false; } - return std::equal(suffix.rbegin(), suffix.rend(), filename.rbegin()); + return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin()); } +/** Checks if a pointer complies with a given alignment + * + * @param[in] ptr Pointer to check + * @param[in] alignment Alignment value. Must be non-zero, as it is used as a modulo divisor + * + * @return True if the address held by ptr is a multiple of alignment else false + */ +inline bool check_aligned(void *ptr, const size_t alignment) +{ + return (reinterpret_cast<std::uintptr_t>(ptr) % alignment) == 0; +} } // namespace utility } // namespace arm_compute #endif /* __ARM_COMPUTE_MISC_UTILITY_H__ */ diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h index 6103e436bc..bb708f0b97 100644 --- a/arm_compute/runtime/ITensorAllocator.h +++ b/arm_compute/runtime/ITensorAllocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,9 +50,10 @@ public: /** Initialize a tensor based on the passed @ref TensorInfo. * - * @param[in] input TensorInfo object containing the description of the tensor to initialize. + * @param[in] input TensorInfo object containing the description of the tensor to initialize. 
+ * @param[in] alignment Alignment in bytes that the underlying base pointer should comply with. */ - void init(const TensorInfo &input); + void init(const TensorInfo &input, size_t alignment = 0); /** Return a reference to the tensor's metadata * * @return Reference to the tensor's metadata. @@ -63,6 +64,11 @@ public: * @return Constant reference to the tensor's metadata. */ const TensorInfo &info() const; + /** Return the alignment of the underlying tensor buffer + * + * @return Tensor buffer alignment in bytes + */ + size_t alignment() const; /** Interface to be implemented by the child class to allocate the tensor. * @@ -87,7 +93,8 @@ protected: virtual void unlock() = 0; private: - TensorInfo _info; /**< Tensor's metadata. */ + TensorInfo _info; /**< Tensor's metadata. */ + size_t _alignment; /**< Tensor's alignment in bytes */ }; } #endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/MemoryRegion.h b/arm_compute/runtime/MemoryRegion.h index bf4e1719de..481b20d375 100644 --- a/arm_compute/runtime/MemoryRegion.h +++ b/arm_compute/runtime/MemoryRegion.h @@ -27,6 +27,7 @@ #include "arm_compute/runtime/IMemoryRegion.h" #include "arm_compute/core/Error.h" +#include "support/ToolchainSupport.h" #include <cstddef> @@ -38,18 +39,28 @@ class MemoryRegion final : public IMemoryRegion public: /** Default constructor * - * @param[in] size Region size + * @param[in] size Region size + * @param[in] alignment Alignment in bytes of the base pointer. 
Defaults to 0 */ - MemoryRegion(size_t size) - : IMemoryRegion(size), _mem(nullptr), _ptr(nullptr) + MemoryRegion(size_t size, size_t alignment = 0) + : IMemoryRegion(size), _mem(nullptr), _alignment(alignment), _offset(0) { if(size != 0) { - _mem = std::shared_ptr<uint8_t>(new uint8_t[size](), [](uint8_t *ptr) + // Allocate zero-initialised backing memory, padded by alignment bytes to leave room for an aligned base pointer + size_t space = size + alignment; + _mem = std::shared_ptr<uint8_t>(new uint8_t[space](), [](uint8_t *ptr) { delete[] ptr; }); - _ptr = _mem.get(); + + // Calculate the offset of the aligned pointer from the start of the backing memory + if(alignment != 0) + { + void *aligned_ptr = _mem.get(); + support::cpp11::align(alignment, size, aligned_ptr, space); + _offset = reinterpret_cast<uintptr_t>(aligned_ptr) - reinterpret_cast<uintptr_t>(_mem.get()); + } } } /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -64,11 +75,12 @@ public: // Inherited methods overridden : void *buffer() final { - return _mem.get(); + return reinterpret_cast<void *>(_mem.get() + _offset); } void *buffer() const final { - return _mem.get(); + // FIXME (COMPMID-1088) : Remove handle() and _offset when done + return reinterpret_cast<void *>(_mem.get() + _offset); } void **handle() final { @@ -77,7 +89,8 @@ public: protected: std::shared_ptr<uint8_t> _mem; - uint8_t *_ptr; + size_t _alignment; + size_t _offset; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_RUNTIME_MEMORY_REGION_H__ */ diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h index c4ba1a584e..5801c42684 100644 --- a/arm_compute/runtime/NEON/AssemblyHelper.h +++ b/arm_compute/runtime/NEON/AssemblyHelper.h @@ -89,13 +89,8 @@ public: const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer()); const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput); - // Forcing 128-byte alignment (required by 32-bit kernels) - const unsigned int alignment = 128; - void *raw_ptr = reinterpret_cast<void *>(_pretranspose->buffer()); - 
size_t space = _pretranspose->info()->total_size(); - void *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space); ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr); - _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b); + _gemm_kernel_asm->pretranspose_B_array(_pretranspose->buffer(), in1_ptr, ldb, multi_stride_b); _b->mark_as_unused(); } @@ -169,7 +164,7 @@ using AssemblyKernelGlueS8S32 = AssemblyKernelGlue<int8_t, int32_t>; inline void allocate_workspace(size_t workspace_size, Tensor &workspace, MemoryGroup *memory_group, size_t alignment, unsigned int num_threads) { ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0"); - workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment - 1) * num_threads }, 1, DataType::S8)); + workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment) * num_threads }, 1, DataType::S8), alignment); if(memory_group != nullptr) { memory_group->manage(&workspace); |