aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-06-18 18:13:51 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:53:20 +0000
commit 17b12307edeaf488cfdf0cc3fa00b8f08293c93e (patch)
tree d7025d84a03d9897b4811ade16a2a1952bdb09c4 /arm_compute/runtime
parent 09b19129b65c5b8d1ca1c3851bab919bb9b7e1a1 (diff)
download ComputeLibrary-17b12307edeaf488cfdf0cc3fa00b8f08293c93e.tar.gz
COMPMID-1293: Handle aligned allocations
Change-Id: I6e642c8cd968240f883c327464519e57e5d0c3e3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/136088
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- arm_compute/runtime/ITensorAllocator.h 15
-rw-r--r-- arm_compute/runtime/MemoryRegion.h 29
-rw-r--r-- arm_compute/runtime/NEON/AssemblyHelper.h 9
3 files changed, 34 insertions, 19 deletions
diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h
index 6103e436bc..bb708f0b97 100644
--- a/arm_compute/runtime/ITensorAllocator.h
+++ b/arm_compute/runtime/ITensorAllocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,9 +50,10 @@ public:
/** Initialize a tensor based on the passed @ref TensorInfo.
*
- * @param[in] input TensorInfo object containing the description of the tensor to initialize.
+ * @param[in] input TensorInfo object containing the description of the tensor to initialize.
+ * @param[in] alignment Alignment in bytes that the underlying base pointer should comply with.
*/
- void init(const TensorInfo &input);
+ void init(const TensorInfo &input, size_t alignment = 0);
/** Return a reference to the tensor's metadata
*
* @return Reference to the tensor's metadata.
@@ -63,6 +64,11 @@ public:
* @return Constant reference to the tensor's metadata.
*/
const TensorInfo &info() const;
+ /** Return the alignment of the underlying tensor buffer
+ *
+ * @return Tensor buffer alignment
+ */
+ size_t alignment() const;
/** Interface to be implemented by the child class to allocate the tensor.
*
@@ -87,7 +93,8 @@ protected:
virtual void unlock() = 0;
private:
- TensorInfo _info; /**< Tensor's metadata. */
+ TensorInfo _info; /**< Tensor's metadata. */
+ size_t _alignment; /**< Tensor's alignment in bytes */
};
}
#endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */
diff --git a/arm_compute/runtime/MemoryRegion.h b/arm_compute/runtime/MemoryRegion.h
index bf4e1719de..481b20d375 100644
--- a/arm_compute/runtime/MemoryRegion.h
+++ b/arm_compute/runtime/MemoryRegion.h
@@ -27,6 +27,7 @@
#include "arm_compute/runtime/IMemoryRegion.h"
#include "arm_compute/core/Error.h"
+#include "support/ToolchainSupport.h"
#include <cstddef>
@@ -38,18 +39,28 @@ class MemoryRegion final : public IMemoryRegion
public:
/** Default constructor
*
- * @param[in] size Region size
+ * @param[in] size Region size
+ * @param[in] alignment Alignment in bytes of the base pointer. Defaults to 0
*/
- MemoryRegion(size_t size)
- : IMemoryRegion(size), _mem(nullptr), _ptr(nullptr)
+ MemoryRegion(size_t size, size_t alignment = 0)
+ : IMemoryRegion(size), _mem(nullptr), _alignment(alignment), _offset(0)
{
if(size != 0)
{
- _mem = std::shared_ptr<uint8_t>(new uint8_t[size](), [](uint8_t *ptr)
+ // Allocate backing memory
+ size_t space = size + alignment;
+ _mem = std::shared_ptr<uint8_t>(new uint8_t[space](), [](uint8_t *ptr)
{
delete[] ptr;
});
- _ptr = _mem.get();
+
+ // Calculate alignment offset
+ if(alignment != 0)
+ {
+ void *aligned_ptr = _mem.get();
+ support::cpp11::align(alignment, size, aligned_ptr, space);
+ _offset = reinterpret_cast<uintptr_t>(aligned_ptr) - reinterpret_cast<uintptr_t>(_mem.get());
+ }
}
}
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -64,11 +75,12 @@ public:
// Inherited methods overridden :
void *buffer() final
{
- return _mem.get();
+ return reinterpret_cast<void *>(_mem.get() + _offset);
}
void *buffer() const final
{
- return _mem.get();
+ // FIXME (COMPMID-1088) : Remove handle() and _offset when done
+ return reinterpret_cast<void *>(_mem.get() + _offset);
}
void **handle() final
{
@@ -77,7 +89,8 @@ public:
protected:
std::shared_ptr<uint8_t> _mem;
- uint8_t *_ptr;
+ size_t _alignment;
+ size_t _offset;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_RUNTIME_MEMORY_REGION_H__ */
diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h
index c4ba1a584e..5801c42684 100644
--- a/arm_compute/runtime/NEON/AssemblyHelper.h
+++ b/arm_compute/runtime/NEON/AssemblyHelper.h
@@ -89,13 +89,8 @@ public:
const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
- // Forcing 128-byte alignment (required by 32-bit kernels)
- const unsigned int alignment = 128;
- void *raw_ptr = reinterpret_cast<void *>(_pretranspose->buffer());
- size_t space = _pretranspose->info()->total_size();
- void *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space);
ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr);
- _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b);
+ _gemm_kernel_asm->pretranspose_B_array(_pretranspose->buffer(), in1_ptr, ldb, multi_stride_b);
_b->mark_as_unused();
}
@@ -169,7 +164,7 @@ using AssemblyKernelGlueS8S32 = AssemblyKernelGlue<int8_t, int32_t>;
inline void allocate_workspace(size_t workspace_size, Tensor &workspace, MemoryGroup *memory_group, size_t alignment, unsigned int num_threads)
{
ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
- workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment - 1) * num_threads }, 1, DataType::S8));
+ workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment) * num_threads }, 1, DataType::S8), alignment);
if(memory_group != nullptr)
{
memory_group->manage(&workspace);