aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-03-19 19:57:01 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:16 +0000
commitceff0f9a991b693f568c25b1e0933582301082e7 (patch)
tree3938e9b2151fc6b3d42d2bab9f66efa4709a5da1
parent7db3a30cd15659af75938cd964eebb17a12c6e81 (diff)
downloadComputeLibrary-ceff0f9a991b693f568c25b1e0933582301082e7.tar.gz
COMPMID-1016: Optimize kernel reconfiguration
Optimizes kernel reconfiguration when a memory manager is used. Note that this works only if every subsequent reconfiguration leads to sizes no larger than those of the first one. Change-Id: I08898e99929c3756147a02979b726c2380b6e11d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125114 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h4
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h2
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h17
-rw-r--r--arm_compute/runtime/MemoryGroupBase.h8
-rw-r--r--src/runtime/CL/CLTensorAllocator.cpp9
-rw-r--r--src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp5
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp10
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp4
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp2
-rw-r--r--tests/validation/CL/UNIT/MemoryManager.cpp70
-rw-r--r--tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp99
-rw-r--r--tests/validation/fixtures/UNIT/MemoryManagerFixture.h411
12 files changed, 618 insertions, 23 deletions
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h
index 103ac37bdd..8fa13e59a6 100644
--- a/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h
+++ b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h
@@ -33,7 +33,7 @@
namespace arm_compute
{
-/** Default gles buffer allocator implementation */
+/** Default GLES buffer allocator implementation */
class GCBufferAllocator : public IAllocator
{
public:
@@ -44,5 +44,5 @@ public:
void *allocate(size_t size, size_t alignment) override;
void free(void *ptr) override;
};
-} // arm_compute
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_GCBUFFERALLOCATOR_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h
index 485aa0e024..10f4fc6b05 100644
--- a/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h
+++ b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h
@@ -44,5 +44,5 @@ inline void MemoryGroupBase<GCTensor>::associate_memory_group(GCTensor *obj)
ARM_COMPUTE_ERROR_ON(allocator == nullptr);
allocator->set_associated_memory_group(this);
}
-} // arm_compute
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_GCMEMORYGROUP_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
index 81be1de21a..1f8dc3e1a0 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
@@ -67,14 +67,17 @@ public:
GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
- * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
- * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false.
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
+ * @param[in] biases Bias tensor. It can be nullptr. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false.
+ * @param[in] retain_internal_weights (Optional) Retain internal reshaped weights. Defaults to false.
+ * Used for reconfiguration purposes.
*/
- void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
+ void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output,
+ bool transpose_weights = true, bool are_weights_reshaped = false, bool retain_internal_weights = false);
//Inherited methods override
void run() override;
diff --git a/arm_compute/runtime/MemoryGroupBase.h b/arm_compute/runtime/MemoryGroupBase.h
index dc640f10a6..06e4321410 100644
--- a/arm_compute/runtime/MemoryGroupBase.h
+++ b/arm_compute/runtime/MemoryGroupBase.h
@@ -96,7 +96,7 @@ inline MemoryGroupBase<TensorType>::MemoryGroupBase(std::shared_ptr<IMemoryManag
template <typename TensorType>
inline void MemoryGroupBase<TensorType>::manage(TensorType *obj)
{
- if(_memory_manager)
+ if(_memory_manager && _mappings.empty())
{
ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager());
@@ -114,7 +114,11 @@ inline void MemoryGroupBase<TensorType>::manage(TensorType *obj)
template <typename TensorType>
inline void MemoryGroupBase<TensorType>::finalize_memory(TensorType *obj, void **handle, size_t size)
{
- if(_memory_manager)
+ // TODO (geopin01) : Check size (track size in MemoryMappings)
+ // Check if existing mapping is valid
+ ARM_COMPUTE_ERROR_ON(!_mappings.empty() && (_mappings.find(handle) == std::end(_mappings)));
+
+ if(_memory_manager && _mappings.empty())
{
ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager());
_memory_manager->lifetime_manager()->end_lifetime(obj, handle, size);
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 705c4edd60..c5524b1ccb 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Log.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -58,11 +57,7 @@ void *SVMMemory::allocate(cl_context context, size_t size, cl_svm_mem_flags flag
ARM_COMPUTE_ERROR_ON(_ptr != nullptr);
ARM_COMPUTE_ERROR_ON(size > CL_DEVICE_MAX_MEM_ALLOC_SIZE);
_ptr = clSVMAlloc(context, flags, size, alignment);
- if(_ptr == nullptr)
- {
- ARM_COMPUTE_LOG_INFO_MSG_CORE("Call to clSVMAlloc() failed.");
- }
- else
+ if(_ptr != nullptr)
{
_size = size;
_fine_grain = static_cast<bool>(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER);
@@ -76,9 +71,9 @@ void *CLTensorAllocator::svm_ptr()
void CLTensorAllocator::allocate()
{
- ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr);
if(_associated_memory_group == nullptr)
{
+ ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr);
if(_svm_memory.allocate(CLScheduler::get().context()(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, info().total_size(), 0) == nullptr)
{
// try at coarse grain svm memory
diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
index 50e3cc7c1c..d8f6867634 100644
--- a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
+++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
@@ -29,8 +29,8 @@
#include <cstddef>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void *GCBufferAllocator::allocate(size_t size, size_t alignment)
{
ARM_COMPUTE_UNUSED(alignment);
@@ -48,3 +48,4 @@ void GCBufferAllocator::free(void *ptr)
auto *gl_buffer = reinterpret_cast<GLBufferWrapper *>(ptr);
delete gl_buffer;
}
+} // namespace arm_compute
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
index 0f8f8e6c94..a300033bb2 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
@@ -79,7 +79,8 @@ void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTen
_mm_kernel.configure(input, weights, output, 1.0f, false);
}
-void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped)
+void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output,
+ bool transpose_weights, bool are_weights_reshaped, bool retain_internal_weights)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
@@ -141,11 +142,14 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w
}
// Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called
- if(!_are_weights_reshaped)
+ if(!_are_weights_reshaped && !retain_internal_weights)
{
// Allocate the tensor for the weights reshaped
_reshape_weights_output.allocator()->allocate();
}
+
+ ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0);
+ _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights;
}
void GCFullyConnectedLayer::run()
@@ -158,6 +162,7 @@ void GCFullyConnectedLayer::run()
}
_memory_group.acquire();
+
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
@@ -179,5 +184,6 @@ void GCFullyConnectedLayer::run()
GCScheduler::get().dispatch(_accumulate_biases_kernel);
}
+
_memory_group.release();
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
index 46424a59f5..9c8568a329 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
@@ -92,6 +92,10 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *
TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position());
_tmp_b.allocator()->init(info_b);
+ if(!gemm_info.reshape_b_only_on_first_run())
+ {
+ _memory_group.manage(&_tmp_b);
+ }
// Configure interleave kernel
_interleave_kernel.configure(a, &_tmp_a);
diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
index 13213d2b54..b2e69ee8c6 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
@@ -57,10 +57,12 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output,
void GCNormalizationLayer::run()
{
_memory_group.acquire();
+
GCScheduler::get().dispatch(_multiply_kernel, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(_border_handler, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(_norm_kernel, true);
+
_memory_group.release();
}
diff --git a/tests/validation/CL/UNIT/MemoryManager.cpp b/tests/validation/CL/UNIT/MemoryManager.cpp
new file mode 100644
index 0000000000..2129c03243
--- /dev/null
+++ b/tests/validation/CL/UNIT/MemoryManager.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "support/ToolchainSupport.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/UNIT/MemoryManagerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.05f);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(MemoryManager)
+
+using CLBlobMemoryManagerSimpleWithinFunctionLevelFixture = BlobMemoryManagerSimpleTestCaseFixture<CLTensor,
+ CLAccessor,
+ CLBufferAllocator,
+ CLFullyConnectedLayer>;
+FIXTURE_TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel,
+ CLBlobMemoryManagerSimpleWithinFunctionLevelFixture,
+ framework::DatasetMode::ALL)
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp b/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp
new file mode 100644
index 0000000000..8f59a05b87
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
+#include "support/ToolchainSupport.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/UNIT/MemoryManagerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.05f);
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(UNIT)
+TEST_SUITE(MemoryManager)
+
+// Setting BlobMemoryManagerSimpleWithinFunctionLevel test
+using GCBlobMemoryManagerSimpleWithinFunctionLevelFixture = BlobMemoryManagerSimpleTestCaseFixture<GCTensor,
+ GCAccessor,
+ GCBufferAllocator,
+ GCFullyConnectedLayer>;
+FIXTURE_TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel,
+ GCBlobMemoryManagerSimpleWithinFunctionLevelFixture,
+ framework::DatasetMode::ALL)
+{
+ // Validate output
+ validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+
+// Setting BlobMemoryManagerReconfigure test
+using GCBlobMemoryManagerReconfigureFixture = BlobMemoryManagerReconfigureTestCaseFixture<GCTensor,
+ GCAccessor,
+ GCBufferAllocator,
+ GCFullyConnectedLayer>;
+FIXTURE_TEST_CASE(BlobMemoryManagerReconfigure,
+ GCBlobMemoryManagerReconfigureFixture,
+ framework::DatasetMode::ALL)
+{
+ // Validate output
+ validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+
+// Setting BlobMemoryManagerReconfigure2 test
+using GCBlobMemoryManagerReconfigure2Fixture = BlobMemoryManagerReconfigure2TestCaseFixture<GCTensor,
+ GCAccessor,
+ GCBufferAllocator,
+ GCFullyConnectedLayer,
+ GCSoftmaxLayer>;
+FIXTURE_TEST_CASE(BlobMemoryManagerReconfigure2,
+ GCBlobMemoryManagerReconfigure2Fixture,
+ framework::DatasetMode::ALL)
+{
+ // Validate output
+ validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/UNIT/MemoryManagerFixture.h b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h
new file mode 100644
index 0000000000..21ad42bf77
--- /dev/null
+++ b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER
+#define ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/FullyConnectedLayer.h"
+#include "tests/validation/reference/SoftmaxLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Simple test case to run two fully connected layers using a blob affinity memory manager
+ *
+ * Runs two fully connected layers back to back
+ */
+template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction>
+class BlobMemoryManagerSimpleTestCaseFixture : public framework::Fixture
+{
+ using T = float;
+
+public:
+ void setup()
+ {
+ _target = compute_target();
+ _reference = compute_reference();
+ };
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ std::uniform_real_distribution<> distribution(0.5f, 1.f);
+ library->fill(tensor, distribution, i);
+ }
+
+ TensorType compute_target()
+ {
+ auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+ auto pool_mgr = std::make_shared<PoolManager>();
+ auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+ // Create tensors
+ TensorType w1 = create_tensor<TensorType>(TensorShape(128U, 128U), DataType::F32, 1);
+ TensorType b1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1);
+ TensorType w2 = create_tensor<TensorType>(TensorShape(128U, 24U), DataType::F32, 1);
+ TensorType b2 = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1);
+ TensorType src = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1);
+ TensorType fc1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1);
+ TensorType dst = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1);
+
+ // Create and configure function
+ FullyConnectedFunction fc_layer_1(mm);
+ FullyConnectedFunction fc_layer_2(mm);
+ fc_layer_1.configure(&src, &w1, &b1, &fc1);
+ fc_layer_2.configure(&fc1, &w2, &b2, &dst);
+
+ // Allocate tensors
+ w1.allocator()->allocate();
+ b1.allocator()->allocate();
+ w2.allocator()->allocate();
+ b2.allocator()->allocate();
+ src.allocator()->allocate();
+ fc1.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Finalize memory manager
+ mm->set_allocator(&_allocator);
+ mm->set_num_pools(1);
+ mm->finalize();
+ ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+
+ // Fill tensors
+ fill(AccessorType(src), 0);
+ fill(AccessorType(w1), 1);
+ fill(AccessorType(b1), 2);
+ fill(AccessorType(w2), 3);
+ fill(AccessorType(b2), 4);
+
+ // Compute functions
+ fc_layer_1.run();
+ fc_layer_2.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference()
+ {
+ // Create reference
+ SimpleTensor<T> w1{ TensorShape(128U, 128U), DataType::F32 };
+ SimpleTensor<T> b1{ TensorShape(128U), DataType::F32 };
+ SimpleTensor<T> w2{ TensorShape(128U, 24U), DataType::F32 };
+ SimpleTensor<T> b2{ TensorShape(24U), DataType::F32 };
+ SimpleTensor<T> src{ TensorShape(128U), DataType::F32 };
+
+ // Fill reference
+ fill(src, 0);
+ fill(w1, 1);
+ fill(b1, 2);
+ fill(w2, 3);
+ fill(b2, 4);
+
+ auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U));
+ return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U));
+ }
+
+protected:
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ AllocatorType _allocator{};
+};
+
+/** Test case to run two fully connected layers using a blob affinity memory manager,
+ * reconfigure with different shapes and rerun
+ *
+ * Runs two fully connected layers back to back then reconfigures with different batch size and reruns
+ * Shapes of the reconfigure step are smaller than those of the initially configured step
+ */
+template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction>
+class BlobMemoryManagerReconfigureTestCaseFixture : public framework::Fixture
+{
+ using T = float;
+
+public:
+ void setup()
+ {
+ _max_batches = 8;
+ _cur_batches = 6;
+ _target = compute_target();
+ _reference = compute_reference();
+ };
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ std::uniform_real_distribution<> distribution(0.5f, 1.f);
+ library->fill(tensor, distribution, i);
+ }
+
+ TensorType compute_target()
+ {
+ AllocatorType allocator{};
+ auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+ auto pool_mgr = std::make_shared<PoolManager>();
+ auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+ // Create tensors
+ TensorType w1 = create_tensor<TensorType>(TensorShape(128U, 128U), DataType::F32, 1);
+ TensorType b1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1);
+ TensorType w2 = create_tensor<TensorType>(TensorShape(128U, 24U), DataType::F32, 1);
+ TensorType b2 = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1);
+ TensorType src = create_tensor<TensorType>(TensorShape(128U, _max_batches), DataType::F32, 1);
+ TensorType fc1 = create_tensor<TensorType>(TensorShape(128U, _max_batches), DataType::F32, 1);
+ TensorType dst = create_tensor<TensorType>(TensorShape(24U, _max_batches), DataType::F32, 1);
+
+ // Create and configure function
+ FullyConnectedFunction fc_layer_1(mm);
+ FullyConnectedFunction fc_layer_2(mm);
+ fc_layer_1.configure(&src, &w1, &b1, &fc1);
+ fc_layer_2.configure(&fc1, &w2, &b2, &dst);
+
+ // Allocate persistent tensors
+ w1.allocator()->allocate();
+ b1.allocator()->allocate();
+ w2.allocator()->allocate();
+ b2.allocator()->allocate();
+
+ // Allocate tensors (1st iteration)
+ src.allocator()->allocate();
+ fc1.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Finalize memory manager
+ mm->set_allocator(&allocator);
+ mm->set_num_pools(1);
+ mm->finalize();
+ ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+
+ // Fill tensors (1st iteration)
+ fill(AccessorType(src), 0);
+ fill(AccessorType(w1), 1);
+ fill(AccessorType(b1), 2);
+ fill(AccessorType(w2), 3);
+ fill(AccessorType(b2), 4);
+
+ // Compute functions (1st iteration)
+ fc_layer_1.run();
+ fc_layer_2.run();
+
+ // Update tensor shapes (2nd iteration)
+ auto src_padding = src.allocator()->info().padding();
+ auto fc1_padding = fc1.allocator()->info().padding();
+ auto dst_padding = dst.allocator()->info().padding();
+ int diff = _max_batches - _cur_batches;
+ auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left);
+ auto new_fc1_padding = PaddingSize(fc1_padding.top, fc1_padding.right, fc1_padding.bottom + diff, fc1_padding.left);
+ auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left);
+ src.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding);
+ src.allocator()->info().set_is_resizable(false);
+ fc1.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_fc1_padding);
+ fc1.allocator()->info().set_is_resizable(false);
+ dst.allocator()->info().set_tensor_shape(TensorShape(24U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding);
+ dst.allocator()->info().set_is_resizable(false);
+
+ // Configure functions (2nd iteration)
+ fc_layer_1.configure(&src, &w1, &b1, &fc1, true, false, true);
+ fc_layer_2.configure(&fc1, &w2, &b2, &dst, true, false, true);
+
+ // Fill tensors (2nd iteration)
+ fill(AccessorType(src), 5);
+
+ // Compute functions (2nd iteration)
+ fc_layer_1.run();
+ fc_layer_2.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference()
+ {
+ // Create reference
+ SimpleTensor<T> w1{ TensorShape(128U, 128U), DataType::F32 };
+ SimpleTensor<T> b1{ TensorShape(128U), DataType::F32 };
+ SimpleTensor<T> w2{ TensorShape(128U, 24U), DataType::F32 };
+ SimpleTensor<T> b2{ TensorShape(24U), DataType::F32 };
+ SimpleTensor<T> src{ TensorShape(128U, _cur_batches), DataType::F32 };
+
+ // Fill reference
+ fill(src, 5);
+ fill(w1, 1);
+ fill(b1, 2);
+ fill(w2, 3);
+ fill(b2, 4);
+
+ auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U, _cur_batches));
+ return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U, _cur_batches));
+ }
+
+protected:
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ AllocatorType _allocator{};
+ unsigned int _max_batches{};
+ unsigned int _cur_batches{};
+};
+
+/** Test case to run a fully connected layer followed by a softmax layer using a blob affinity memory manager,
+ * reconfigure with different shapes and rerun
+ *
+ * Runs a fully connected layer followed by a softmax layer then reconfigures with different batch sizes and reruns
+ * Shapes of the reconfigure steps are never larger than those of the initially configured step
+ */
+template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction, typename SoftmaxFunction>
+class BlobMemoryManagerReconfigure2TestCaseFixture : public framework::Fixture
+{
+ using T = float;
+
+public:
+ void setup()
+ {
+ _max_batches = 30;
+ _cur_batches = 3;
+ _target = compute_target();
+ _reference = compute_reference();
+ };
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ std::uniform_real_distribution<> distribution(0.5f, 1.f);
+ library->fill(tensor, distribution, i);
+ }
+
+ TensorType compute_target()
+ {
+ AllocatorType allocator{};
+ auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+ auto pool_mgr = std::make_shared<PoolManager>();
+ auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+ // Create tensors
+ TensorType w = create_tensor<TensorType>(TensorShape(112U, 8U), DataType::F32, 1);
+ TensorType b = create_tensor<TensorType>(TensorShape(8U), DataType::F32, 1);
+ TensorType src = create_tensor<TensorType>(TensorShape(1U, 1U, 112U, _max_batches), DataType::F32, 1);
+ TensorType fc = create_tensor<TensorType>(TensorShape(8U, _max_batches), DataType::F32, 1);
+ TensorType dst = create_tensor<TensorType>(TensorShape(8U, _max_batches), DataType::F32, 1);
+
+ // Create and configure function
+ FullyConnectedFunction fc_layer(mm);
+ SoftmaxFunction smx_layer(mm);
+ fc_layer.configure(&src, &w, &b, &fc);
+ smx_layer.configure(&fc, &dst);
+
+ // Allocate persistent tensors
+ w.allocator()->allocate();
+ b.allocator()->allocate();
+
+ // Allocate tensors (1st iteration)
+ src.allocator()->allocate();
+ fc.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Finalize memory manager
+ mm->set_allocator(&allocator);
+ mm->set_num_pools(1);
+ mm->finalize();
+ ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+
+ // Fill tensors (1st iteration)
+ fill(AccessorType(src), 0);
+ fill(AccessorType(w), 1);
+ fill(AccessorType(b), 2);
+
+ // Compute functions (1st iteration)
+ fc_layer.run();
+ smx_layer.run();
+
+ // Get padding requirements
+ auto fc_padding = fc.allocator()->info().padding();
+
+ // Run the remaining iterations, from the maximum batch size down to the current one
+ for(int i = _max_batches; i >= static_cast<int>(_cur_batches); --i)
+ {
+ int diff = _max_batches - i;
+ auto new_fc_padding = PaddingSize(fc_padding.top, fc_padding.right, fc_padding.bottom + diff, fc_padding.left);
+ src.allocator()->info().set_tensor_shape(TensorShape(1U, 1U, 112U, i));
+ fc.allocator()->info().set_tensor_shape(TensorShape(8U, i)).set_is_resizable(true).extend_padding(new_fc_padding);
+ fc.allocator()->info().set_is_resizable(false);
+ dst.allocator()->info().set_tensor_shape(TensorShape(8U, i));
+
+ // Configure functions
+ fc_layer.configure(&src, &w, &b, &fc, true, false, true);
+ smx_layer.configure(&fc, &dst);
+
+ // Fill tensors
+ fill(AccessorType(src), 3);
+
+ // Compute functions
+ fc_layer.run();
+ smx_layer.run();
+ }
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference()
+ {
+ // Create reference
+ SimpleTensor<T> w{ TensorShape(112U, 8U), DataType::F32 };
+ SimpleTensor<T> b{ TensorShape(8U), DataType::F32 };
+ SimpleTensor<T> src{ TensorShape(1U, 1U, 112U, _cur_batches), DataType::F32 };
+
+ // Fill reference
+ fill(src, 3);
+ fill(w, 1);
+ fill(b, 2);
+
+ auto fc = reference::fully_connected_layer(src, w, b, TensorShape(8U, _cur_batches));
+ return reference::softmax_layer(fc, 1.f);
+ }
+
+protected:
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ AllocatorType _allocator{};
+ unsigned int _max_batches{};
+ unsigned int _cur_batches{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER */