From ceff0f9a991b693f568c25b1e0933582301082e7 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 19 Mar 2018 19:57:01 +0000 Subject: COMPMID-1016: Optimize kernel reconfiguration Optimizes kernel reconfiguration when memory manager is used. Note that this works only if every subsequent reconfiguration leads to sizes less than the first one. Change-Id: I08898e99929c3756147a02979b726c2380b6e11d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125114 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- .../runtime/GLES_COMPUTE/GCBufferAllocator.h | 4 +- arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h | 2 +- .../GLES_COMPUTE/functions/GCFullyConnectedLayer.h | 17 +- arm_compute/runtime/MemoryGroupBase.h | 8 +- src/runtime/CL/CLTensorAllocator.cpp | 9 +- src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp | 5 +- .../functions/GCFullyConnectedLayer.cpp | 10 +- src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 4 + .../functions/GCNormalizationLayer.cpp | 2 + tests/validation/CL/UNIT/MemoryManager.cpp | 70 ++++ .../validation/GLES_COMPUTE/UNIT/MemoryManager.cpp | 99 +++++ .../fixtures/UNIT/MemoryManagerFixture.h | 411 +++++++++++++++++++++ 12 files changed, 618 insertions(+), 23 deletions(-) create mode 100644 tests/validation/CL/UNIT/MemoryManager.cpp create mode 100644 tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp create mode 100644 tests/validation/fixtures/UNIT/MemoryManagerFixture.h diff --git a/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h index 103ac37bdd..8fa13e59a6 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h @@ -33,7 +33,7 @@ namespace arm_compute { -/** Default gles buffer allocator implementation */ +/** Default GLES buffer allocator implementation */ class GCBufferAllocator : public IAllocator { public: @@ -44,5 +44,5 @@ public: void *allocate(size_t size, size_t alignment) override; void free(void *ptr) 
override; }; -} // arm_compute +} // namespace arm_compute #endif /*__ARM_COMPUTE_GCBUFFERALLOCATOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h index 485aa0e024..10f4fc6b05 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h @@ -44,5 +44,5 @@ inline void MemoryGroupBase::associate_memory_group(GCTensor *obj) ARM_COMPUTE_ERROR_ON(allocator == nullptr); allocator->set_associated_memory_group(this); } -} // arm_compute +} // namespace arm_compute #endif /*__ARM_COMPUTE_GCMEMORYGROUP_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h index 81be1de21a..1f8dc3e1a0 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h @@ -67,14 +67,17 @@ public: GCFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input - * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. - * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. 
+ * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + * @param[in] retain_internal_weights (Optional) Retain internal reshaped weights. Defaults to false. + * Used for reconfiguration purposes. */ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, + bool transpose_weights = true, bool are_weights_reshaped = false, bool retain_internal_weights = false); //Inherited methods override void run() override; diff --git a/arm_compute/runtime/MemoryGroupBase.h b/arm_compute/runtime/MemoryGroupBase.h index dc640f10a6..06e4321410 100644 --- a/arm_compute/runtime/MemoryGroupBase.h +++ b/arm_compute/runtime/MemoryGroupBase.h @@ -96,7 +96,7 @@ inline MemoryGroupBase::MemoryGroupBase(std::shared_ptr inline void MemoryGroupBase::manage(TensorType *obj) { - if(_memory_manager) + if(_memory_manager && _mappings.empty()) { ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager()); @@ -114,7 +114,11 @@ inline void MemoryGroupBase::manage(TensorType *obj) template inline void MemoryGroupBase::finalize_memory(TensorType *obj, void **handle, size_t size) { - if(_memory_manager) + // TODO (geopin01) : Check size (track size in MemoryMappings) + // Check if existing mapping is valid + ARM_COMPUTE_ERROR_ON(!_mappings.empty() && (_mappings.find(handle) == std::end(_mappings))); + + if(_memory_manager && _mappings.empty()) { ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager()); _memory_manager->lifetime_manager()->end_lifetime(obj, handle, size); diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 705c4edd60..c5524b1ccb 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ 
b/src/runtime/CL/CLTensorAllocator.cpp @@ -24,7 +24,6 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/Log.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -58,11 +57,7 @@ void *SVMMemory::allocate(cl_context context, size_t size, cl_svm_mem_flags flag ARM_COMPUTE_ERROR_ON(_ptr != nullptr); ARM_COMPUTE_ERROR_ON(size > CL_DEVICE_MAX_MEM_ALLOC_SIZE); _ptr = clSVMAlloc(context, flags, size, alignment); - if(_ptr == nullptr) - { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Call to clSVMAlloc() failed."); - } - else + if(_ptr != nullptr) { _size = size; _fine_grain = static_cast(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER); @@ -76,9 +71,9 @@ void *CLTensorAllocator::svm_ptr() void CLTensorAllocator::allocate() { - ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr); if(_associated_memory_group == nullptr) { + ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr); if(_svm_memory.allocate(CLScheduler::get().context()(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, info().total_size(), 0) == nullptr) { // try at coarse grain svm memory diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp index 50e3cc7c1c..d8f6867634 100644 --- a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp +++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp @@ -29,8 +29,8 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ void *GCBufferAllocator::allocate(size_t size, size_t alignment) { ARM_COMPUTE_UNUSED(alignment); @@ -48,3 +48,4 @@ void GCBufferAllocator::free(void *ptr) auto *gl_buffer = reinterpret_cast(ptr); delete gl_buffer; } +} // namespace arm_compute diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index 0f8f8e6c94..a300033bb2 100644 --- 
a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -79,7 +79,8 @@ void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTen _mm_kernel.configure(input, weights, output, 1.0f, false); } -void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped) +void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, + bool transpose_weights, bool are_weights_reshaped, bool retain_internal_weights) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); @@ -141,11 +142,14 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w } // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called - if(!_are_weights_reshaped) + if(!_are_weights_reshaped && !retain_internal_weights) { // Allocate the tensor for the weights reshaped _reshape_weights_output.allocator()->allocate(); } + + ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0); + _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights; } void GCFullyConnectedLayer::run() @@ -158,6 +162,7 @@ void GCFullyConnectedLayer::run() } _memory_group.acquire(); + // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { @@ -179,5 +184,6 @@ void GCFullyConnectedLayer::run() GCScheduler::get().dispatch(_accumulate_biases_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 46424a59f5..9c8568a329 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ 
b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -92,6 +92,10 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); _tmp_b.allocator()->init(info_b); + if(!gemm_info.reshape_b_only_on_first_run()) + { + _memory_group.manage(&_tmp_b); + } // Configure interleave kernel _interleave_kernel.configure(a, &_tmp_a); diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp index 13213d2b54..b2e69ee8c6 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -57,10 +57,12 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, void GCNormalizationLayer::run() { _memory_group.acquire(); + GCScheduler::get().dispatch(_multiply_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_border_handler, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel, true); + _memory_group.release(); } diff --git a/tests/validation/CL/UNIT/MemoryManager.cpp b/tests/validation/CL/UNIT/MemoryManager.cpp new file mode 100644 index 0000000000..2129c03243 --- /dev/null +++ b/tests/validation/CL/UNIT/MemoryManager.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/CLBufferAllocator.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "support/ToolchainSupport.h" +#include "tests/AssetsLibrary.h" +#include "tests/CL/CLAccessor.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/UNIT/MemoryManagerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.05f); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(MemoryManager) + +using CLBlobMemoryManagerSimpleWithinFunctionLevelFixture = BlobMemoryManagerSimpleTestCaseFixture; +FIXTURE_TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel, + CLBlobMemoryManagerSimpleWithinFunctionLevelFixture, + framework::DatasetMode::ALL) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp b/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp new file mode 100644 index 0000000000..8f59a05b87 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/UNIT/MemoryManager.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "support/ToolchainSupport.h" +#include "tests/AssetsLibrary.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/UNIT/MemoryManagerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.05f); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(UNIT) +TEST_SUITE(MemoryManager) + +// Setting BlobMemoryManagerSimpleWithinFunctionLevel test +using GCBlobMemoryManagerSimpleWithinFunctionLevelFixture = BlobMemoryManagerSimpleTestCaseFixture; +FIXTURE_TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel, + GCBlobMemoryManagerSimpleWithinFunctionLevelFixture, + framework::DatasetMode::ALL) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} + +// Setting BlobMemoryManagerReconfigure test +using GCBlobMemoryManagerReconfigureFixture = BlobMemoryManagerReconfigureTestCaseFixture; +FIXTURE_TEST_CASE(BlobMemoryManagerReconfigure, + GCBlobMemoryManagerReconfigureFixture, + framework::DatasetMode::ALL) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} + +// Setting BlobMemoryManagerReconfigure2 test +using GCBlobMemoryManagerReconfigure2Fixture = BlobMemoryManagerReconfigure2TestCaseFixture; +FIXTURE_TEST_CASE(BlobMemoryManagerReconfigure2, + GCBlobMemoryManagerReconfigure2Fixture, + framework::DatasetMode::ALL) +{ + // Validate output + validate(GCAccessor(_target), _reference, 
tolerance_f32); +} + +TEST_SUITE_END() +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/UNIT/MemoryManagerFixture.h b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h new file mode 100644 index 0000000000..21ad42bf77 --- /dev/null +++ b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER +#define ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/FullyConnectedLayer.h" +#include "tests/validation/reference/SoftmaxLayer.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Simple test case to run two fully connected layers using a blob affinity memory manager + * + * Runs two fully connected layers back to back + */ +template +class BlobMemoryManagerSimpleTestCaseFixture : public framework::Fixture +{ + using T = float; + +public: + void setup() + { + _target = compute_target(); + _reference = compute_reference(); + }; + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<> distribution(0.5f, 1.f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target() + { + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + + // Create tensors + TensorType w1 = create_tensor(TensorShape(128U, 128U), DataType::F32, 1); + TensorType b1 = create_tensor(TensorShape(128U), DataType::F32, 1); + TensorType w2 = create_tensor(TensorShape(128U, 24U), DataType::F32, 1); + TensorType b2 = create_tensor(TensorShape(24U), DataType::F32, 1); + TensorType src = create_tensor(TensorShape(128U), DataType::F32, 1); + TensorType fc1 = create_tensor(TensorShape(128U), DataType::F32, 1); + TensorType dst = create_tensor(TensorShape(24U), DataType::F32, 1); + + // Create and configure function + 
FullyConnectedFunction fc_layer_1(mm); + FullyConnectedFunction fc_layer_2(mm); + fc_layer_1.configure(&src, &w1, &b1, &fc1); + fc_layer_2.configure(&fc1, &w2, &b2, &dst); + + // Allocate tensors + w1.allocator()->allocate(); + b1.allocator()->allocate(); + w2.allocator()->allocate(); + b2.allocator()->allocate(); + src.allocator()->allocate(); + fc1.allocator()->allocate(); + dst.allocator()->allocate(); + + // Finalize memory manager + mm->set_allocator(&_allocator); + mm->set_num_pools(1); + mm->finalize(); + ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0); + fill(AccessorType(w1), 1); + fill(AccessorType(b1), 2); + fill(AccessorType(w2), 3); + fill(AccessorType(b2), 4); + + // Compute functions + fc_layer_1.run(); + fc_layer_2.run(); + + return dst; + } + + SimpleTensor compute_reference() + { + // Create reference + SimpleTensor w1{ TensorShape(128U, 128U), DataType::F32 }; + SimpleTensor b1{ TensorShape(128U), DataType::F32 }; + SimpleTensor w2{ TensorShape(128U, 24U), DataType::F32 }; + SimpleTensor b2{ TensorShape(24U), DataType::F32 }; + SimpleTensor src{ TensorShape(128U), DataType::F32 }; + + // Fill reference + fill(src, 0); + fill(w1, 1); + fill(b1, 2); + fill(w2, 3); + fill(b2, 4); + + auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U)); + return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U)); + } + +protected: + TensorType _target{}; + SimpleTensor _reference{}; + AllocatorType _allocator{}; +}; + +/** Test case to run two fully connected layers using a blob affinity memory manager, + * reconfigure with different shapes and rerun + * + * Runs two fully connected layers back to back then reconfigures with different batch size and reruns + * Shapes of the reconfigure step are smaller than the initial configured step + */ +template +class 
BlobMemoryManagerReconfigureTestCaseFixture : public framework::Fixture +{ + using T = float; + +public: + void setup() + { + _max_batches = 8; + _cur_batches = 6; + _target = compute_target(); + _reference = compute_reference(); + }; + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<> distribution(0.5f, 1.f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target() + { + AllocatorType allocator{}; + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + + // Create tensors + TensorType w1 = create_tensor(TensorShape(128U, 128U), DataType::F32, 1); + TensorType b1 = create_tensor(TensorShape(128U), DataType::F32, 1); + TensorType w2 = create_tensor(TensorShape(128U, 24U), DataType::F32, 1); + TensorType b2 = create_tensor(TensorShape(24U), DataType::F32, 1); + TensorType src = create_tensor(TensorShape(128U, _max_batches), DataType::F32, 1); + TensorType fc1 = create_tensor(TensorShape(128U, _max_batches), DataType::F32, 1); + TensorType dst = create_tensor(TensorShape(24U, _max_batches), DataType::F32, 1); + + // Create and configure function + FullyConnectedFunction fc_layer_1(mm); + FullyConnectedFunction fc_layer_2(mm); + fc_layer_1.configure(&src, &w1, &b1, &fc1); + fc_layer_2.configure(&fc1, &w2, &b2, &dst); + + // Allocate persistent tensors + w1.allocator()->allocate(); + b1.allocator()->allocate(); + w2.allocator()->allocate(); + b2.allocator()->allocate(); + + // Allocate tensors (1st iteration) + src.allocator()->allocate(); + fc1.allocator()->allocate(); + dst.allocator()->allocate(); + + // Finalize memory manager + mm->set_allocator(&allocator); + mm->set_num_pools(1); + mm->finalize(); + ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); + + // Fill tensors (1st iteration) + fill(AccessorType(src), 
0); + fill(AccessorType(w1), 1); + fill(AccessorType(b1), 2); + fill(AccessorType(w2), 3); + fill(AccessorType(b2), 4); + + // Compute functions (1st iteration) + fc_layer_1.run(); + fc_layer_2.run(); + + // Update tensor shapes (2nd iteration) + auto src_padding = src.allocator()->info().padding(); + auto fc1_padding = fc1.allocator()->info().padding(); + auto dst_padding = dst.allocator()->info().padding(); + int diff = _max_batches - _cur_batches; + auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left); + auto new_fc1_padding = PaddingSize(fc1_padding.top, fc1_padding.right, fc1_padding.bottom + diff, fc1_padding.left); + auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left); + src.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding); + src.allocator()->info().set_is_resizable(false); + fc1.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_fc1_padding); + fc1.allocator()->info().set_is_resizable(false); + dst.allocator()->info().set_tensor_shape(TensorShape(24U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding); + dst.allocator()->info().set_is_resizable(false); + + // Configure functions (2nd iteration) + fc_layer_1.configure(&src, &w1, &b1, &fc1, true, false, true); + fc_layer_2.configure(&fc1, &w2, &b2, &dst, true, false, true); + + // Fill tensors (2nd iteration) + fill(AccessorType(src), 5); + + // Compute functions (2nd iteration) + fc_layer_1.run(); + fc_layer_2.run(); + + return dst; + } + + SimpleTensor compute_reference() + { + // Create reference + SimpleTensor w1{ TensorShape(128U, 128U), DataType::F32 }; + SimpleTensor b1{ TensorShape(128U), DataType::F32 }; + SimpleTensor w2{ TensorShape(128U, 24U), DataType::F32 }; + SimpleTensor b2{ TensorShape(24U), DataType::F32 }; + 
SimpleTensor src{ TensorShape(128U, _cur_batches), DataType::F32 }; + + // Fill reference + fill(src, 5); + fill(w1, 1); + fill(b1, 2); + fill(w2, 3); + fill(b2, 4); + + auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U, _cur_batches)); + return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U, _cur_batches)); + } + +protected: + TensorType _target{}; + SimpleTensor _reference{}; + AllocatorType _allocator{}; + unsigned int _max_batches{}; + unsigned int _cur_batches{}; +}; + +/** Test case to run a fully connected layer followed by a softmax layer using a blob affinity memory manager, + * reconfigure with different shapes and rerun + * + * Runs a fully connected convolution layer followed by a softmax layer then reconfigures with different batch size and reruns + * Shapes of the reconfigure step are smaller than the initial configured step + */ +template +class BlobMemoryManagerReconfigure2TestCaseFixture : public framework::Fixture +{ + using T = float; + +public: + void setup() + { + _max_batches = 30; + _cur_batches = 3; + _target = compute_target(); + _reference = compute_reference(); + }; + +protected: + template + void fill(U &&tensor, int i) + { + std::uniform_real_distribution<> distribution(0.5f, 1.f); + library->fill(tensor, distribution, i); + } + + TensorType compute_target() + { + AllocatorType allocator{}; + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + + // Create tensors + TensorType w = create_tensor(TensorShape(112U, 8U), DataType::F32, 1); + TensorType b = create_tensor(TensorShape(8U), DataType::F32, 1); + TensorType src = create_tensor(TensorShape(1U, 1U, 112U, _max_batches), DataType::F32, 1); + TensorType fc = create_tensor(TensorShape(8U, _max_batches), DataType::F32, 1); + TensorType dst = create_tensor(TensorShape(8U, _max_batches), DataType::F32, 1); + + // Create and configure function + FullyConnectedFunction 
fc_layer(mm); + SoftmaxFunction smx_layer(mm); + fc_layer.configure(&src, &w, &b, &fc); + smx_layer.configure(&fc, &dst); + + // Allocate persistent tensors + w.allocator()->allocate(); + b.allocator()->allocate(); + + // Allocate tensors (1st iteration) + src.allocator()->allocate(); + fc.allocator()->allocate(); + dst.allocator()->allocate(); + + // Finalize memory manager + mm->set_allocator(&allocator); + mm->set_num_pools(1); + mm->finalize(); + ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); + + // Fill tensors (1st iteration) + fill(AccessorType(src), 0); + fill(AccessorType(w), 1); + fill(AccessorType(b), 2); + + // Compute functions (1st iteration) + fc_layer.run(); + smx_layer.run(); + + // Get padding requirements + auto fc_padding = fc.allocator()->info().padding(); + + // Run rest iterations + for(int i = _max_batches; i >= static_cast(_cur_batches); --i) + { + int diff = _max_batches - i; + auto new_fc_padding = PaddingSize(fc_padding.top, fc_padding.right, fc_padding.bottom + diff, fc_padding.left); + src.allocator()->info().set_tensor_shape(TensorShape(1U, 1U, 112U, i)); + fc.allocator()->info().set_tensor_shape(TensorShape(8U, i)).set_is_resizable(true).extend_padding(new_fc_padding); + fc.allocator()->info().set_is_resizable(false); + dst.allocator()->info().set_tensor_shape(TensorShape(8U, i)); + + // Configure functions + fc_layer.configure(&src, &w, &b, &fc, true, false, true); + smx_layer.configure(&fc, &dst); + + // Fill tensors + fill(AccessorType(src), 3); + + // Compute functions + fc_layer.run(); + smx_layer.run(); + } + + return dst; + } + + SimpleTensor compute_reference() + { + // Create reference + SimpleTensor w{ TensorShape(112U, 8U), DataType::F32 }; + SimpleTensor b{ TensorShape(8U), DataType::F32 }; + SimpleTensor src{ TensorShape(1U, 1U, 112U, _cur_batches), DataType::F32 }; + + // Fill reference + fill(src, 3); + 
fill(w, 1); + fill(b, 2); + + auto fc = reference::fully_connected_layer(src, w, b, TensorShape(8U, _cur_batches)); + return reference::softmax_layer(fc, 1.f); + } + +protected: + TensorType _target{}; + SimpleTensor _reference{}; + AllocatorType _allocator{}; + unsigned int _max_batches{}; + unsigned int _cur_batches{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER */ -- cgit v1.2.1