From 1a569a30a2f456ff1a3e0a665201e1c3ab92df80 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 10 Sep 2019 17:20:34 +0100 Subject: COMPMID-2161 [NEON] Create IWeightManager class Change-Id: I1a9a46da2f98e896b825099151b56d1d8271dd31 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/1915 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/graph/GraphContext.h | 38 ++++++- arm_compute/graph/IDeviceBackend.h | 8 +- arm_compute/graph/Types.h | 1 + arm_compute/graph/backends/CL/CLDeviceBackend.h | 1 + arm_compute/graph/backends/FunctionHelpers.h | 4 +- arm_compute/graph/backends/GLES/GCDeviceBackend.h | 3 +- arm_compute/graph/backends/NEON/NEDeviceBackend.h | 3 +- arm_compute/graph/backends/Utils.h | 16 ++- arm_compute/graph/frontend/Layers.h | 29 ++++- .../CL/functions/CLConvertFullyConnectedWeights.h | 4 +- .../runtime/CL/functions/CLFullyConnectedLayer.h | 3 +- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 2 +- .../GLES_COMPUTE/functions/GCFullyConnectedLayer.h | 3 +- arm_compute/runtime/ITransformWeights.h | 117 +++++++++++++++++++++ arm_compute/runtime/IWeightsManager.h | 85 +++++++++++++++ .../functions/NEConvertFullyConnectedWeights.h | 48 ++++++++- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 2 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 85 +++++++++++---- arm_compute/runtime/NEON/functions/NEGEMM.h | 6 +- .../NEON/functions/NEGEMMAssemblyDispatch.h | 12 +-- .../NEON/functions/NEGEMMConvolutionLayer.h | 77 ++++++++++++-- arm_compute/runtime/NEON/functions/NERNNLayer.h | 4 +- .../functions/assembly/NEGEMMInterleavedWrapper.h | 5 +- 23 files changed, 495 insertions(+), 61 deletions(-) create mode 100644 arm_compute/runtime/ITransformWeights.h create mode 100644 arm_compute/runtime/IWeightsManager.h (limited to 'arm_compute') diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h index 21ba6df785..0eb9e81175 100644 --- a/arm_compute/graph/GraphContext.h +++ b/arm_compute/graph/GraphContext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/graph/Types.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include #include @@ -45,6 +46,13 @@ struct MemoryManagerContext IAllocator *allocator = { nullptr }; /**< Backend allocator to use */ }; +/** Contains structs required for weights management */ +struct WeightsManagerContext +{ + Target target = { Target::UNSPECIFIED }; /**< Target */ + std::shared_ptr wm = { nullptr }; /**< Weights manager */ +}; + /** Graph context **/ class GraphContext final { @@ -77,7 +85,7 @@ public: * * @param[in] memory_ctx Memory manage context * - * @return If the insertion succeeded else false + * @return True if the insertion succeeded else false */ bool insert_memory_management_ctx(MemoryManagerContext &&memory_ctx); /** Gets a memory manager context for a given target @@ -92,12 +100,34 @@ public: * @return Memory manager contexts */ std::map &memory_managers(); + /** Inserts a weights manager context + * + * @param[in] weights_ctx Weights manager context + * + * @return True if the insertion succeeded else false + */ + bool insert_weights_management_ctx(WeightsManagerContext &&weights_ctx); + + /** Gets a weights manager context for a given target + * + * @param[in] target To retrieve the weights management context + * + * @return Management context for the target if exists else nullptr + */ + WeightsManagerContext *weights_management_ctx(Target target); + + /** Gets the weights managers map + * + * @return Weights manager contexts + */ + std::map &weights_managers(); /** Finalizes memory managers in graph context */ void finalize(); private: - GraphConfig _config; /**< Graph configuration */ - std::map _memory_managers; /**< Memory managers for each target */ + GraphConfig _config; /**< Graph configuration */ + std::map _memory_managers; /**< Memory managers for each target */ + std::map _weights_managers; /**< Weights managers for each target */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h index 358d26af81..cf54976c28 100644 --- a/arm_compute/graph/IDeviceBackend.h +++ b/arm_compute/graph/IDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ #include "arm_compute/graph/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include @@ -112,6 +113,11 @@ public: * @return Memory manager */ virtual std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) = 0; + /** Create a backend weights manager + * + * @return Weights manager + */ + virtual std::shared_ptr create_weights_manager() = 0; }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 8b97708a63..63b1c94ac8 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -78,6 +78,7 @@ class TensorDescriptor; struct GraphConfig { bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-funcion auxilary memory */ + bool use_function_weights_manager{ true }; /**< Use a weights manager to manage transformed weights */ bool use_transition_memory_manager{ true }; /**< Use a memory manager to manager transition buffer memory */ bool use_tuner{ false }; /**< Use a tuner in tunable backends */ CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by the CL tuner */ diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index afe01fff70..8569cf1f34 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -67,6 +67,7 @@ public: std::unique_ptr configure_node(INode &node, GraphContext &ctx) override; Status validate_node(INode &node) override; std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; + std::shared_ptr create_weights_manager() override; private: int _context_count; /**< Counts how many contexts are currently using the backend */ diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index dd833061a9..10f8c0c5c7 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -827,7 +827,9 @@ std::unique_ptr create_fully_connected_layer(FullyConnectedLayerNode ARM_COMPUTE_ERROR_ON(output == nullptr); // Create and configure function - auto func = support::cpp14::make_unique(get_memory_manager(ctx, TargetInfo::TargetType)); + auto wm = get_weights_manager(ctx, TargetInfo::TargetType); + auto mm = get_memory_manager(ctx, TargetInfo::TargetType); + auto func = support::cpp14::make_unique(mm, wm.get()); func->configure(input, weights, biases, output, fc_info); const bool is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h index ca2d3734eb..83a7458c98 100644 --- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h +++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,6 +52,7 @@ public: std::unique_ptr configure_node(INode &node, GraphContext &ctx) override; Status validate_node(INode &node) override; std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; + std::shared_ptr create_weights_manager() override; private: bool _initialized; /**< Flag that specifies if the backend has been default initialized */ diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h index abc17d9e83..9891170fbd 100644 --- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h +++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,7 @@ public: std::unique_ptr configure_node(INode &node, GraphContext &ctx) override; Status validate_node(INode &node) override; std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; + std::shared_ptr create_weights_manager() override; private: Allocator _allocator; /**< NEON backend allocator */ diff --git a/arm_compute/graph/backends/Utils.h b/arm_compute/graph/backends/Utils.h index c7a50d93c6..2ca97ff5c5 100644 --- a/arm_compute/graph/backends/Utils.h +++ b/arm_compute/graph/backends/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" namespace arm_compute { @@ -90,6 +91,19 @@ inline std::shared_ptr get_memory_manager(GraphContext &ctx, Tar bool enabled = ctx.config().use_function_memory_manager && (ctx.memory_management_ctx(target) != nullptr); return enabled ? ctx.memory_management_ctx(target)->intra_mm : nullptr; } + +/** Returns the weights manager for a given target + * + * @param[in] ctx Graph context containing weight management metadata + * @param[in] target Target to retrieve the weights manager from + * + * @return The weights manager for the given target else false + */ +inline std::shared_ptr get_weights_manager(GraphContext &ctx, Target target) +{ + bool enabled = ctx.config().use_function_weights_manager && (ctx.weights_management_ctx(target) != nullptr); + return enabled ? ctx.weights_management_ctx(target)->wm : nullptr; +} } // namespace backends } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h index 27a0cd3026..120997a8b4 100644 --- a/arm_compute/graph/frontend/Layers.h +++ b/arm_compute/graph/frontend/Layers.h @@ -66,6 +66,31 @@ private: ITensorAccessorUPtr _accessor; }; +/** Constant Layer */ +class ConstantLayer final : public ILayer +{ +public: + /** Construct a constant layer. + * + * @param[in] desc Description of input tensor. + * @param[in] accessor Accessor to get input tensor data from. 
+ */ + ConstantLayer(TensorDescriptor desc, ITensorAccessorUPtr accessor) + : _desc(desc), _accessor(std::move(accessor)) + { + } + + NodeID create_layer(IStream &s) override + { + NodeParams common_params = { name(), s.hints().target_hint }; + return GraphBuilder::add_const_node(s.graph(), common_params, _desc, std::move(_accessor)); + } + +private: + TensorDescriptor _desc; + ITensorAccessorUPtr _accessor; +}; + /** Output Layer */ class OutputLayer final : public ILayer { @@ -635,8 +660,8 @@ public: * @param[in] out_quant_info (Optional) Output quantization info */ FullyConnectedLayer(unsigned int num_outputs, - SubStream &&sub_stream_weights, - SubStream &&sub_stream_bias, + SubStream sub_stream_weights, + SubStream sub_stream_bias, const FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), const QuantizationInfo weights_quant_info = QuantizationInfo(), const QuantizationInfo out_quant_info = QuantizationInfo()) diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h index 9bfade4894..43abb6769b 100644 --- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,8 @@ public: * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. + * + * @return A status */ void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 7cf7d951b6..d54304ed77 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -34,6 +34,7 @@ #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" namespace arm_compute @@ -76,7 +77,7 @@ class CLFullyConnectedLayer : public IFunction { public: /** Constructor */ - CLFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); + CLFullyConnectedLayer(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLFullyConnectedLayer(const CLFullyConnectedLayer &) = delete; /** Default move constructor */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index d29a31a530..0b27c824d9 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -94,7 +94,7 @@ private: class CLGEMMConvolutionLayer : public IFunction { public: - /** Default constructor + /** Constructor * * @param[in] memory_manager (Optional) Memory manager. 
*/ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h index 6fcebd63b4..3a13e659f9 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h @@ -30,6 +30,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" namespace arm_compute @@ -64,7 +65,7 @@ class GCFullyConnectedLayer : public IFunction { public: /** Constructor */ - GCFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); + GCFullyConnectedLayer(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ GCFullyConnectedLayer(const GCFullyConnectedLayer &) = delete; /** Default move constructor */ diff --git a/arm_compute/runtime/ITransformWeights.h b/arm_compute/runtime/ITransformWeights.h new file mode 100644 index 0000000000..6376c30088 --- /dev/null +++ b/arm_compute/runtime/ITransformWeights.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__ +#define __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__ + +#include + +namespace arm_compute +{ +// Forward declarations +class ITensor; + +/** Weights tensor transform interface + * In order to identify the different reshape functions, each reshape function has + * to generate a unique id. We use the following conversion using an unsigned 32bit value: + * + * Lower two bits store the target: + * 00 -> NEON + * 01 -> CL + * 10 -> GLES + * 11 -> Unused + * + * Five bits store the id of the reshape function: + * 00000 -> FullyConnectedLayerReshapeWeights + * 00001 -> ConvertFullyConnectedWeights + * 00010 -> ConvolutionLayerReshapeWeights + * 00011 -> DepthwiseConvolutionLayerReshapeWeights + * 00100 -> GEMMReshapeLHSMatrixKernel + * 00101 -> GEMMReshapeRHSMatrixKernel + * + * Rest of the bits are used for identifying special cases such as assembly functions and extra + * arguments in the reshape kernels. 
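 *
 * For example (an illustrative sketch only; the bias_bit value is a hypothetical extra argument):
 * a NEON transform built on ConvolutionLayerReshapeWeights that also encodes a bias flag in the
 * upper bits would compose its id as
 *
 *   const uint32_t target  = 0x0;       // lower two bits: 00    -> NEON
 *   const uint32_t func_id = 0x2 << 2;  // five-bit id:    00010 -> ConvolutionLayerReshapeWeights
 *   const uint32_t uid     = target | func_id | (bias_bit << 7);
 *
 * which matches the 0x8 | (_bias_bit << 7) value returned by the convolution reshape transform
 * added later in this patch.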
+ * + * */ +class ITransformWeights +{ +public: + /** Default Constructor */ + ITransformWeights() = default; + /** Default Destructor */ + virtual ~ITransformWeights() = default; + /** Prevent instances of this class to be copy constructed */ + ITransformWeights(const ITransformWeights &) = delete; + /** Prevent instances of this class to be copied */ + ITransformWeights &operator=(const ITransformWeights &) = delete; + /** Allow instances of this class to be move constructed */ + ITransformWeights(ITransformWeights &&) = default; + /** Allow instances of this class to be moved */ + ITransformWeights &operator=(ITransformWeights &&) = default; + + /** Get a pointer to the transformed weights + * + * @return The pointer to the transformed ITensor weights + */ + virtual ITensor *get_weights() = 0; + /** Function that returns a unique id of the reshape function + * + * @return The computed unique id + */ + virtual uint32_t uid() = 0; + /** Run the transformation function */ + virtual void run() = 0; + /** Release transformed weights memory */ + virtual void release() = 0; + /** Increase the object's refcount */ + void increase_refcount() + { + ++_num_refcount; + } + + /** Decrease the object's refcount and return the updated value + * + * @return The updated refcount + * */ + int32_t decrease_refcount() + { + return --_num_refcount; + } + + /** Function that returns a flag on whether the weights are reshaped or not + * + * @return True if the function is reshaped + */ + bool is_reshape_run() + { + return _reshape_run; + } + +protected: + std::atomic _num_refcount{ 0 }; + bool _reshape_run{ false }; +}; + +} // arm_compute + +#endif /*__ARM_COMPUTE_ITRANSFORMWEIGHTS_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/IWeightsManager.h b/arm_compute/runtime/IWeightsManager.h new file mode 100644 index 0000000000..2d61b89bc6 --- /dev/null +++ b/arm_compute/runtime/IWeightsManager.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_IWEIGHTSMANAGER_H__ +#define __ARM_COMPUTE_IWEIGHTSMANAGER_H__ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/ITransformWeights.h" + +#include + +namespace arm_compute +{ +/** Weights manager interface to handle weights transformations */ +class IWeightsManager +{ +public: + /** Constructor */ + IWeightsManager(); + /** Default Destructor */ + virtual ~IWeightsManager() = default; + /** Prevent instances of this class to be copy constructed */ + IWeightsManager(const IWeightsManager &) = delete; + /** Prevent instances of this class to be copied */ + IWeightsManager &operator=(const IWeightsManager &) = delete; + /** Allow instances of this class to be move constructed */ + IWeightsManager(IWeightsManager &&) = default; + /** Allow instances of this class to be moved */ + IWeightsManager &operator=(IWeightsManager &&) = default; + + /** Start managing a weights tensor + * + * @param[in] weights Pointer to the weights tensor to be managed + * @param[in] parent Parent node in case where the weights are coming from a previous reshape function + */ + void manage(const ITensor *weights, ITransformWeights *parent = nullptr); + /** Run the reshape function. + * + * @param[in] weights Pointer to the weights tensor we want to reshape + * @param[in] weights_transform Weights transformation object + * + * @return The reshaped tensor + */ + ITensor *run(const ITensor *weights, ITransformWeights *weights_transform); + /** Acquire the requested reshape tensor of the selected weights + * + * @param[in] weights Pointer to the weights tensor to be managed + * @param[in] weights_transform Weights transformation object + */ + ITensor *acquire(const ITensor *weights, ITransformWeights *weights_transform); + /** Check if the weights are managed + * + * @param[in] weights Pointer to the weights tensor we want to check if managed + * + * @return True if the weights tensor is managed else false + */ + bool are_weights_managed(const ITensor *weights); + +private: + std::map> _managed_weights; + std::map _managed_weights_parents; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_IWEIGHTSMANAGER_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 8f261421e6..50a86bd7c4 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,9 @@ #include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/ITransformWeights.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/Tensor.h" namespace arm_compute { @@ -52,6 +54,8 @@ public: * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. 
+ * + * @return A Status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); @@ -61,5 +65,45 @@ public: private: NEConvertFullyConnectedWeightsKernel _kernel; }; -} + +namespace weights_transformations +{ +/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */ +class NEConvertFullyConnectedWeightsManaged : public ITransformWeights +{ +public: + void run() override + { + _output.allocator()->allocate(); + _func.run(); + _reshape_run = true; + } + + void release() override + { + _output.allocator()->free(); + } + + ITensor *get_weights() override + { + return &_output; + } + + uint32_t uid() override + { + return _uid; + } + + void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout) + { + _func.configure(input, &_output, original_input_shape, data_layout); + } + +private: + static constexpr uint32_t _uid = 0x4; + Tensor _output{}; + NEConvertFullyConnectedWeights _func{}; +}; +} // namespace weights_transformations +} // namespace arm_compute #endif /* __ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 360bb23f22..6880bbba6b 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -73,7 +73,7 @@ namespace arm_compute class NEDeconvolutionLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NEDeconvolutionLayer(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 56ce274572..b80e0e49e0 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -63,6 +63,46 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; +namespace weights_transformations +{ +/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */ +class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights +{ +public: + void run() override + { + _output.allocator()->allocate(); + _func.run(); + _reshape_run = true; + } + + void release() override + { + _output.allocator()->free(); + } + + ITensor *get_weights() override + { + return &_output; + } + + uint32_t uid() override + { + return _uid; + } + + void configure(const ITensor *input) + { + _func.configure(input, &_output); + } + +private: + static constexpr uint32_t _uid = 0x0; + Tensor _output{}; + NEFullyConnectedLayerReshapeWeights _func{}; +}; +} // namespace weights_transformations + /** Basic function to compute a Fully Connected layer on NEON. 
This function calls the following NEON kernels: * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) @@ -75,7 +115,7 @@ class NEFullyConnectedLayer : public IFunction { public: /** Constructor */ - NEFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); + NEFullyConnectedLayer(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete; /** Default move constructor */ @@ -128,25 +168,28 @@ private: void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output); void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); - MemoryGroup _memory_group; - NEFlattenLayerKernel _flatten_kernel; - NEConvertFullyConnectedWeights _convert_weights; - NEFullyConnectedLayerReshapeWeights _reshape_weights_function; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; - NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - Tensor _flatten_output; - Tensor _gemmlowp_output; - Tensor _converted_weights_output; - Tensor _reshape_weights_output; - const ITensor *_original_weights; - bool _are_weights_converted; - bool _are_weights_reshaped; - bool _is_fc_after_conv; - bool _accumulate_biases; - bool _is_quantized; - bool _is_prepared; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + NEFlattenLayerKernel _flatten_kernel; + NEConvertFullyConnectedWeights _convert_weights; + weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; + NEFullyConnectedLayerReshapeWeights _reshape_weights_function; + weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function; + NEGEMM _mm_gemm; + NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; + NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + Tensor _flatten_output; + Tensor _gemmlowp_output; + Tensor _converted_weights_output; + Tensor _reshape_weights_output; + const ITensor *_original_weights; + bool _are_weights_converted; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _accumulate_biases; + bool _is_quantized; + bool _is_prepared; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 7f9e3181bc..d947be1ef9 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" @@ -51,7 +52,7 @@ class NEGEMM : public IFunction { public: /** Constructor */ - NEGEMM(std::shared_ptr memory_manager = nullptr); + NEGEMM(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMM(const NEGEMM &) = delete; /** Default move constructor */ @@ -96,6 +97,7 @@ public: private: MemoryGroup _memory_group; + IWeightsManager *_weights_manager; NEGEMMInterleave4x4Kernel _interleave_kernel; NEGEMMTranspose1xWKernel _transpose_kernel; NEGEMMMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index ec4f700034..83e495e695 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -27,6 +27,7 @@ #include "arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" @@ -38,9 +39,8 @@ namespace arm_compute class NEGEMMAssemblyDispatch : public IFunction { public: - /** Default constructor */ - NEGEMMAssemblyDispatch(std::shared_ptr memory_manager = nullptr); - + /** Constructor */ + NEGEMMAssemblyDispatch(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copy constructed */ NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; /** Prevent instances of this class from being copied */ @@ -79,8 +79,9 @@ private: /** Interface for the arm_gemm fallback */ std::unique_ptr _arm_gemm; - MemoryGroup _memory_group; /**< Function memory group */ - std::shared_ptr _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */ + MemoryGroup _memory_group; /**< Function memory group */ + std::shared_ptr _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */ + IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ public: /** If supported create an ACL function else fallback to the arm_gemm function. 
* @@ -117,6 +118,5 @@ public: void prepare() override; void run() override; }; - } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index ace924f146..dccc35f0af 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -32,6 +32,7 @@ #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -54,6 +55,14 @@ class NEConvolutionLayerReshapeWeights : public IFunction public: /** Constructor */ NEConvolutionLayerReshapeWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete; + /** Default move constructor */ + NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; + /** Default move assignment operator */ + NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default; /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32. @@ -78,6 +87,52 @@ private: NEWeightsReshapeKernel _weights_reshape_kernel; }; +namespace weights_transformations +{ +/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */ +class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights +{ +public: + void configure(const ITensor *input, const ITensor *biases) + { + _bias_bit = (biases != nullptr) ? 1 : 0; + _func.configure(input, biases, &_output); + } + + void run() override + { + _output.allocator()->allocate(); + _func.run(); + _reshape_run = true; + } + + ITensor *get_weights() override + { + return &_output; + } + + void release() override + { + _output.allocator()->free(); + } + + uint32_t uid() override + { + return ((0x8) | (_bias_bit << 7)); + } + + bool is_reshape_run() + { + return _reshape_run; + } + +private: + Tensor _output{}; + NEConvolutionLayerReshapeWeights _func{}; + int32_t _bias_bit{ 0 }; +}; +} // namespace weights_transformations + /** Basic function to compute the convolution layer. 
This function calls the following NEON kernels/functions: * * -# @ref NEIm2ColKernel @@ -92,7 +147,7 @@ class NEGEMMConvolutionLayer : public IFunction { public: /** Constructor */ - NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager = nullptr); + NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete; /** Default move constructor */ @@ -187,15 +242,17 @@ private: static Status validate_gemm3d(const ITensorInfo *input_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col); private: - MemoryGroup _memory_group; - NEConvolutionLayerReshapeWeights _reshape_weights; - NEIm2ColKernel _im2col_kernel; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NECol2ImKernel _col2im_kernel; - NEActivationLayer _activationlayer_function; - NEArithmeticAdditionKernel _add_bias_kernel; - NEReshapeLayer _reshape_layer; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + NEConvolutionLayerReshapeWeights _reshape_weights; + weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; + NEIm2ColKernel _im2col_kernel; + NEGEMM _mm_gemm; + NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; + NECol2ImKernel _col2im_kernel; + NEActivationLayer _activationlayer_function; + NEArithmeticAdditionKernel _add_bias_kernel; + NEReshapeLayer _reshape_layer; const ITensor *_original_weights; diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index ec394392de..978c445927 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -86,7 +86,7 @@ private: NEGEMM _gemm_state_f; NEArithmeticAdditionKernel _add_kernel; NEActivationLayerKernel _activation_kernel; - NEFullyConnectedLayer _fully_connected_kernel; + NEFullyConnectedLayer _fully_connected; NECopyKernel _copy_kernel; Tensor _fully_connected_out; Tensor _gemm_output; diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h index ad89e1fbec..d3dda9a95f 100644 --- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h +++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h @@ -32,6 +32,7 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IScheduler.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" @@ -94,8 +95,8 @@ public: class NEGEMMInterleavedWrapper : public IFunction { public: - NEGEMMInterleavedWrapper(std::shared_ptr memory_manager = nullptr); - ~NEGEMMInterleavedWrapper() = default; + NEGEMMInterleavedWrapper(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + ~NEGEMMInterleavedWrapper() = default; NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete; NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete; -- cgit v1.2.1
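The headers above only declare the new interfaces. As a minimal usage sketch, assuming control flow inferred from the interface documentation rather than taken from this patch (the free-function names register_weights_manager, configure_weights and prepare_weights are illustrative, as is passing the backend by reference), a NEON function is expected to cooperate with the weights manager roughly as follows: the graph backend wires a per-target manager into the GraphContext, and the function acquires and runs a managed transform instead of owning a private reshaped copy.

#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/IDeviceBackend.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

#include <utility>

using namespace arm_compute;

// Graph-side wiring (sketch): the backend creates the weights manager and the context
// stores it per target, mirroring the existing MemoryManagerContext handling.
void register_weights_manager(graph::GraphContext &ctx, graph::backends::IDeviceBackend &backend)
{
    graph::WeightsManagerContext wm_ctx;
    wm_ctx.target = graph::Target::NEON;
    wm_ctx.wm     = backend.create_weights_manager();
    ctx.insert_weights_management_ctx(std::move(wm_ctx));
}

// Function-side usage (sketch): at configure time the function registers its managed
// transform and acquires the tensor that will hold the reshaped weights; at prepare
// time it asks the manager to run the transform.
const ITensor *configure_weights(IWeightsManager *wm, const ITensor *weights,
                                 weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged &transform)
{
    if(wm != nullptr && wm->are_weights_managed(weights))
    {
        transform.configure(weights);
        return wm->acquire(weights, &transform);
    }
    return weights; // unmanaged path: the function keeps its own reshape output (omitted here)
}

void prepare_weights(IWeightsManager *wm, const ITensor *weights,
                     weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged &transform)
{
    if(wm != nullptr && wm->are_weights_managed(weights))
    {
        wm->run(weights, &transform); // allocates and fills the reshaped tensor on first use
    }
}

Keeping the transformed tensor inside the manager, keyed by the original weights tensor and the transform's uid(), is what allows several functions that consume the same constant weights to share a single reshaped copy, with ITransformWeights' reference count tracking when that copy can be released.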