From 1562be3e8a449360a90af75f6f1481a30d41be75 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 8 Mar 2018 19:09:19 +0000 Subject: COMPMID-998: Release unused trainable parameters. Change-Id: I361a520f34080016a25bc86e1e6789777c5152c1 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124432 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/ITensor.h | 10 ++++++++++ arm_compute/graph2/ITensorHandle.h | 21 +++++++++++++++------ arm_compute/graph2/backends/CL/CLSubTensorHandle.h | 1 + arm_compute/graph2/backends/CL/CLTensorHandle.h | 1 + arm_compute/graph2/backends/GLES/GCTensorHandle.h | 1 + .../graph2/backends/NEON/NESubTensorHandle.h | 1 + arm_compute/graph2/backends/NEON/NETensorHandle.h | 1 + arm_compute/graph2/detail/ExecutionHelpers.h | 5 +++++ .../CL/functions/CLDepthwiseConvolutionLayer.h | 10 ++++++++++ .../runtime/CL/functions/CLFullyConnectedLayer.h | 11 ++++++++++- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 10 ++++++++++ .../runtime/CL/functions/CLLocallyConnectedLayer.h | 9 +++++++++ .../NEON/functions/NEDepthwiseConvolutionLayer.h | 10 ++++++++++ .../runtime/NEON/functions/NEFullyConnectedLayer.h | 11 ++++++++++- .../runtime/NEON/functions/NEGEMMConvolutionLayer.h | 11 ++++++++++- .../NEON/functions/NELocallyConnectedLayer.h | 9 +++++++++ 16 files changed, 113 insertions(+), 9 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h index 1ef9c6d3f6..255a236861 100644 --- a/arm_compute/core/ITensor.h +++ b/arm_compute/core/ITensor.h @@ -83,6 +83,16 @@ public: * @param io_fmt Format information */ void print(std::ostream &s, IOFormatInfo io_fmt = IOFormatInfo()) const; + /** Flags if the tensor is used or not + * + * @return True if it is used else false + */ + bool is_used() const; + /** Marks a tensor as unused */ + void mark_as_unused() const; + +private: + mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */ }; using IImage = 
ITensor; diff --git a/arm_compute/graph2/ITensorHandle.h b/arm_compute/graph2/ITensorHandle.h index 68f79d8b01..c92a213121 100644 --- a/arm_compute/graph2/ITensorHandle.h +++ b/arm_compute/graph2/ITensorHandle.h @@ -30,25 +30,34 @@ namespace arm_compute { namespace graph2 { -/** Tensor handle interface object **/ +/** Tensor handle interface object */ class ITensorHandle { public: - /** Default virtual destructor **/ + /** Default virtual destructor */ virtual ~ITensorHandle() = default; - /** Allocates backend memory for the handle **/ + /** Allocates backend memory for the handle */ virtual void allocate() = 0; - /** Backend tensor object accessor **/ + /** Backend tensor object accessor */ virtual arm_compute::ITensor &tensor() = 0; - /** Backend tensor object const accessor **/ + /** Backend tensor object const accessor */ virtual const arm_compute::ITensor &tensor() const = 0; /** Maps backend tensor object * * @param[in] blocking Flags if the mapping operations should be blocking */ virtual void map(bool blocking) = 0; - /** Un-maps a backend tensor object **/ + /** Un-maps a backend tensor object */ virtual void unmap() = 0; + /** Releases backend tensor if it is marked as unused + * + * + * @note This has no effect on sub-tensors + * @warning Parent tensors don't keep track of sub-tensors, + * thus if a parent is set as unused then all sub-tensors will be invalidated, + * on the other hand if a sub-tensor is marked as unused then the parent tensor won't be released + */ + virtual void release_if_unused() = 0; /** Checks if a backing tensor is a sub-tensor object or not * * @return True if the backend tensor is a sub-tensor else false diff --git a/arm_compute/graph2/backends/CL/CLSubTensorHandle.h b/arm_compute/graph2/backends/CL/CLSubTensorHandle.h index 9910980e59..6f3c00c041 100644 --- a/arm_compute/graph2/backends/CL/CLSubTensorHandle.h +++ b/arm_compute/graph2/backends/CL/CLSubTensorHandle.h @@ -59,6 +59,7 @@ public: const arm_compute::ITensor &tensor() 
const override; void map(bool blocking) override; void unmap() override; + void release_if_unused() override; bool is_subtensor() const override; private: diff --git a/arm_compute/graph2/backends/CL/CLTensorHandle.h b/arm_compute/graph2/backends/CL/CLTensorHandle.h index 37d7147b6b..0b20d1d8fc 100644 --- a/arm_compute/graph2/backends/CL/CLTensorHandle.h +++ b/arm_compute/graph2/backends/CL/CLTensorHandle.h @@ -56,6 +56,7 @@ public: const arm_compute::ITensor &tensor() const override; void map(bool blocking) override; void unmap() override; + void release_if_unused() override; bool is_subtensor() const override; private: diff --git a/arm_compute/graph2/backends/GLES/GCTensorHandle.h b/arm_compute/graph2/backends/GLES/GCTensorHandle.h index 8ead236614..281adee428 100644 --- a/arm_compute/graph2/backends/GLES/GCTensorHandle.h +++ b/arm_compute/graph2/backends/GLES/GCTensorHandle.h @@ -56,6 +56,7 @@ public: const arm_compute::ITensor &tensor() const override; void map(bool blocking) override; void unmap() override; + void release_if_unused() override; bool is_subtensor() const override; private: diff --git a/arm_compute/graph2/backends/NEON/NESubTensorHandle.h b/arm_compute/graph2/backends/NEON/NESubTensorHandle.h index eacdfe0fb4..d62b66f343 100644 --- a/arm_compute/graph2/backends/NEON/NESubTensorHandle.h +++ b/arm_compute/graph2/backends/NEON/NESubTensorHandle.h @@ -59,6 +59,7 @@ public: const arm_compute::ITensor &tensor() const override; void map(bool blocking) override; void unmap() override; + void release_if_unused() override; bool is_subtensor() const override; private: diff --git a/arm_compute/graph2/backends/NEON/NETensorHandle.h b/arm_compute/graph2/backends/NEON/NETensorHandle.h index c22fcdf216..23fd7ccc69 100644 --- a/arm_compute/graph2/backends/NEON/NETensorHandle.h +++ b/arm_compute/graph2/backends/NEON/NETensorHandle.h @@ -56,6 +56,7 @@ public: const arm_compute::ITensor &tensor() const override; void map(bool blocking) override; void unmap() 
override; + void release_if_unused() override; bool is_subtensor() const override; private: diff --git a/arm_compute/graph2/detail/ExecutionHelpers.h b/arm_compute/graph2/detail/ExecutionHelpers.h index e4523ecf47..bc3cfd5d72 100644 --- a/arm_compute/graph2/detail/ExecutionHelpers.h +++ b/arm_compute/graph2/detail/ExecutionHelpers.h @@ -63,6 +63,11 @@ void validate_all_nodes(Graph &g); * @return The execution workload */ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx); +/** Release the memory of all unused const nodes + * + * @param[in] g Graph to release the memory from + */ +void release_unused_tensors(Graph &g); /** Calls accessor of a given tensor * * @param[in] tensor The tensor of which the accessor should be called diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index eb12fe4cca..d6fc8f0fcc 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -83,6 +83,14 @@ class CLDepthwiseConvolutionLayer : public IFunction public: /** Default constructor */ CLDepthwiseConvolutionLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete; + /** Default move constructor */ + CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete; + /** Default move assignment operator */ + CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). 
@@ -109,7 +117,9 @@ private: CLTensor _weights_reshaped; CLTensor _v2mm_output; CLTensor _output_reshaped; + bool _is_first_run; bool _is_quantized; + const ICLTensor *_original_weights; }; } #endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 1e9ee492ad..ad821318e9 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -76,6 +76,14 @@ class CLFullyConnectedLayer : public IFunction public: /** Constructor */ CLFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFullyConnectedLayer(const CLFullyConnectedLayer &) = delete; + /** Default move constructor */ + CLFullyConnectedLayer(CLFullyConnectedLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFullyConnectedLayer &operator=(const CLFullyConnectedLayer &) = delete; + /** Default move assignment operator */ + CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QS8/QASYMM8/QS16/F16/F32. 
@@ -121,6 +129,7 @@ private: bool _is_fc_after_conv; bool _accumulate_biases; bool _is_quantized; + const ICLTensor *_original_weights; }; } #endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index dc1211a51a..91d35ca0b2 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -103,6 +103,14 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ CLGEMMConvolutionLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMConvolutionLayer(const CLGEMMConvolutionLayer &) = delete; + /** Default move constructor */ + CLGEMMConvolutionLayer(CLGEMMConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete; + /** Default move assignment operator */ + CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -172,6 +180,8 @@ private: CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; CLCol2ImKernel _col2im_kernel; + const ICLTensor *_original_weights; + CLTensor _im2col_output; CLTensor _weights_reshaped; CLTensor _gemm_output; diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h index e5aafd8547..b7b2587454 100644 --- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -53,6 +53,14 @@ class CLLocallyConnectedLayer : public IFunction public: /** Default constructor */ CLLocallyConnectedLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedLayer(const CLLocallyConnectedLayer &) = delete; + /** Default move constructor */ + CLLocallyConnectedLayer(CLLocallyConnectedLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedLayer &operator=(const CLLocallyConnectedLayer &) = delete; + /** Default move assignment operator */ + CLLocallyConnectedLayer &operator=(CLLocallyConnectedLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -93,6 +101,7 @@ private: CLTensor _weights_reshaped; CLTensor _gemm_output; bool _is_first_run; + const ICLTensor *_original_weights; }; } #endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index f5805f5ad1..fe65ac1a43 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -97,6 +97,14 @@ class NEDepthwiseConvolutionLayer : public IFunction public: /** Default constructor */ NEDepthwiseConvolutionLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete; + /** Default move constructor */ + NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete; + /** Default move assignment operator */ + NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). 
@@ -123,7 +131,9 @@ private: Tensor _weights_reshaped; Tensor _v2mm_output; Tensor _output_reshaped; + bool _is_first_run; bool _is_quantized; + const ITensor *_original_weights; }; } #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 9bc8d21fc4..071eecc3f7 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -94,6 +94,14 @@ class NEFullyConnectedLayer : public IFunction public: /** Constructor */ NEFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete; + /** Default move constructor */ + NEFullyConnectedLayer(NEFullyConnectedLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; + /** Default move assignment operator */ + NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QS8/QS16/F32. 
@@ -134,6 +142,7 @@ private: bool _is_batched_fc_layer; bool _linearize_input; bool _accumulate_biases; + const ITensor *_original_weights; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 43e9304414..e733fec4b6 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -101,7 +101,14 @@ class NEGEMMConvolutionLayer : public IFunction public: /** Constructor */ NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager = nullptr); - + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete; + /** Default move constructor */ + NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete; + /** Default move assignment operator */ + NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -165,6 +172,8 @@ private: NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; NECol2ImKernel _output_col2im_kernel; + const ITensor *_original_weights; + Tensor _input_im2col_reshaped; Tensor _input_interleaved_reshaped; Tensor _weights_reshaped; diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h index e07f09c588..18cd27414e 100644 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -53,6 +53,14 @@ class NELocallyConnectedLayer : public IFunction public: /** Default constructor */ NELocallyConnectedLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete; + /** Default move constructor */ + NELocallyConnectedLayer(NELocallyConnectedLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; + /** Default move assignment operator */ + NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -93,6 +101,7 @@ private: Tensor _weights_reshaped; Tensor _gemm_output; bool _is_first_run; + const ITensor *_original_weights; }; } #endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */ -- cgit v1.2.1