about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-03-08 19:09:19 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:16 +0000
commit: 1562be3e8a449360a90af75f6f1481a30d41be75 (patch)
tree: 3180551f4c8595ae5c9630694754f3b1f8b493b1
parent: 684cb0f29d23fbe418e5e5347234abf9eccef363 (diff)
download: ComputeLibrary-1562be3e8a449360a90af75f6f1481a30d41be75.tar.gz
COMPMID-998: Release unused trainable parameters.
Change-Id: I361a520f34080016a25bc86e1e6789777c5152c1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124432
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/ITensor.h 10
-rw-r--r-- arm_compute/graph2/ITensorHandle.h 21
-rw-r--r-- arm_compute/graph2/backends/CL/CLSubTensorHandle.h 1
-rw-r--r-- arm_compute/graph2/backends/CL/CLTensorHandle.h 1
-rw-r--r-- arm_compute/graph2/backends/GLES/GCTensorHandle.h 1
-rw-r--r-- arm_compute/graph2/backends/NEON/NESubTensorHandle.h 1
-rw-r--r-- arm_compute/graph2/backends/NEON/NETensorHandle.h 1
-rw-r--r-- arm_compute/graph2/detail/ExecutionHelpers.h 5
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h 10
-rw-r--r-- arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h 11
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h 10
-rw-r--r-- arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h 9
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h 10
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h 11
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h 11
-rw-r--r-- arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h 9
-rw-r--r-- src/core/ITensor.cpp 12
-rw-r--r-- src/graph2/GraphManager.cpp 6
-rw-r--r-- src/graph2/backends/CL/CLSubTensorHandle.cpp 5
-rw-r--r-- src/graph2/backends/CL/CLTensorHandle.cpp 9
-rw-r--r-- src/graph2/backends/GLES/GCTensorHandle.cpp 9
-rw-r--r-- src/graph2/backends/NEON/NESubTensorHandle.cpp 5
-rw-r--r-- src/graph2/backends/NEON/NETensorHandle.cpp 9
-rw-r--r-- src/graph2/detail/ExecutionHelpers.cpp 11
-rw-r--r-- src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp 23
-rw-r--r-- src/runtime/CL/functions/CLFullyConnectedLayer.cpp 8
-rw-r--r-- src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp 14
-rw-r--r-- src/runtime/CL/functions/CLLocallyConnectedLayer.cpp 12
-rw-r--r-- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp 21
-rw-r--r-- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp 8
-rw-r--r-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp 8
-rw-r--r-- src/runtime/NEON/functions/NELocallyConnectedLayer.cpp 12
32 files changed, 260 insertions, 34 deletions
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
index 1ef9c6d3f6..255a236861 100644
--- a/arm_compute/core/ITensor.h
+++ b/arm_compute/core/ITensor.h
@@ -83,6 +83,16 @@ public:
* @param io_fmt Format information
*/
void print(std::ostream &s, IOFormatInfo io_fmt = IOFormatInfo()) const;
+ /** Flags if the tensor is used or not
+ *
+ * @return True if it is used else false
+ */
+ bool is_used() const;
+ /** Marks a tensor as unused */
+ void mark_as_unused() const;
+
+private:
+ mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */
};
using IImage = ITensor;
diff --git a/arm_compute/graph2/ITensorHandle.h b/arm_compute/graph2/ITensorHandle.h
index 68f79d8b01..c92a213121 100644
--- a/arm_compute/graph2/ITensorHandle.h
+++ b/arm_compute/graph2/ITensorHandle.h
@@ -30,25 +30,34 @@ namespace arm_compute
{
namespace graph2
{
-/** Tensor handle interface object **/
+/** Tensor handle interface object */
class ITensorHandle
{
public:
- /** Default virtual destructor **/
+ /** Default virtual destructor */
virtual ~ITensorHandle() = default;
- /** Allocates backend memory for the handle **/
+ /** Allocates backend memory for the handle */
virtual void allocate() = 0;
- /** Backend tensor object accessor **/
+ /** Backend tensor object accessor */
virtual arm_compute::ITensor &tensor() = 0;
- /** Backend tensor object const accessor **/
+ /** Backend tensor object const accessor */
virtual const arm_compute::ITensor &tensor() const = 0;
/** Maps backend tensor object
*
* @param[in] blocking Flags if the mapping operations should be blocking
*/
virtual void map(bool blocking) = 0;
- /** Un-maps a backend tensor object **/
+ /** Un-maps a backend tensor object */
virtual void unmap() = 0;
+ /** Releases backend tensor if it is marked as unused
+ *
+ *
+ * @note This has no effect on sub-tensors
+ * @warning Parent tensors don't keep track of sub-tensors,
+ * thus if a parent is marked as unused then all its sub-tensors will be invalidated;
+ * on the other hand, if a sub-tensor is marked as unused then the parent tensor won't be released
+ */
+ virtual void release_if_unused() = 0;
/** Checks if a backing tensor is a sub-tensor object or not
*
* @return True if the backend tensor is a sub-tensor else false
diff --git a/arm_compute/graph2/backends/CL/CLSubTensorHandle.h b/arm_compute/graph2/backends/CL/CLSubTensorHandle.h
index 9910980e59..6f3c00c041 100644
--- a/arm_compute/graph2/backends/CL/CLSubTensorHandle.h
+++ b/arm_compute/graph2/backends/CL/CLSubTensorHandle.h
@@ -59,6 +59,7 @@ public:
const arm_compute::ITensor &tensor() const override;
void map(bool blocking) override;
void unmap() override;
+ void release_if_unused() override;
bool is_subtensor() const override;
private:
diff --git a/arm_compute/graph2/backends/CL/CLTensorHandle.h b/arm_compute/graph2/backends/CL/CLTensorHandle.h
index 37d7147b6b..0b20d1d8fc 100644
--- a/arm_compute/graph2/backends/CL/CLTensorHandle.h
+++ b/arm_compute/graph2/backends/CL/CLTensorHandle.h
@@ -56,6 +56,7 @@ public:
const arm_compute::ITensor &tensor() const override;
void map(bool blocking) override;
void unmap() override;
+ void release_if_unused() override;
bool is_subtensor() const override;
private:
diff --git a/arm_compute/graph2/backends/GLES/GCTensorHandle.h b/arm_compute/graph2/backends/GLES/GCTensorHandle.h
index 8ead236614..281adee428 100644
--- a/arm_compute/graph2/backends/GLES/GCTensorHandle.h
+++ b/arm_compute/graph2/backends/GLES/GCTensorHandle.h
@@ -56,6 +56,7 @@ public:
const arm_compute::ITensor &tensor() const override;
void map(bool blocking) override;
void unmap() override;
+ void release_if_unused() override;
bool is_subtensor() const override;
private:
diff --git a/arm_compute/graph2/backends/NEON/NESubTensorHandle.h b/arm_compute/graph2/backends/NEON/NESubTensorHandle.h
index eacdfe0fb4..d62b66f343 100644
--- a/arm_compute/graph2/backends/NEON/NESubTensorHandle.h
+++ b/arm_compute/graph2/backends/NEON/NESubTensorHandle.h
@@ -59,6 +59,7 @@ public:
const arm_compute::ITensor &tensor() const override;
void map(bool blocking) override;
void unmap() override;
+ void release_if_unused() override;
bool is_subtensor() const override;
private:
diff --git a/arm_compute/graph2/backends/NEON/NETensorHandle.h b/arm_compute/graph2/backends/NEON/NETensorHandle.h
index c22fcdf216..23fd7ccc69 100644
--- a/arm_compute/graph2/backends/NEON/NETensorHandle.h
+++ b/arm_compute/graph2/backends/NEON/NETensorHandle.h
@@ -56,6 +56,7 @@ public:
const arm_compute::ITensor &tensor() const override;
void map(bool blocking) override;
void unmap() override;
+ void release_if_unused() override;
bool is_subtensor() const override;
private:
diff --git a/arm_compute/graph2/detail/ExecutionHelpers.h b/arm_compute/graph2/detail/ExecutionHelpers.h
index e4523ecf47..bc3cfd5d72 100644
--- a/arm_compute/graph2/detail/ExecutionHelpers.h
+++ b/arm_compute/graph2/detail/ExecutionHelpers.h
@@ -63,6 +63,11 @@ void validate_all_nodes(Graph &g);
* @return The execution workload
*/
ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx);
+/** Release the memory of all unused const nodes
+ *
+ * @param[in] g Graph to release the memory from
+ */
+void release_unused_tensors(Graph &g);
/** Calls accessor of a given tensor
*
* @param[in] tensor The tensor of which the accessor should be called
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index eb12fe4cca..d6fc8f0fcc 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -83,6 +83,14 @@ class CLDepthwiseConvolutionLayer : public IFunction
public:
/** Default constructor */
CLDepthwiseConvolutionLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
@@ -109,7 +117,9 @@ private:
CLTensor _weights_reshaped;
CLTensor _v2mm_output;
CLTensor _output_reshaped;
+ bool _is_first_run;
bool _is_quantized;
+ const ICLTensor *_original_weights;
};
}
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 1e9ee492ad..ad821318e9 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -76,6 +76,14 @@ class CLFullyConnectedLayer : public IFunction
public:
/** Constructor */
CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayer(const CLFullyConnectedLayer &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedLayer(CLFullyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayer &operator=(const CLFullyConnectedLayer &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QS8/QASYMM8/QS16/F16/F32.
@@ -121,6 +129,7 @@ private:
bool _is_fc_after_conv;
bool _accumulate_biases;
bool _is_quantized;
+ const ICLTensor *_original_weights;
};
}
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index dc1211a51a..91d35ca0b2 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -103,6 +103,14 @@ public:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMConvolutionLayer(const CLGEMMConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLGEMMConvolutionLayer(CLGEMMConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -172,6 +180,8 @@ private:
CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
CLCol2ImKernel _col2im_kernel;
+ const ICLTensor *_original_weights;
+
CLTensor _im2col_output;
CLTensor _weights_reshaped;
CLTensor _gemm_output;
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index e5aafd8547..b7b2587454 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -53,6 +53,14 @@ class CLLocallyConnectedLayer : public IFunction
public:
/** Default constructor */
CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedLayer(const CLLocallyConnectedLayer &) = delete;
+ /** Default move constructor */
+ CLLocallyConnectedLayer(CLLocallyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedLayer &operator=(const CLLocallyConnectedLayer &) = delete;
+ /** Default move assignment operator */
+ CLLocallyConnectedLayer &operator=(CLLocallyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -93,6 +101,7 @@ private:
CLTensor _weights_reshaped;
CLTensor _gemm_output;
bool _is_first_run;
+ const ICLTensor *_original_weights;
};
}
#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index f5805f5ad1..fe65ac1a43 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -97,6 +97,14 @@ class NEDepthwiseConvolutionLayer : public IFunction
public:
/** Default constructor */
NEDepthwiseConvolutionLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
@@ -123,7 +131,9 @@ private:
Tensor _weights_reshaped;
Tensor _v2mm_output;
Tensor _output_reshaped;
+ bool _is_first_run;
bool _is_quantized;
+ const ITensor *_original_weights;
};
}
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 9bc8d21fc4..071eecc3f7 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -94,6 +94,14 @@ class NEFullyConnectedLayer : public IFunction
public:
/** Constructor */
NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
+ /** Default move constructor */
+ NEFullyConnectedLayer(NEFullyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete;
+ /** Default move assignment operator */
+ NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QS8/QS16/F32.
@@ -134,6 +142,7 @@ private:
bool _is_batched_fc_layer;
bool _linearize_input;
bool _accumulate_biases;
+ const ITensor *_original_weights;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 43e9304414..e733fec4b6 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -101,7 +101,14 @@ class NEGEMMConvolutionLayer : public IFunction
public:
/** Constructor */
NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
-
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
+ /** Default move constructor */
+ NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -165,6 +172,8 @@ private:
NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
NECol2ImKernel _output_col2im_kernel;
+ const ITensor *_original_weights;
+
Tensor _input_im2col_reshaped;
Tensor _input_interleaved_reshaped;
Tensor _weights_reshaped;
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
index e07f09c588..18cd27414e 100644
--- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
@@ -53,6 +53,14 @@ class NELocallyConnectedLayer : public IFunction
public:
/** Default constructor */
NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedLayer(const NELocallyConnectedLayer &) = delete;
+ /** Default move constructor */
+ NELocallyConnectedLayer(NELocallyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete;
+ /** Default move assignment operator */
+ NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -93,6 +101,7 @@ private:
Tensor _weights_reshaped;
Tensor _gemm_output;
bool _is_first_run;
+ const ITensor *_original_weights;
};
}
#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */
diff --git a/src/core/ITensor.cpp b/src/core/ITensor.cpp
index b65c4f4c97..eb5f072d82 100644
--- a/src/core/ITensor.cpp
+++ b/src/core/ITensor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -151,3 +151,13 @@ void ITensor::print(std::ostream &s, IOFormatInfo io_fmt) const
}
}
}
+
+bool ITensor::is_used() const
+{
+ return _is_used;
+}
+
+void ITensor::mark_as_unused() const
+{
+ _is_used = false;
+}
diff --git a/src/graph2/GraphManager.cpp b/src/graph2/GraphManager.cpp
index e708dc6a61..a51ba61104 100644
--- a/src/graph2/GraphManager.cpp
+++ b/src/graph2/GraphManager.cpp
@@ -82,6 +82,12 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
// Finalize Graph context
ctx.finalize();
+
+ // Make first run
+ execute_graph(graph);
+
+ // Release all unused const nodes
+ detail::release_unused_tensors(graph);
}
void GraphManager::execute_graph(Graph &graph)
diff --git a/src/graph2/backends/CL/CLSubTensorHandle.cpp b/src/graph2/backends/CL/CLSubTensorHandle.cpp
index a001d57832..65a1ba4d5f 100644
--- a/src/graph2/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph2/backends/CL/CLSubTensorHandle.cpp
@@ -64,6 +64,11 @@ void CLSubTensorHandle::unmap()
_sub_tensor.unmap();
}
+void CLSubTensorHandle::release_if_unused()
+{
+ // noop
+}
+
bool CLSubTensorHandle::is_subtensor() const
{
return true;
diff --git a/src/graph2/backends/CL/CLTensorHandle.cpp b/src/graph2/backends/CL/CLTensorHandle.cpp
index f515e0bac3..89678fb280 100644
--- a/src/graph2/backends/CL/CLTensorHandle.cpp
+++ b/src/graph2/backends/CL/CLTensorHandle.cpp
@@ -60,6 +60,15 @@ void CLTensorHandle::unmap()
_tensor.unmap();
}
+void CLTensorHandle::release_if_unused()
+{
+ // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
+ if(!_tensor.is_used())
+ {
+ _tensor.allocator()->free();
+ }
+}
+
bool CLTensorHandle::is_subtensor() const
{
return false;
diff --git a/src/graph2/backends/GLES/GCTensorHandle.cpp b/src/graph2/backends/GLES/GCTensorHandle.cpp
index aa9ac8c2e9..2165cd2de6 100644
--- a/src/graph2/backends/GLES/GCTensorHandle.cpp
+++ b/src/graph2/backends/GLES/GCTensorHandle.cpp
@@ -60,6 +60,15 @@ void GCTensorHandle::unmap()
_tensor.unmap();
}
+void GCTensorHandle::release_if_unused()
+{
+ // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
+ if(!_tensor.is_used())
+ {
+ _tensor.allocator()->free();
+ }
+}
+
bool GCTensorHandle::is_subtensor() const
{
return false;
diff --git a/src/graph2/backends/NEON/NESubTensorHandle.cpp b/src/graph2/backends/NEON/NESubTensorHandle.cpp
index 491cf8259c..1cd15be29c 100644
--- a/src/graph2/backends/NEON/NESubTensorHandle.cpp
+++ b/src/graph2/backends/NEON/NESubTensorHandle.cpp
@@ -61,6 +61,11 @@ void NESubTensorHandle::unmap()
// noop
}
+void NESubTensorHandle::release_if_unused()
+{
+ // noop
+}
+
bool NESubTensorHandle::is_subtensor() const
{
return true;
diff --git a/src/graph2/backends/NEON/NETensorHandle.cpp b/src/graph2/backends/NEON/NETensorHandle.cpp
index a4af8aaf9b..0b901c3497 100644
--- a/src/graph2/backends/NEON/NETensorHandle.cpp
+++ b/src/graph2/backends/NEON/NETensorHandle.cpp
@@ -59,6 +59,15 @@ void NETensorHandle::unmap()
{
}
+void NETensorHandle::release_if_unused()
+{
+ // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
+ if(!_tensor.is_used())
+ {
+ _tensor.allocator()->free();
+ }
+}
+
bool NETensorHandle::is_subtensor() const
{
return false;
diff --git a/src/graph2/detail/ExecutionHelpers.cpp b/src/graph2/detail/ExecutionHelpers.cpp
index ae214ad5a6..3688d0b0dc 100644
--- a/src/graph2/detail/ExecutionHelpers.cpp
+++ b/src/graph2/detail/ExecutionHelpers.cpp
@@ -135,6 +135,17 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
return workload;
}
+void release_unused_tensors(Graph &g)
+{
+ for(auto &tensor : g.tensors())
+ {
+ if(tensor != nullptr && tensor->handle() != nullptr)
+ {
+ tensor->handle()->release_if_unused();
+ }
+ }
+}
+
void call_tensor_accessor(Tensor *tensor)
{
ARM_COMPUTE_ERROR_ON(!tensor);
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 112af60f35..8d7c92bdf1 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -64,7 +64,7 @@ void CLDepthwiseConvolutionLayer3x3::run()
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
: _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
- _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_quantized(false)
+ _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_first_run(true), _is_quantized(false), _original_weights(nullptr)
{
}
@@ -78,7 +78,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
const size_t weights_h = weights->info()->dimension(1);
const size_t weights_z = weights->info()->dimension(2);
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_first_run = true;
+ _original_weights = weights;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
bool append_bias = (biases != nullptr) && !_is_quantized;
const GPUTarget gpu_target = CLScheduler::get().target();
@@ -154,16 +156,23 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
void CLDepthwiseConvolutionLayer::run()
{
- CLScheduler::get().enqueue(_im2col_kernel);
+ // Run weights reshaping (Runs once for every configure)
+ if(_is_first_run)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
- CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(_v2mm_weights_fill_border);
+ _is_first_run = false;
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
+ }
+ CLScheduler::get().enqueue(_im2col_kernel);
CLScheduler::get().enqueue(_v2mm_input_fill_border);
- CLScheduler::get().enqueue(_v2mm_weights_fill_border);
CLScheduler::get().enqueue(_v2mm_kernel);
-
CLScheduler::get().enqueue(_vector_to_tensor_kernel);
-
if(_is_quantized)
{
CLScheduler::get().enqueue(_output_stage_kernel);
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 2b4670b98c..676706fb17 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -76,7 +76,7 @@ Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _accumulate_biases_kernel(), _im2col_output(),
- _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false)
+ _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false), _original_weights(nullptr)
{
}
@@ -152,6 +152,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
_is_fc_after_conv = true;
_accumulate_biases = false;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _original_weights = weights;
// Configure gemmlowp output
if(_is_quantized)
@@ -316,8 +317,13 @@ void CLFullyConnectedLayer::run()
// Reshape of the weights (happens only once)
if(!_are_weights_reshaped)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
_are_weights_reshaped = true;
_reshape_weights_kernel.run();
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index e7ad62f5ff..f43e100565 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -90,8 +90,8 @@ void CLConvolutionLayerReshapeWeights::run()
}
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _im2col_output(),
- _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
+ : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _original_weights(nullptr),
+ _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
{
}
@@ -164,7 +164,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
weights_info,
dilation));
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_first_run = true;
+ _original_weights = weights;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
const DataType dt = input->info()->data_type();
@@ -349,9 +351,13 @@ void CLGEMMConvolutionLayer::run()
// Run weights reshaping (Runs once for every configure)
if(_is_first_run)
{
- _reshape_weights.run();
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ _reshape_weights.run();
_is_first_run = false;
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index a3eb5010bd..986fe00973 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -73,7 +73,7 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
- _is_first_run(false)
+ _is_first_run(false), _original_weights(nullptr)
{
}
@@ -126,8 +126,9 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(CLLocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
- bool _has_bias = (biases != nullptr);
- _is_first_run = true;
+ bool _has_bias = (biases != nullptr);
+ _original_weights = weights;
+ _is_first_run = true;
const unsigned int kernel_width = weights->info()->dimension(0);
const unsigned int kernel_height = weights->info()->dimension(1);
@@ -169,8 +170,13 @@ void CLLocallyConnectedLayer::run()
// Run weights reshaping (Runs once for every configure)
if(_is_first_run)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
_is_first_run = false;
CLScheduler::get().enqueue(_weights_reshape_kernel);
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 95fcf8805e..f28ed715f6 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -153,7 +153,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
: _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
- _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_quantized(false)
+ _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_first_run(true), _is_quantized(false), _original_weights(nullptr)
{
}
@@ -167,7 +167,9 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
const size_t weights_h = weights->info()->dimension(1);
const size_t weights_z = weights->info()->dimension(2);
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_first_run = true;
+ _original_weights = weights;
// Should bias be appended ?
bool append_bias = (biases != nullptr) && !_is_quantized;
@@ -241,10 +243,21 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
void NEDepthwiseConvolutionLayer::run()
{
+ // Run weights reshaping (Runs once for every configure)
+ if(_is_first_run)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
+ NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
+ _is_first_run = false;
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
+ }
+
NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
- NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
- NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
if(_is_quantized)
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 26b7271710..b310ad35e3 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -132,7 +132,7 @@ void NEFullyConnectedLayerReshapeWeights::run()
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(),
- _reshape_weights_output(), _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false)
+ _reshape_weights_output(), _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false), _original_weights(nullptr)
{
}
@@ -163,6 +163,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
const int num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions;
const size_t linear_input_size = input->info()->tensor_shape().total_size_lower(num_input_dimensions);
+ _original_weights = weights;
_linearize_input = (input->info()->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1);
_are_weights_reshaped = are_weights_reshaped;
_accumulate_biases = biases != nullptr;
@@ -324,8 +325,13 @@ void NEFullyConnectedLayer::run()
// Reshape of the weights (happens only once)
if(!_are_weights_reshaped)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
_are_weights_reshaped = true;
_reshape_weights_kernel.run();
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index d9707d95e0..b2dd0227a5 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -217,7 +217,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
: _asm_glue(), _memory_group(memory_manager), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(),
- _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
+ _output_col2im_kernel(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
_is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false)
{
}
@@ -267,6 +267,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
ARM_COMPUTE_ERROR_THROW_ON(status);
+ _original_weights = weights;
const unsigned int fixed_point_position = input->info()->fixed_point_position();
const ITensor *biases_to_use = (_append_bias) ? biases : nullptr;
@@ -549,8 +550,13 @@ void NEGEMMConvolutionLayer::run()
// Run weights reshaping (Runs once for every configure)
if(!_are_weights_reshaped)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
_are_weights_reshaped = true;
_reshape_weights.run();
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index 973559441f..913acf86a2 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -73,7 +73,7 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
- _is_first_run(false)
+ _is_first_run(false), _original_weights(nullptr)
{
}
@@ -126,8 +126,9 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NELocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
- bool _has_bias = (biases != nullptr);
- _is_first_run = true;
+ bool _has_bias = (biases != nullptr);
+ _is_first_run = true;
+ _original_weights = weights;
const unsigned int kernel_width = weights->info()->dimension(0);
const unsigned int kernel_height = weights->info()->dimension(1);
@@ -169,8 +170,13 @@ void NELocallyConnectedLayer::run()
// Run weights reshaping (Runs once for every configure)
if(_is_first_run)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
_is_first_run = false;
NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+
+ // Mark original weights tensor as unused
+ _original_weights->mark_as_unused();
}
_memory_group.acquire();