aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2019-09-10 17:20:34 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2019-09-26 10:17:30 +0000
commit1a569a30a2f456ff1a3e0a665201e1c3ab92df80 (patch)
tree9d68934f461579edefbe65246f6ee435aaa18808 /arm_compute
parentf1cf394ae882e6e8fb2e0986f88d2548b82a85bb (diff)
downloadComputeLibrary-1a569a30a2f456ff1a3e0a665201e1c3ab92df80.tar.gz
COMPMID-2161 [NEON] Create IWeightManager class
Change-Id: I1a9a46da2f98e896b825099151b56d1d8271dd31 Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/1915 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/graph/GraphContext.h38
-rw-r--r--arm_compute/graph/IDeviceBackend.h8
-rw-r--r--arm_compute/graph/Types.h1
-rw-r--r--arm_compute/graph/backends/CL/CLDeviceBackend.h1
-rw-r--r--arm_compute/graph/backends/FunctionHelpers.h4
-rw-r--r--arm_compute/graph/backends/GLES/GCDeviceBackend.h3
-rw-r--r--arm_compute/graph/backends/NEON/NEDeviceBackend.h3
-rw-r--r--arm_compute/graph/backends/Utils.h16
-rw-r--r--arm_compute/graph/frontend/Layers.h29
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h2
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h3
-rw-r--r--arm_compute/runtime/ITransformWeights.h117
-rw-r--r--arm_compute/runtime/IWeightsManager.h85
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h48
-rw-r--r--arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h85
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMM.h6
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h77
-rw-r--r--arm_compute/runtime/NEON/functions/NERNNLayer.h4
-rw-r--r--arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h5
23 files changed, 495 insertions, 61 deletions
diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h
index 21ba6df785..0eb9e81175 100644
--- a/arm_compute/graph/GraphContext.h
+++ b/arm_compute/graph/GraphContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/graph/Types.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include <map>
#include <memory>
@@ -45,6 +46,13 @@ struct MemoryManagerContext
IAllocator *allocator = { nullptr }; /**< Backend allocator to use */
};
+/** Weights management context: associates a target with the weights manager used for it */
+struct WeightsManagerContext
+{
+ Target target = { Target::UNSPECIFIED }; /**< Target this context applies to (defaults to UNSPECIFIED) */
+ std::shared_ptr<arm_compute::IWeightsManager> wm = { nullptr }; /**< Shared weights manager (null until one is set) */
+};
+
/** Graph context **/
class GraphContext final
{
@@ -77,7 +85,7 @@ public:
*
* @param[in] memory_ctx Memory manage context
*
- * @return If the insertion succeeded else false
+ * @return True if the insertion succeeded else false
*/
bool insert_memory_management_ctx(MemoryManagerContext &&memory_ctx);
/** Gets a memory manager context for a given target
@@ -92,12 +100,34 @@ public:
* @return Memory manager contexts
*/
std::map<Target, MemoryManagerContext> &memory_managers();
+ /** Inserts a weights manager context
+ *
+ * @param[in] weights_ctx Weights manager context
+ *
+ * @return True if the insertion succeeded else false
+ */
+ bool insert_weights_management_ctx(WeightsManagerContext &&weights_ctx);
+
+ /** Gets a weights manager context for a given target
+ *
+ * @param[in] target To retrieve the weights management context
+ *
+ * @return Management context for the target if exists else nullptr
+ */
+ WeightsManagerContext *weights_management_ctx(Target target);
+
+ /** Gets the weights managers map
+ *
+ * @return Weights manager contexts
+ */
+ std::map<Target, WeightsManagerContext> &weights_managers();
/** Finalizes memory managers in graph context */
void finalize();
private:
- GraphConfig _config; /**< Graph configuration */
- std::map<Target, MemoryManagerContext> _memory_managers; /**< Memory managers for each target */
+ GraphConfig _config; /**< Graph configuration */
+ std::map<Target, MemoryManagerContext> _memory_managers; /**< Memory managers for each target */
+ std::map<Target, WeightsManagerContext> _weights_managers; /**< Weights managers for each target */
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h
index 358d26af81..cf54976c28 100644
--- a/arm_compute/graph/IDeviceBackend.h
+++ b/arm_compute/graph/IDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@
#include "arm_compute/graph/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include <memory>
@@ -112,6 +113,11 @@ public:
* @return Memory manager
*/
virtual std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) = 0;
+ /** Create a backend weights manager
+ *
+ * @return Weights manager
+ */
+ virtual std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() = 0;
};
} // namespace backends
} // namespace graph
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 8b97708a63..63b1c94ac8 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -78,6 +78,7 @@ class TensorDescriptor;
struct GraphConfig
{
bool use_function_memory_manager{ true }; /**< Use a memory manager to manage per-funcion auxilary memory */
+ bool use_function_weights_manager{ true }; /**< Use a weights manager to manage transformed weights */
bool use_transition_memory_manager{ true }; /**< Use a memory manager to manager transition buffer memory */
bool use_tuner{ false }; /**< Use a tuner in tunable backends */
CLTunerMode tuner_mode{ CLTunerMode::EXHAUSTIVE }; /**< Tuner mode to be used by the CL tuner */
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index afe01fff70..8569cf1f34 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -67,6 +67,7 @@ public:
std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
Status validate_node(INode &node) override;
std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
+ std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
private:
int _context_count; /**< Counts how many contexts are currently using the backend */
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index dd833061a9..10f8c0c5c7 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -827,7 +827,9 @@ std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode
ARM_COMPUTE_ERROR_ON(output == nullptr);
// Create and configure function
- auto func = support::cpp14::make_unique<FullyConnectedLayerFunction>(get_memory_manager(ctx, TargetInfo::TargetType));
+ auto wm = get_weights_manager(ctx, TargetInfo::TargetType);
+ auto mm = get_memory_manager(ctx, TargetInfo::TargetType);
+ auto func = support::cpp14::make_unique<FullyConnectedLayerFunction>(mm, wm.get());
func->configure(input, weights, biases, output, fc_info);
const bool is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
index ca2d3734eb..83a7458c98 100644
--- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h
+++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,6 +52,7 @@ public:
std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
Status validate_node(INode &node) override;
std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
+ std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
private:
bool _initialized; /**< Flag that specifies if the backend has been default initialized */
diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
index abc17d9e83..9891170fbd 100644
--- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h
+++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,6 +51,7 @@ public:
std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
Status validate_node(INode &node) override;
std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
+ std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
private:
Allocator _allocator; /**< NEON backend allocator */
diff --git a/arm_compute/graph/backends/Utils.h b/arm_compute/graph/backends/Utils.h
index c7a50d93c6..2ca97ff5c5 100644
--- a/arm_compute/graph/backends/Utils.h
+++ b/arm_compute/graph/backends/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
namespace arm_compute
{
@@ -90,6 +91,19 @@ inline std::shared_ptr<IMemoryManager> get_memory_manager(GraphContext &ctx, Tar
bool enabled = ctx.config().use_function_memory_manager && (ctx.memory_management_ctx(target) != nullptr);
return enabled ? ctx.memory_management_ctx(target)->intra_mm : nullptr;
}
+
+/** Returns the weights manager for a given target
+ *
+ * @param[in] ctx Graph context holding the graph configuration and the per-target weights management contexts
+ * @param[in] target Target to retrieve the weights manager from
+ *
+ * @return The weights manager of the target, or nullptr if use_function_weights_manager is off or the target has no context
+ */
+inline std::shared_ptr<IWeightsManager> get_weights_manager(GraphContext &ctx, Target target)
+{
+ bool enabled = ctx.config().use_function_weights_manager && (ctx.weights_management_ctx(target) != nullptr); // both the config flag and a registered context are required
+ return enabled ? ctx.weights_management_ctx(target)->wm : nullptr;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 27a0cd3026..120997a8b4 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -66,6 +66,31 @@ private:
ITensorAccessorUPtr _accessor;
};
+/** Constant Layer: adds a constant node to the graph of the stream it is created on */
+class ConstantLayer final : public ILayer
+{
+public:
+ /** Construct a constant layer.
+ *
+ * @param[in] desc Descriptor of the constant tensor.
+ * @param[in] accessor Accessor forwarded to the constant node; it is moved into the layer.
+ */
+ ConstantLayer(TensorDescriptor desc, ITensorAccessorUPtr accessor)
+ : _desc(desc), _accessor(std::move(accessor))
+ {
+ }
+
+ NodeID create_layer(IStream &s) override /**< Adds a const node to s.graph() and returns its id; _accessor is moved out, so it is consumed by the first call */
+ {
+ NodeParams common_params = { name(), s.hints().target_hint }; // layer name plus the stream's target hint
+ return GraphBuilder::add_const_node(s.graph(), common_params, _desc, std::move(_accessor));
+ }
+
+private:
+ TensorDescriptor _desc; /**< Descriptor passed to the constant node */
+ ITensorAccessorUPtr _accessor; /**< Accessor passed to the constant node (moved-from once create_layer has run) */
+};
+
/** Output Layer */
class OutputLayer final : public ILayer
{
@@ -635,8 +660,8 @@ public:
* @param[in] out_quant_info (Optional) Output quantization info
*/
FullyConnectedLayer(unsigned int num_outputs,
- SubStream &&sub_stream_weights,
- SubStream &&sub_stream_bias,
+ SubStream sub_stream_weights,
+ SubStream sub_stream_bias,
const FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
const QuantizationInfo weights_quant_info = QuantizationInfo(),
const QuantizationInfo out_quant_info = QuantizationInfo())
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 9bfade4894..43abb6769b 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,6 +41,8 @@ public:
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
+ *
+ * @return A status
*/
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 7cf7d951b6..d54304ed77 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -34,6 +34,7 @@
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
@@ -76,7 +77,7 @@ class CLFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLFullyConnectedLayer(const CLFullyConnectedLayer &) = delete;
/** Default move constructor */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index d29a31a530..0b27c824d9 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -94,7 +94,7 @@ private:
class CLGEMMConvolutionLayer : public IFunction
{
public:
- /** Default constructor
+ /** Constructor
*
* @param[in] memory_manager (Optional) Memory manager.
*/
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
index 6fcebd63b4..3a13e659f9 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
@@ -30,6 +30,7 @@
#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
@@ -64,7 +65,7 @@ class GCFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
GCFullyConnectedLayer(const GCFullyConnectedLayer &) = delete;
/** Default move constructor */
diff --git a/arm_compute/runtime/ITransformWeights.h b/arm_compute/runtime/ITransformWeights.h
new file mode 100644
index 0000000000..6376c30088
--- /dev/null
+++ b/arm_compute/runtime/ITransformWeights.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__
+#define __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__
+
+#include <atomic>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Weights tensor transform interface
+ * In order to identify the different reshape functions, each reshape function has
+ * to generate a unique id. We use the following convention, encoded in an unsigned 32bit value:
+ *
+ * Lower two bits store the target:
+ * 00 -> NEON
+ * 01 -> CL
+ * 10 -> GLES
+ * 11 -> Unused
+ *
+ * The next five bits store the id of the reshape function:
+ * 00000 -> FullyConnectedLayerReshapeWeights
+ * 00001 -> ConvertFullyConnectedWeights
+ * 00010 -> ConvolutionLayerReshapeWeights
+ * 00011 -> DepthwiseConvolutionLayerReshapeWeights
+ * 00100 -> GEMMReshapeLHSMatrixKernel
+ * 00101 -> GEMMReshapeRHSMatrixKernel
+ *
+ * The rest of the bits are used for identifying special cases such as assembly functions and extra
+ * arguments in the reshape kernels.
+ * NOTE(review): uint32_t/int32_t are used below but this header only includes <atomic>, not <cstdint>.
+ * */
+class ITransformWeights
+{
+public:
+ /** Default Constructor */
+ ITransformWeights() = default;
+ /** Default Destructor */
+ virtual ~ITransformWeights() = default;
+ /** Prevent instances of this class from being copy constructed */
+ ITransformWeights(const ITransformWeights &) = delete;
+ /** Prevent instances of this class from being copy assigned */
+ ITransformWeights &operator=(const ITransformWeights &) = delete;
+ /** Intended to allow move construction. NOTE(review): the std::atomic member is neither copyable nor movable, so this defaulted constructor is defined as deleted */
+ ITransformWeights(ITransformWeights &&) = default;
+ /** Intended to allow move assignment. NOTE(review): defined as deleted for the same reason as the move constructor */
+ ITransformWeights &operator=(ITransformWeights &&) = default;
+
+ /** Get a pointer to the transformed weights
+ *
+ * @return The pointer to the transformed ITensor weights
+ */
+ virtual ITensor *get_weights() = 0;
+ /** Function that returns the unique id of the reshape function (see the id layout in the class description)
+ *
+ * @return The unique id
+ */
+ virtual uint32_t uid() = 0;
+ /** Run the transformation function */
+ virtual void run() = 0;
+ /** Release transformed weights memory */
+ virtual void release() = 0;
+ /** Atomically increase the object's refcount by one */
+ void increase_refcount()
+ {
+ ++_num_refcount;
+ }
+
+ /** Atomically decrease the object's refcount by one and return the updated value
+ *
+ * @return The refcount after the decrement (no lower bound is enforced here)
+ * */
+ int32_t decrease_refcount()
+ {
+ return --_num_refcount;
+ }
+
+ /** Function that returns a flag on whether the reshape has been run or not
+ *
+ * @return The current value of the reshape-run flag (false until an implementation sets it)
+ */
+ bool is_reshape_run()
+ {
+ return _reshape_run;
+ }
+
+protected:
+ std::atomic<int32_t> _num_refcount{ 0 }; /**< Reference count, starts at zero */
+ bool _reshape_run{ false }; /**< Flag to be set by implementations once the transformation has run */
+};
+
+} // arm_compute
+
+#endif /*__ARM_COMPUTE_ITRANSFORMWEIGHTS_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/IWeightsManager.h b/arm_compute/runtime/IWeightsManager.h
new file mode 100644
index 0000000000..2d61b89bc6
--- /dev/null
+++ b/arm_compute/runtime/IWeightsManager.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_IWEIGHTSMANAGER_H__
+#define __ARM_COMPUTE_IWEIGHTSMANAGER_H__
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/runtime/ITransformWeights.h"
+
+#include <map>
+
+namespace arm_compute
+{
+/** Weights manager interface to handle weights transformations */
+class IWeightsManager
+{
+public:
+ /** Constructor (declared here, defined out of line) */
+ IWeightsManager();
+ /** Default Destructor */
+ virtual ~IWeightsManager() = default;
+ /** Prevent instances of this class from being copy constructed */
+ IWeightsManager(const IWeightsManager &) = delete;
+ /** Prevent instances of this class from being copy assigned */
+ IWeightsManager &operator=(const IWeightsManager &) = delete;
+ /** Allow instances of this class to be move constructed */
+ IWeightsManager(IWeightsManager &&) = default;
+ /** Allow instances of this class to be move assigned */
+ IWeightsManager &operator=(IWeightsManager &&) = default;
+
+ /** Start managing a weights tensor
+ *
+ * @param[in] weights Pointer to the weights tensor to be managed
+ * @param[in] parent (Optional) Parent transformation when the weights come from a previous reshape function. Defaults to nullptr
+ */
+ void manage(const ITensor *weights, ITransformWeights *parent = nullptr);
+ /** Run the reshape function.
+ *
+ * @param[in] weights Pointer to the weights tensor we want to reshape
+ * @param[in] weights_transform Weights transformation object
+ *
+ * @return The reshaped tensor
+ */
+ ITensor *run(const ITensor *weights, ITransformWeights *weights_transform);
+ /** Acquire the requested reshape tensor of the selected weights
+ *
+ * @param[in] weights Pointer to the weights tensor whose reshaped version is requested
+ * @param[in] weights_transform Weights transformation object
+ * @return The requested reshaped tensor */
+ ITensor *acquire(const ITensor *weights, ITransformWeights *weights_transform);
+ /** Check if the weights are managed
+ *
+ * @param[in] weights Pointer to the weights tensor we want to check if managed
+ *
+ * @return True if the weights tensor is managed else false
+ */
+ bool are_weights_managed(const ITensor *weights);
+
+private:
+ std::map<const ITensor *, std::vector<ITransformWeights *>> _managed_weights; /**< Transformations kept per weights tensor. NOTE(review): std::vector is used but <vector> is not included directly by this header */
+ std::map<const ITensor *, ITransformWeights *> _managed_weights_parents; /**< Presumably the parent transformation per weights tensor (see manage()); implementation not shown */
+};
+} // arm_compute
+#endif /*__ARM_COMPUTE_IWEIGHTSMANAGER_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 8f261421e6..50a86bd7c4 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,9 @@
#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/ITransformWeights.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
@@ -52,6 +54,8 @@ public:
* @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
* @param[in] data_layout The data layout the weights have been trained in.
+ *
+ * @return A Status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
@@ -61,5 +65,45 @@ public:
private:
NEConvertFullyConnectedWeightsKernel _kernel;
};
-}
+
+namespace weights_transformations
+{
+/** Weights transformation that wraps @ref NEConvertFullyConnectedWeights and owns the converted output tensor. */
+class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
+{
+public:
+ void run() override /**< Allocates the output tensor, runs the conversion and marks the reshape as run */
+ {
+ _output.allocator()->allocate();
+ _func.run();
+ _reshape_run = true;
+ }
+
+ void release() override /**< Frees the memory of the output tensor */
+ {
+ _output.allocator()->free();
+ }
+
+ ITensor *get_weights() override /**< Returns a pointer to the internally owned output tensor */
+ {
+ return &_output;
+ }
+
+ uint32_t uid() override /**< Returns the fixed id of this transformation */
+ {
+ return _uid;
+ }
+
+ void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout) /**< Configures the wrapped function to write into the internal output tensor */
+ {
+ _func.configure(input, &_output, original_input_shape, data_layout);
+ }
+
+private:
+ static constexpr uint32_t _uid = 0x4; /**< 0b0000100: target bits 00 (NEON), function id 00001 (ConvertFullyConnectedWeights) in the ITransformWeights id scheme */
+ Tensor _output{}; /**< Converted weights, returned by get_weights() */
+ NEConvertFullyConnectedWeights _func{}; /**< Wrapped conversion function */
+};
+} // namespace weights_transformations
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 360bb23f22..6880bbba6b 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -73,7 +73,7 @@ namespace arm_compute
class NEDeconvolutionLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 56ce274572..b80e0e49e0 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,6 +63,46 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
+namespace weights_transformations
+{
+/** Weights transformation that wraps @ref NEFullyConnectedLayerReshapeWeights and owns the reshaped output tensor */
+class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights
+{
+public:
+ void run() override /**< Allocates the output tensor, runs the reshape and marks the reshape as run */
+ {
+ _output.allocator()->allocate();
+ _func.run();
+ _reshape_run = true;
+ }
+
+ void release() override /**< Frees the memory of the output tensor */
+ {
+ _output.allocator()->free();
+ }
+
+ ITensor *get_weights() override /**< Returns a pointer to the internally owned output tensor */
+ {
+ return &_output;
+ }
+
+ uint32_t uid() override /**< Returns the fixed id of this transformation */
+ {
+ return _uid;
+ }
+
+ void configure(const ITensor *input) /**< Configures the wrapped function to write into the internal output tensor */
+ {
+ _func.configure(input, &_output);
+ }
+
+private:
+ static constexpr uint32_t _uid = 0x0; /**< Target bits 00 (NEON), function id 00000 (FullyConnectedLayerReshapeWeights) in the ITransformWeights id scheme */
+ Tensor _output{}; /**< Reshaped weights, returned by get_weights() */
+ NEFullyConnectedLayerReshapeWeights _func{}; /**< Wrapped reshape function */
+};
+} // namespace weights_transformations
+
/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
* -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
@@ -75,7 +115,7 @@ class NEFullyConnectedLayer : public IFunction
{
public:
/** Constructor */
- NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
/** Default move constructor */
@@ -128,25 +168,28 @@ private:
void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
- MemoryGroup _memory_group;
- NEFlattenLayerKernel _flatten_kernel;
- NEConvertFullyConnectedWeights _convert_weights;
- NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
- NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
- Tensor _flatten_output;
- Tensor _gemmlowp_output;
- Tensor _converted_weights_output;
- Tensor _reshape_weights_output;
- const ITensor *_original_weights;
- bool _are_weights_converted;
- bool _are_weights_reshaped;
- bool _is_fc_after_conv;
- bool _accumulate_biases;
- bool _is_quantized;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ NEFlattenLayerKernel _flatten_kernel;
+ NEConvertFullyConnectedWeights _convert_weights;
+ weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
+ NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
+ weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
+ NEGEMM _mm_gemm;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
+ NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ Tensor _flatten_output;
+ Tensor _gemmlowp_output;
+ Tensor _converted_weights_output;
+ Tensor _reshape_weights_output;
+ const ITensor *_original_weights;
+ bool _are_weights_converted;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _accumulate_biases;
+ bool _is_quantized;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 7f9e3181bc..d947be1ef9 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@
#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"
@@ -51,7 +52,7 @@ class NEGEMM : public IFunction
{
public:
/** Constructor */
- NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMM(const NEGEMM &) = delete;
/** Default move constructor */
@@ -96,6 +97,7 @@ public:
private:
MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
NEGEMMInterleave4x4Kernel _interleave_kernel;
NEGEMMTranspose1xWKernel _transpose_kernel;
NEGEMMMatrixMultiplyKernel _mm_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index ec4f700034..83e495e695 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
@@ -38,9 +39,8 @@ namespace arm_compute
class NEGEMMAssemblyDispatch : public IFunction
{
public:
- /** Default constructor */
- NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+ /** Constructor */
+ NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copy constructed */
NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete;
/** Prevent instances of this class from being copied */
@@ -79,8 +79,9 @@ private:
/** Interface for the arm_gemm fallback */
std::unique_ptr<IFallback> _arm_gemm;
- MemoryGroup _memory_group; /**< Function memory group */
- std::shared_ptr<IMemoryManager> _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::shared_ptr<IMemoryManager> _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */
+ IWeightsManager *_weights_manager; /**< Pointer to the weights manager */
public:
/** If supported create an ACL function else fallback to the arm_gemm function.
*
@@ -117,6 +118,5 @@ public:
void prepare() override;
void run() override;
};
-
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index ace924f146..dccc35f0af 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -32,6 +32,7 @@
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -54,6 +55,14 @@ class NEConvolutionLayerReshapeWeights : public IFunction
public:
/** Constructor */
NEConvolutionLayerReshapeWeights();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
+ /** Default move constructor */
+ NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
+ /** Default move assignment operator */
+ NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
@@ -78,6 +87,52 @@ private:
NEWeightsReshapeKernel _weights_reshape_kernel;
};
+namespace weights_transformations
+{
+/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights
+ *
+ * Wraps a @ref NEConvolutionLayerReshapeWeights function behind the @ref ITransformWeights interface.
+ * The object owns both the reshape function and the tensor that receives the reshaped weights:
+ *
+ * -# configure() records whether a biases tensor was supplied and configures the reshape to write into the owned output.
+ * -# run() allocates the owned output, executes the reshape and marks the transformation as done.
+ * -# get_weights() returns a pointer to the owned output tensor.
+ * -# release() frees the backing memory of the owned output tensor.
+ * -# uid() returns 0x8 when configured without biases and 0x88 when configured with biases.
+ *
+ * @note _reshape_run is not declared in this class; presumably it is a member inherited from ITransformWeights - TODO confirm.
+ */
+class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
+{
+public:
+ void configure(const ITensor *input, const ITensor *biases) // input: weights to reshape; biases: checked against nullptr below and forwarded unchanged
+ {
+ _bias_bit = (biases != nullptr) ? 1 : 0; // remembered so that uid() differs between the with-bias and without-bias configurations
+ _func.configure(input, biases, &_output); // the reshape always writes into the tensor owned by this object
+ }
+
+ void run() override // allocates the output, runs the reshape, then flags completion
+ {
+ _output.allocator()->allocate(); // memory for the result is only requested here, not in configure()
+ _func.run();
+ _reshape_run = true; // value reported by is_reshape_run()
+ }
+
+ ITensor *get_weights() override // returns the address of the owned output tensor; no ownership is transferred
+ {
+ return &_output;
+ }
+
+ void release() override // frees the memory backing the owned output tensor
+ {
+ _output.allocator()->free();
+ }
+
+ uint32_t uid() override // identifier of this transformation: constant 0x8 with _bias_bit placed in bit 7
+ {
+ return ((0x8) | (_bias_bit << 7)); // 0x08 without biases, 0x88 with biases
+ }
+
+ bool is_reshape_run() // NOTE(review): not marked override; if ITransformWeights also declares is_reshape_run() this declaration hides it - confirm
+ {
+ return _reshape_run;
+ }
+
+private:
+ Tensor _output{}; /**< Tensor owned by this object that receives the reshaped weights */
+ NEConvolutionLayerReshapeWeights _func{}; /**< Reshape function configured by configure() and executed by run() */
+ int32_t _bias_bit{ 0 }; /**< 1 if configure() was given a non-null biases tensor, 0 otherwise */
+};
+} // namespace weights_transformations
+
/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
*
* -# @ref NEIm2ColKernel
@@ -92,7 +147,7 @@ class NEGEMMConvolutionLayer : public IFunction
{
public:
/** Constructor */
- NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+ NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
/** Default move constructor */
@@ -187,15 +242,17 @@ private:
static Status validate_gemm3d(const ITensorInfo *input_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);
private:
- MemoryGroup _memory_group;
- NEConvolutionLayerReshapeWeights _reshape_weights;
- NEIm2ColKernel _im2col_kernel;
- NEGEMM _mm_gemm;
- NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- NECol2ImKernel _col2im_kernel;
- NEActivationLayer _activationlayer_function;
- NEArithmeticAdditionKernel _add_bias_kernel;
- NEReshapeLayer _reshape_layer;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ NEConvolutionLayerReshapeWeights _reshape_weights;
+ weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
+ NEIm2ColKernel _im2col_kernel;
+ NEGEMM _mm_gemm;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ NECol2ImKernel _col2im_kernel;
+ NEActivationLayer _activationlayer_function;
+ NEArithmeticAdditionKernel _add_bias_kernel;
+ NEReshapeLayer _reshape_layer;
const ITensor *_original_weights;
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index ec394392de..978c445927 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -86,7 +86,7 @@ private:
NEGEMM _gemm_state_f;
NEArithmeticAdditionKernel _add_kernel;
NEActivationLayerKernel _activation_kernel;
- NEFullyConnectedLayer _fully_connected_kernel;
+ NEFullyConnectedLayer _fully_connected;
NECopyKernel _copy_kernel;
Tensor _fully_connected_out;
Tensor _gemm_output;
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
index ad89e1fbec..d3dda9a95f 100644
--- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
+++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
@@ -32,6 +32,7 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IScheduler.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
@@ -94,8 +95,8 @@ public:
class NEGEMMInterleavedWrapper : public IFunction
{
public:
- NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- ~NEGEMMInterleavedWrapper() = default;
+ NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ ~NEGEMMInterleavedWrapper() = default;
NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete;
NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete;