author    | telsoa01 <telmo.soares@arm.com> | 2018-08-31 09:22:23 +0100
committer | telsoa01 <telmo.soares@arm.com> | 2018-08-31 09:22:23 +0100
commit    | c577f2c6a3b4ddb6ba87a882723c53a248afbeba (patch)
tree      | bd7d4c148df27f8be6649d313efb24f536b7cf34 /include/armnn
parent    | 4c7098bfeab1ffe1cdc77f6c15548d3e73274746 (diff)
download  | armnn-c577f2c6a3b4ddb6ba87a882723c53a248afbeba.tar.gz
Release 18.08
Diffstat (limited to 'include/armnn')

-rw-r--r-- | include/armnn/ArmNN.hpp          |   1
-rw-r--r-- | include/armnn/Descriptors.hpp    |  32
-rw-r--r-- | include/armnn/DescriptorsFwd.hpp |   1
-rw-r--r-- | include/armnn/Exceptions.hpp     |  37
-rw-r--r-- | include/armnn/INetwork.hpp       | 217
-rw-r--r-- | include/armnn/IProfiler.hpp      |  38
-rw-r--r-- | include/armnn/IRuntime.hpp       |  77
-rw-r--r-- | include/armnn/LayerSupport.hpp   |  45
-rw-r--r-- | include/armnn/LstmParams.hpp     |  55
-rw-r--r-- | include/armnn/NetworkFwd.hpp     |   3
-rw-r--r-- | include/armnn/Tensor.hpp         |  22
-rw-r--r-- | include/armnn/Types.hpp          |  46
-rw-r--r-- | include/armnn/TypesUtils.hpp     | 133
-rw-r--r-- | include/armnn/Utils.hpp          |   5
-rw-r--r-- | include/armnn/Version.hpp        |   2

15 files changed, 504 insertions(+), 210 deletions(-)
diff --git a/include/armnn/ArmNN.hpp b/include/armnn/ArmNN.hpp
index d1cb7a8488..66697c428b 100644
--- a/include/armnn/ArmNN.hpp
+++ b/include/armnn/ArmNN.hpp
@@ -9,6 +9,7 @@
 #include "IRuntime.hpp"
 #include "INetwork.hpp"
 #include "LayerSupport.hpp"
+#include "LstmParams.hpp"
 #include "Tensor.hpp"
 #include "Types.hpp"
 #include "TypesUtils.hpp"
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 2595656c70..3cf152befe 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -95,8 +95,8 @@ private:
     uint32_t** m_ViewSizes;
 };

-// Convenience template to create a OriginsDescriptor to use when creating a Merger layer for performing concatenation
-// of a number of input tensors
+/// Convenience template to create an OriginsDescriptor to use when creating a Merger layer for performing concatenation
+/// of a number of input tensors
 template <typename TensorShapeIt>
 OriginsDescriptor CreateMergerDescriptorForConcatenation(TensorShapeIt first,
                                                          TensorShapeIt last,
                                                          unsigned int concatenationDimension)
@@ -301,7 +301,35 @@ struct ResizeBilinearDescriptor

 struct ReshapeDescriptor
 {
+    ReshapeDescriptor()
+    : m_TargetShape()
+    {}
+
+    ReshapeDescriptor(const TensorShape& shape)
+    : m_TargetShape(shape)
+    {}
+
     TensorShape m_TargetShape;
 };

+// temporary descriptor for Lstm
+struct LstmDescriptor
+{
+    LstmDescriptor()
+    : m_ActivationFunc(1) // 0: None, 1: Relu, 3: Relu6, 4: Tanh, 6: Sigmoid
+    , m_ClippingThresCell(0.0)
+    , m_ClippingThresProj(0.0)
+    , m_CifgEnabled(true)
+    , m_PeepholeEnabled(false)
+    , m_ProjectionEnabled(false)
+    {}
+
+    uint32_t m_ActivationFunc;
+    float m_ClippingThresCell;
+    float m_ClippingThresProj;
+    bool m_CifgEnabled;
+    bool m_PeepholeEnabled;
+    bool m_ProjectionEnabled;
+};
+
 }
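The Descriptors.hpp change above adds convenience constructors to ReshapeDescriptor and introduces a temporary LstmDescriptor. A minimal configuration sketch, using only the fields from this hunk plus the TensorShape(numDimensions, dimensionSizes) constructor declared in Tensor.hpp:

```cpp
#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

int main()
{
    // ReshapeDescriptor can now be built directly from a target shape.
    const unsigned int dims[] = {1, 4};
    armnn::TensorShape targetShape(2, dims);
    armnn::ReshapeDescriptor reshapeDesc(targetShape);

    // LstmDescriptor defaults to a CIFG-enabled cell with ReLU activation;
    // peephole and projection must be enabled explicitly if the model needs them.
    armnn::LstmDescriptor lstmDesc;
    lstmDesc.m_ActivationFunc    = 4;    // 4: Tanh (see the comment in the descriptor)
    lstmDesc.m_ClippingThresCell = 1.0f;
    lstmDesc.m_CifgEnabled       = false;
    lstmDesc.m_PeepholeEnabled   = true;
    return 0;
}
```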
diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
index 58b4bcc626..8c14614876 100644
--- a/include/armnn/DescriptorsFwd.hpp
+++ b/include/armnn/DescriptorsFwd.hpp
@@ -12,6 +12,7 @@ struct Convolution2dDescriptor;
 struct DepthwiseConvolution2dDescriptor;
 struct FakeQuantizationDescriptor;
 struct FullyConnectedDescriptor;
+struct LstmDescriptor;
 struct PermuteDescriptor;
 struct NormalizationDescriptor;
 struct Pooling2dDescriptor;
diff --git a/include/armnn/Exceptions.hpp b/include/armnn/Exceptions.hpp
index 630c77660d..403fc593b5 100644
--- a/include/armnn/Exceptions.hpp
+++ b/include/armnn/Exceptions.hpp
@@ -11,7 +11,38 @@
 namespace armnn
 {

-// base class for all ArmNN exceptions so that users can filter to just those
+struct CheckLocation
+{
+    const char* m_Function;
+    const char* m_File;
+    unsigned int m_Line;
+
+    CheckLocation(const char* func,
+                  const char* file,
+                  unsigned int line)
+    : m_Function{func}
+    , m_File{file}
+    , m_Line{line}
+    {
+    }
+
+    std::string AsString() const
+    {
+        std::stringstream ss;
+        ss << " at function " << m_Function
+           << " [" << m_File << ':' << m_Line << "]";
+        return ss.str();
+    }
+
+    std::string FileLine() const
+    {
+        std::stringstream ss;
+        ss << " [" << m_File << ':' << m_Line << "]";
+        return ss.str();
+    }
+};
+
+/// Base class for all ArmNN exceptions so that users can filter to just those.
 class Exception : public std::exception
 {
 public:
@@ -91,4 +122,6 @@ void ConditionalThrowIfNotEqual(const std::string& message,
     }
 }

-}
+} // namespace armnn
+
+#define CHECK_LOCATION() armnn::CheckLocation(__func__, __FILE__, __LINE__)
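The new CheckLocation struct and CHECK_LOCATION() macro let error messages carry the throw site. A minimal usage sketch; CheckPositive is a hypothetical helper, and armnn::InvalidArgumentException is assumed from the existing exception hierarchy in this header:

```cpp
#include <armnn/Exceptions.hpp>
#include <sstream>

void CheckPositive(int value)
{
    if (value <= 0)
    {
        std::stringstream ss;
        // AsString() renders " at function CheckPositive [somefile.cpp:NN]".
        ss << "Expected a positive value" << CHECK_LOCATION().AsString();
        throw armnn::InvalidArgumentException(ss.str());
    }
}
```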
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 5cff810db5..cefcbfb06c 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -11,6 +11,7 @@
 #include "armnn/Types.hpp"

 #include <memory>
+#include <vector>

 namespace armnn
 {
@@ -25,7 +26,8 @@ public:
     virtual IOutputSlot* GetConnection() = 0;

 protected:
-    ~IInputSlot() {} /// Not user deletable
+    /// Not user deletable.
+    ~IInputSlot() {}
 };

 /// @brief An output connection slot for a layer.
@@ -45,7 +47,8 @@ public:
     virtual void Disconnect(IInputSlot& slot) = 0;

 protected:
-    ~IOutputSlot() {} /// Not user deletable
+    /// Not user deletable.
+    ~IOutputSlot() {}
 };

 /// @brief Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
@@ -63,9 +66,12 @@ public:
     virtual const IOutputSlot& GetOutputSlot(unsigned int index) const = 0;
     virtual IOutputSlot& GetOutputSlot(unsigned int index) = 0;

+    virtual std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const = 0;
+
     virtual LayerGuid GetGuid() const = 0;
 protected:
-    ~IConnectableLayer() {} // Objects are not deletable via the handle
+    /// Objects are not deletable via the handle
+    ~IConnectableLayer() {}
 };

 using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>;
@@ -81,19 +87,19 @@ public:

     virtual Status PrintGraph() = 0;

-    /// Add an input layer to the network.
-    /// @param id User generated id to uniquely identify a particular input. The same id needs to be specified
+    /// Adds an input layer to the network.
+    /// @param id - User generated id to uniquely identify a particular input. The same id needs to be specified.
     /// when passing the inputs to the IRuntime::EnqueueWorkload() function.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name = nullptr) = 0;

-    /// Add a 2D convolution layer to the network.
-    /// @param convolution2dDescriptor Description of the 2D convolution layer
-    /// @param weights Tensor for the weights data.
-    /// @param biases (Optional) Tensor for the bias data. Must match the output tensor shape.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a 2D convolution layer to the network.
+    /// @param convolution2dDescriptor - Description of the 2D convolution layer.
+    /// @param weights - Tensor for the weights data.
+    /// @param biases - (Optional) Tensor for the bias data. Must match the output tensor shape.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const char* name = nullptr) = 0;
@@ -103,12 +109,12 @@ public:
                                                      const ConstTensor& biases,
                                                      const char* name = nullptr) = 0;

-    /// Add a 2D depthwise convolution layer to the network.
-    /// @param convolution2dDescriptor Description of the 2D depthwise convolution layer
-    /// @param weights Tensor for the weights data. Expected format: [1, outputChannels, height, width]
-    /// @param biases (Optional) Tensor for the bias data. Must match the output tensor shape.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a 2D depthwise convolution layer to the network.
+    /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
+    /// @param weights - Tensor for the weights data. Expected format: [1, outputChannels, height, width].
+    /// @param biases (Optional) - Tensor for the bias data. Must match the output tensor shape.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddDepthwiseConvolution2dLayer(
         const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
         const ConstTensor& weights,
         const char* name = nullptr) = 0;
@@ -120,12 +126,12 @@ public:
         const ConstTensor& biases,
         const char* name = nullptr) = 0;

-    /// Add a fully connected layer to the network.
-    /// @param fullyConnectedDescriptor Description of the fully connected layer
-    /// @param weights Tensor for the weights data.
-    /// @param biases (Optional) Tensor for the bias data.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a fully connected layer to the network.
+    /// @param fullyConnectedDescriptor - Description of the fully connected layer.
+    /// @param weights - Tensor for the weights data.
+    /// @param biases - (Optional) Tensor for the bias data.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const char* name = nullptr) = 0;
@@ -135,76 +141,77 @@ public:
                                                       const ConstTensor& biases,
                                                       const char* name = nullptr) = 0;

-    /// Add a permute layer to the network.
-    /// @param permuteDescriptor PermuteDescriptor to configure the permute
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a permute layer to the network.
+    /// @param permuteDescriptor - PermuteDescriptor to configure the permute.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                                const char* name = nullptr) = 0;

-    /// Add a pooling layer to the network.
-    /// @param pooling2dDescriptor Pooling2dDescriptor to configure the pooling
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a pooling layer to the network.
+    /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                                  const char* name = nullptr) = 0;

-    /// Add an activation layer to the network.
-    /// @param activationDescriptor ActivationDescriptor to configure the activation
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds an activation layer to the network.
+    /// @param activationDescriptor - ActivationDescriptor to configure the activation.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                                   const char* name = nullptr) = 0;

-    /// Add a normalization layer to the network.
-    /// @param normalizationDescriptor NormalizationDescriptor to configure the normalization
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a normalization layer to the network.
+    /// @param normalizationDescriptor - NormalizationDescriptor to configure the normalization.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                      const char* name = nullptr) = 0;

-    /// Add a softmax layer to the network.
-    /// @param softmaxDescriptor SoftmaxDescriptor to configure the softmax
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a softmax layer to the network.
+    /// @param softmaxDescriptor - SoftmaxDescriptor to configure the softmax.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                                const char* name = nullptr) = 0;

-    /// Add a splitter layer to the network.
-    /// @param splitterDescriptor WindowsDescriptor to configure the splitting process. Number of Views must be equal to
-    ///                           the number of outputs, and their order must match - e.g. first view corresponds to
-    ///                           the first output, second view to the second output, etc....
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a splitter layer to the network.
+    /// @param splitterDescriptor - WindowsDescriptor to configure the splitting process.
+    ///                             Number of Views must be equal to the number of outputs,
+    ///                             and their order must match - e.g. first view corresponds to
+    ///                             the first output, second view to the second output, etc....
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor
                                                 , const char* name = nullptr) = 0;

-    /// Add a merger layer to the network.
-    /// @param mergerDescriptor WindowsDescriptor to configure the merging process. Number of Views must be equal to
+    /// Adds a merger layer to the network.
+    /// @param mergerDescriptor - WindowsDescriptor to configure the merging process. Number of Views must be equal to
     ///                          the number of inputs, and their order must match - e.g. first view corresponds to
     ///                          the first input, second view to the second input, etc....
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddMergerLayer(const OriginsDescriptor& mergerDescriptor,
                                               const char* name = nullptr) = 0;

-    /// Add an addition layer to the network.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds an addition layer to the network.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddAdditionLayer(const char* name = nullptr) = 0;

-    /// Add a multiplication layer to the network.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a multiplication layer to the network.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr) = 0;

-    /// Add a batch normalization layer to the network.
-    /// @param mean Pre-calculated mean for each channel
-    /// @param variance Pre-calculated variance for each channel
-    /// @param beta Per-channel additive factor
-    /// @param gamma Per-channel multiplicative factor
-    /// @return Interface for configuring the layer.
-    /// @param name Optional name for the layer
+    /// Adds a batch normalization layer to the network.
+    /// @param mean - Pre-calculated mean for each channel.
+    /// @param variance - Pre-calculated variance for each channel.
+    /// @param beta - Per-channel additive factor.
+    /// @param gamma - Per-channel multiplicative factor.
+    /// @return - Interface for configuring the layer.
+    /// @param name - Optional name for the layer.
     virtual IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                           const ConstTensor& mean,
                                                           const ConstTensor& variance,
@@ -212,47 +219,55 @@ public:
                                                           const ConstTensor& gamma,
                                                           const char* name = nullptr) = 0;

-    /// Add a resize bilinear layer to the network.
-    /// @param resizeDesc Parameters for the resize operation
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer
+    /// Adds a resize bilinear layer to the network.
+    /// @param resizeDesc - Parameters for the resize operation.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDesc,
                                                       const char* name = nullptr) = 0;

-    /// Add an L2 normalization layer to the network.
+    /// Adds an L2 normalization layer to the network.
     /// Normalization is performed along dimension 1, but requires a 4d input.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddL2NormalizationLayer(const char* name = nullptr) = 0;

     /// Adds a layer with no inputs and a single output, which always corresponds to
     /// the passed in constant tensor.
-    /// @param input Tensor to be provided as the only output of the layer. The layer will maintain its own copy of the
-    ///              tensor data, meaning the memory referenced by @a input can be freed or reused after this function is
-    ///              called.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer
+    /// @param input - Tensor to be provided as the only output of the layer. The layer will maintain
+    ///                its own copy of the tensor data, meaning the memory referenced by @a input can
+    ///                be freed or reused after this function is called.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddConstantLayer(const ConstTensor& input,
                                                 const char* name = nullptr) = 0;

-    /// Add a reshape layer to the network.
-    /// @param reshapeDescriptor Parameters for the reshape operation
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a reshape layer to the network.
+    /// @param reshapeDescriptor - Parameters for the reshape operation.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                                const char* name = nullptr) = 0;

-    /// Add a floor layer to the network.
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// Adds a floor layer to the network.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddFloorLayer(const char* name = nullptr) = 0;

-    /// Add an output layer to the network.
-    /// @param id User generated id to uniquely identify a particular output. The same id needs to be specified
+    /// Adds an output layer to the network.
+    /// @param id - User generated id to uniquely identify a particular output. The same id needs to be specified
     /// when passing the outputs to the IRuntime::EnqueueWorkload() function.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
+    virtual IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) = 0;
+
+    /// Add a Lstm layer to the network
+    /// @param descriptor Parameters for the Lstm operation
     /// @param name Optional name for the layer
     /// @return Interface for configuring the layer.
-    virtual IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) = 0;
+    virtual IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor,
+                                            const LstmInputParams& params,
+                                            const char* name = nullptr) = 0;

 protected:
     ~INetwork() {}
@@ -268,16 +283,34 @@ public:
     virtual Status PrintGraph() = 0;
     virtual Status SerializeToDot(std::ostream& stream) const = 0;
+
 protected:
     ~IOptimizedNetwork() {}
 };

+struct OptimizerOptions
+{
+    OptimizerOptions() : m_ReduceFp32ToFp16(false) {}
+
+    OptimizerOptions(bool reduceFp32ToFp16)
+        : m_ReduceFp32ToFp16(reduceFp32ToFp16)
+    {
+    }
+
+    // Reduce Fp32 data to Fp16 for faster processing
+    bool m_ReduceFp32ToFp16;
+};

 /// Create an optimized version of the network
 /// @param network INetwork description of the network to be optimized.
-/// @param deviceSpec The choice of the default computation backend.
+/// @param backendPreferences The choice of the backend ordered by user preferences.
+/// @param deviceSpec DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
+/// @param options OptimizerOptions object with optimizer configuration options
 /// @return An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from
 /// armnn::Exception if process fails.
-IOptimizedNetworkPtr Optimize(const INetwork& network, const DeviceSpec& deviceSpec);
+IOptimizedNetworkPtr Optimize(const INetwork& network,
+                              const std::vector<armnn::Compute>& backendPreferences,
+                              const IDeviceSpec& deviceSpec,
+                              const OptimizerOptions& options = OptimizerOptions());

 } //namespace armnn
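The INetwork.hpp changes above replace the old single-device Optimize() with one taking an ordered backend preference list, the runtime's IDeviceSpec, and the new OptimizerOptions. A sketch of the 18.08 flow, assuming the pre-existing INetwork::Create() factory and the IOutputSlot Connect()/SetTensorInfo() methods, which belong to this header but are not shown in this diff:

```cpp
#include <armnn/ArmNN.hpp>

int main()
{
    using namespace armnn;

    // Build a trivial one-layer graph: input -> ReLU -> output.
    INetworkPtr net = INetwork::Create();  // factory not shown in this hunk

    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor reluDesc;
    reluDesc.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* relu   = net->AddActivationLayer(reluDesc, "relu");
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
    relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    const unsigned int dims[] = {1, 8};
    TensorInfo info(TensorShape(2, dims), DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(info);
    relu->GetOutputSlot(0).SetTensorInfo(info);

    // The 18.08 Optimize() takes an ordered backend preference list and the
    // runtime's device spec instead of a single default compute device.
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    OptimizerOptions options(/*reduceFp32ToFp16=*/false);
    IOptimizedNetworkPtr optNet = Optimize(*net,
                                           {Compute::CpuAcc, Compute::CpuRef},
                                           runtime->GetDeviceSpec(),
                                           options);
    return optNet != nullptr ? 0 : 1;
}
```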
diff --git a/include/armnn/IProfiler.hpp b/include/armnn/IProfiler.hpp
new file mode 100644
index 0000000000..a28173e5e1
--- /dev/null
+++ b/include/armnn/IProfiler.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <iostream>
+
+namespace armnn
+{
+
+class IProfiler
+{
+public:
+    /// Enables/disables profiling for this profiler.
+    /// @param [in] enableProfiling A flag that indicates whether profiling should be enabled or not.
+    virtual void EnableProfiling(bool enableProfiling) = 0;
+
+    /// Checks whether profiling is enabled.
+    /// Profiling is disabled by default.
+    /// @return true if profiling is enabled, false otherwise.
+    virtual bool IsProfilingEnabled() = 0;
+
+    /// Analyzes the tracked events and writes the results to the given output stream.
+    /// Please refer to the configuration variables in Profiling.cpp to customize the information written.
+    /// @param [out] outStream The stream where to write the profiling results to.
+    virtual void AnalyzeEventsAndWriteResults(std::ostream& outStream) const = 0;
+
+    /// Print stats for events in JSON Format to the given output stream.
+    /// @param [out] outStream The stream where to write the profiling results to.
+    virtual void Print(std::ostream& outStream) const = 0;
+
+protected:
+    ~IProfiler() {}
+};
+
+} // namespace armnn
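A sketch of how the new IProfiler interface might be driven once a profiler has been obtained (see the IRuntime::GetProfiler() addition below); DumpProfile is a hypothetical helper:

```cpp
#include <armnn/IProfiler.hpp>
#include <iostream>

void DumpProfile(armnn::IProfiler* profiler)
{
    // Profiling is disabled by default, so switch it on before running workloads.
    if (profiler != nullptr && !profiler->IsProfilingEnabled())
    {
        profiler->EnableProfiling(true);
    }

    // ... run EnqueueWorkload() here ...

    if (profiler != nullptr)
    {
        profiler->AnalyzeEventsAndWriteResults(std::cout); // human-readable stats
        profiler->Print(std::cout);                        // JSON-formatted stats
    }
}
```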
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index a1a3f0fda9..36efdbdcab 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -9,6 +9,7 @@
 #include "Types.hpp"
 #include "Tensor.hpp"
 #include "INetwork.hpp"
+#include "IProfiler.hpp"
 #include "TypesUtils.hpp"

 namespace armnn
@@ -16,7 +17,7 @@ namespace armnn

 using NetworkId = int;

-class IClTunedParameters;
+class IGpuAccTunedParameters;

 class IRuntime;
 using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
@@ -26,66 +27,80 @@ class IRuntime
 public:
     struct CreationOptions
     {
-        Compute m_DefaultComputeDevice;
-        bool m_UseCpuRefAsFallback;
-        /// If set, uses the CL tuned parameters from the given object when executing CL workloads.
+        CreationOptions()
+            : m_GpuAccTunedParameters(nullptr)
+            , m_EnableGpuProfiling(false)
+        {}
+
+        /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
         /// It will also be updated with new tuned parameters if it is configured to do so.
-        IClTunedParameters* m_ClTunedParameters;
-
-        CreationOptions(Compute defaultComputeDevice)
-            : m_DefaultComputeDevice(defaultComputeDevice)
-            , m_UseCpuRefAsFallback(true)
-            , m_ClTunedParameters(nullptr)
-        {
-        }
+        std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
+
+        // Setting this flag will allow the user to obtain GPU profiling information from the runtime.
+        bool m_EnableGpuProfiling;
     };

     static IRuntime* CreateRaw(const CreationOptions& options);
     static IRuntimePtr Create(const CreationOptions& options);
     static void Destroy(IRuntime* runtime);

+    /// Loads a complete network into the IRuntime.
+    /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
+    /// @param [in] network - Complete network to load into the IRuntime.
+    /// The runtime takes ownership of the network once passed in.
+    /// @return armnn::Status
+    virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network) = 0;
+
     /// Load a complete network into the IRuntime.
     /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
     /// @param [in] network Complete network to load into the IRuntime.
+    /// @param [out] errorMessage Error message if there were any errors.
     /// The runtime takes ownership of the network once passed in.
     /// @return armnn::Status
-    virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network) = 0;
+    virtual Status LoadNetwork(NetworkId& networkIdOut,
+                               IOptimizedNetworkPtr network,
+                               std::string & errorMessage) = 0;

     virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;
     virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;

-    // Evaluate network using input in inputTensors, outputs filled into outputTensors
+    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
     virtual Status EnqueueWorkload(NetworkId networkId,
-        const InputTensors& inputTensors,
-        const OutputTensors& outputTensors) = 0;
+                                   const InputTensors& inputTensors,
+                                   const OutputTensors& outputTensors) = 0;

-    /// Unload a network from the IRuntime.
+    /// Unloads a network from the IRuntime.
     /// At the moment this only removes the network from the m_Impl->m_Network.
     /// This might need more work in the future to be AndroidNN compliant.
-    /// @param [in] networkId Unique identifier for the network to be unloaded. Generated in LoadNetwork().
+    /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
     /// @return armnn::Status
     virtual Status UnloadNetwork(NetworkId networkId) = 0;

-    virtual const DeviceSpec& GetDeviceSpec() const = 0;
+    virtual const IDeviceSpec& GetDeviceSpec() const = 0;
+
+    /// Gets the profiler corresponding to the given network id.
+    /// @param networkId The id of the network for which to get the profile.
+    /// @return A pointer to the requested profiler, or nullptr if not found.
+    virtual const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const = 0;

 protected:
     ~IRuntime() {}
 };

-using IClTunedParametersPtr = std::unique_ptr<IClTunedParameters, void(*)(IClTunedParameters* params)>;
+using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

-/// Manages a set of Open CL parameters which have been tuned for maximum performance.
-/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
-/// for all CL workload execution.
+/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
+/// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
+/// for all GPU workload execution.
 ///
 /// Can be created in two modes:
-/// - In UseTunedParameters mode the parameters stored in this object are used to execute CL workloads.
-/// - In UpdateTunedParameters mode, additionally, whenever a CL workload is executed for the first time the
+/// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
+/// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
 ///   optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
 ///
-/// The parameters can be loaded from and saved to a file so that you first run a slow initial read-write
+/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
 /// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
-class IClTunedParameters
+class IGpuAccTunedParameters
 {
 public:
     enum class Mode
@@ -96,10 +111,10 @@ public:

     /// Creates an IClTunedParameters with the given mode.
     /// @{
-    static IClTunedParameters* CreateRaw(Mode mode);
-    static IClTunedParametersPtr Create(Mode mode);
+    static IGpuAccTunedParameters* CreateRaw(Mode mode);
+    static IGpuAccTunedParametersPtr Create(Mode mode);
     /// @}
-    static void Destroy(IClTunedParameters* params);
+    static void Destroy(IGpuAccTunedParameters* params);

     /// Loads an existing set of tuned parameters from the given file.
     /// If there is an error loading the file, an armnn::Exception is thrown.
@@ -110,7 +125,7 @@ public:
     virtual void Save(const char* filename) const = 0;

 protected:
-    virtual ~IClTunedParameters() {};
+    virtual ~IGpuAccTunedParameters() {};
 };

 }
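Putting the new IRuntime API together: the error-message LoadNetwork() overload and EnqueueWorkload() as declared above. RunNetwork is a hypothetical helper; the InputTensors/OutputTensors pair-vector types are assumed from Tensor.hpp:

```cpp
#include <armnn/ArmNN.hpp>
#include <armnn/Exceptions.hpp>
#include <string>
#include <utility>
#include <vector>

void RunNetwork(armnn::IRuntimePtr& runtime, armnn::IOptimizedNetworkPtr optNet)
{
    using namespace armnn;

    NetworkId netId = 0;
    std::string errorMessage;
    // The new overload reports a human-readable error message on failure.
    if (runtime->LoadNetwork(netId, std::move(optNet), errorMessage) != Status::Success)
    {
        throw Exception(errorMessage);
    }

    std::vector<float> inputData(8, 1.0f);
    std::vector<float> outputData(8);

    // Bind data to the layer ids used when the graph was built (both 0 here).
    InputTensors  inputs  {{0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}};
    OutputTensors outputs {{0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}};

    runtime->EnqueueWorkload(netId, inputs, outputs);
    runtime->UnloadNetwork(netId);
}
```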
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 43a5756e4a..c875619949 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -13,6 +13,7 @@ namespace armnn

 bool IsActivationSupported(Compute compute,
                            const TensorInfo& input,
+                           const TensorInfo& output,
                            const ActivationDescriptor& descriptor,
                            char* reasonIfUnsupported = nullptr,
                            size_t reasonIfUnsupportedMaxLength = 1024);
@@ -26,6 +27,11 @@ bool IsAdditionSupported(Compute compute,

 bool IsBatchNormalizationSupported(Compute compute,
                                    const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const TensorInfo& mean,
+                                   const TensorInfo& var,
+                                   const TensorInfo& beta,
+                                   const TensorInfo& gamma,
                                    const BatchNormalizationDescriptor& descriptor,
                                    char* reasonIfUnsupported = nullptr,
                                    size_t reasonIfUnsupportedMaxLength = 1024);
@@ -35,6 +41,18 @@ bool IsConstantSupported(Compute compute,
                          char* reasonIfUnsupported = nullptr,
                          size_t reasonIfUnsupportedMaxLength = 1024);

+bool IsConvertFp16ToFp32Supported(Compute compute,
+                                  const TensorInfo& input,
+                                  const TensorInfo& output,
+                                  char* reasonIfUnsupported = nullptr,
+                                  size_t reasonIfUnsupportedMaxLength = 1024);
+
+bool IsConvertFp32ToFp16Supported(Compute compute,
+                                  const TensorInfo& input,
+                                  const TensorInfo& output,
+                                  char* reasonIfUnsupported = nullptr,
+                                  size_t reasonIfUnsupportedMaxLength = 1024);
+
 bool IsConvolution2dSupported(Compute compute,
                               const TensorInfo& input,
                               const TensorInfo& output,
@@ -46,8 +64,10 @@ bool IsConvolution2dSupported(Compute compute,

 bool IsDepthwiseConvolutionSupported(Compute compute,
                                      const TensorInfo& input,
+                                     const TensorInfo& output,
                                      const DepthwiseConvolution2dDescriptor& descriptor,
                                      const TensorInfo& weights,
+                                     const TensorInfo& biases,
                                      char* reasonIfUnsupported = nullptr,
                                      size_t reasonIfUnsupportedMaxLength = 1024);

@@ -57,16 +77,35 @@ bool IsInputSupported(Compute compute,
                       size_t reasonIfUnsupportedMaxLength = 1024);

 bool IsFullyConnectedSupported(Compute compute,
-                               const TensorInfo& input,const
-                               FullyConnectedDescriptor& descriptor,
+                               const TensorInfo& input,
+                               const TensorInfo& output,
+                               const TensorInfo& weights,
+                               const TensorInfo& biases,
+                               const FullyConnectedDescriptor& descriptor,
                                char* reasonIfUnsupported = nullptr,
                                size_t reasonIfUnsupportedMaxLength = 1024);

 bool IsL2NormalizationSupported(Compute compute,
                                 const TensorInfo& input,
+                                const TensorInfo& output,
                                 char* reasonIfUnsupported = nullptr,
                                 size_t reasonIfUnsupportedMaxLength = 1024);

+bool IsLstmSupported(Compute compute, const TensorInfo& input, const TensorInfo& outputStateIn,
+                     const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+                     const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+                     const TensorInfo& output, const LstmDescriptor& descriptor,
+                     const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights,
+                     const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights,
+                     const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights,
+                     const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+                     const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights,
+                     const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights,
+                     const TensorInfo* inputGateBias, const TensorInfo* projectionWeights,
+                     const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights,
+                     const TensorInfo* cellToOutputWeights, char* reasonIfUnsupported = nullptr,
+                     size_t reasonIfUnsupportedMaxLength = 1024);
+
 bool IsMergerSupported(Compute compute,
                        const std::vector<const TensorInfo*> inputs,
                        const OriginsDescriptor& descriptor,
@@ -76,6 +115,7 @@ bool IsMergerSupported(Compute compute,
 bool IsMultiplicationSupported(Compute compute,
                                const TensorInfo& input0,
                                const TensorInfo& input1,
+                               const TensorInfo& output,
                                char* reasonIfUnsupported = nullptr,
                                size_t reasonIfUnsupportedMaxLength = 1024);

@@ -112,6 +152,7 @@ bool IsResizeBilinearSupported(Compute compute,

 bool IsSoftmaxSupported(Compute compute,
                         const TensorInfo& input,
+                        const TensorInfo& output,
                         const SoftmaxDescriptor& descriptor,
                         char* reasonIfUnsupported = nullptr,
                         size_t reasonIfUnsupportedMaxLength = 1024);
diff --git a/include/armnn/LstmParams.hpp b/include/armnn/LstmParams.hpp
new file mode 100644
index 0000000000..cfca0df5bb
--- /dev/null
+++ b/include/armnn/LstmParams.hpp
@@ -0,0 +1,55 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "TensorFwd.hpp"
+
+namespace armnn
+{
+
+struct LstmInputParams
+{
+    LstmInputParams()
+        : m_InputToInputWeights(nullptr)
+        , m_InputToForgetWeights(nullptr)
+        , m_InputToCellWeights(nullptr)
+        , m_InputToOutputWeights(nullptr)
+        , m_RecurrentToInputWeights(nullptr)
+        , m_RecurrentToForgetWeights(nullptr)
+        , m_RecurrentToCellWeights(nullptr)
+        , m_RecurrentToOutputWeights(nullptr)
+        , m_CellToInputWeights(nullptr)
+        , m_CellToForgetWeights(nullptr)
+        , m_CellToOutputWeights(nullptr)
+        , m_InputGateBias(nullptr)
+        , m_ForgetGateBias(nullptr)
+        , m_CellBias(nullptr)
+        , m_OutputGateBias(nullptr)
+        , m_ProjectionWeights(nullptr)
+        , m_ProjectionBias(nullptr)
+    {
+    }
+
+    const ConstTensor* m_InputToInputWeights;
+    const ConstTensor* m_InputToForgetWeights;
+    const ConstTensor* m_InputToCellWeights;
+    const ConstTensor* m_InputToOutputWeights;
+    const ConstTensor* m_RecurrentToInputWeights;
+    const ConstTensor* m_RecurrentToForgetWeights;
+    const ConstTensor* m_RecurrentToCellWeights;
+    const ConstTensor* m_RecurrentToOutputWeights;
+    const ConstTensor* m_CellToInputWeights;
+    const ConstTensor* m_CellToForgetWeights;
+    const ConstTensor* m_CellToOutputWeights;
+    const ConstTensor* m_InputGateBias;
+    const ConstTensor* m_ForgetGateBias;
+    const ConstTensor* m_CellBias;
+    const ConstTensor* m_OutputGateBias;
+    const ConstTensor* m_ProjectionWeights;
+    const ConstTensor* m_ProjectionBias;
+};
+
+} // namespace armnn
+
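The layer-support queries above now take the output TensorInfo as well as the input. A sketch of querying ReLU support on CpuAcc with a reason buffer; the ActivationDescriptor fields are assumed from Descriptors.hpp, which is not part of this hunk:

```cpp
#include <armnn/ArmNN.hpp>
#include <iostream>

bool CheckReluOnNeon()
{
    using namespace armnn;

    const unsigned int dims[] = {1, 16};
    TensorInfo info(TensorShape(2, dims), DataType::Float32);

    ActivationDescriptor desc;
    desc.m_Function = ActivationFunction::ReLu;

    // On failure, the reason is written into the caller-supplied buffer.
    char reason[1024] = {0};
    bool supported = IsActivationSupported(Compute::CpuAcc, info, info, desc,
                                           reason, sizeof(reason));
    if (!supported)
    {
        std::cout << "Not supported: " << reason << std::endl;
    }
    return supported;
}
```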
\ No newline at end of file
+}
diff --git a/include/armnn/Tensor.hpp b/include/armnn/Tensor.hpp
index 910278f33f..718dd817c5 100644
--- a/include/armnn/Tensor.hpp
+++ b/include/armnn/Tensor.hpp
@@ -18,7 +18,7 @@ namespace armnn
 class TensorShape
 {
 public:
-    /// Empty (invalid) constructor
+    /// Empty (invalid) constructor.
     TensorShape();

     TensorShape(unsigned int numDimensions, const unsigned int* dimensionSizes);
@@ -53,7 +53,7 @@ private:
 class TensorInfo
 {
 public:
-    /// Empty (invalid) constructor
+    /// Empty (invalid) constructor.
     TensorInfo();

     TensorInfo(const TensorShape& shape, DataType dataType,
@@ -88,7 +88,7 @@ public:
 private:
     TensorShape m_Shape;
     DataType m_DataType;
-    /// Scale and offset values used for quantization
+    /// Scale and offset values are used for quantization.
     struct Quantization
     {
         Quantization() : m_Scale(0.f), m_Offset(0) {}
@@ -102,11 +102,11 @@ template<typename MemoryType>
 class BaseTensor
 {
 public:
-    /// Empty (invalid) constructor
+    /// Empty (invalid) constructor.
     BaseTensor();

     /// Constructor from a raw memory pointer.
-    /// @param memoryArea Region of CPU-addressable memory where tensor data will be stored. Must be valid while
+    /// @param memoryArea - Region of CPU-addressable memory where tensor data will be stored. Must be valid while
     /// workloads are on the fly. Tensor instances do not claim ownership of referenced memory regions, that is,
     /// no attempt will be made by ArmNN to free these memory regions automatically.
     BaseTensor(const TensorInfo& info, MemoryType memoryArea);
@@ -130,7 +130,7 @@ public:
     MemoryType GetMemoryArea() const { return m_MemoryArea; }

 protected:
-    // protected destructor to stop users from making these
+    // Protected destructor to stop users from making these
     // (could still new one on the heap and then leak it...)
     ~BaseTensor() {}

@@ -144,21 +144,23 @@ private:
 class Tensor : public BaseTensor<void*>
 {
 public:
-    using BaseTensor<void*>::BaseTensor; // Bring in the constructors and assignment operator
+    /// Brings in the constructors and assignment operator.
+    using BaseTensor<void*>::BaseTensor;
 };

 /// A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
 class ConstTensor : public BaseTensor<const void*>
 {
 public:
-    using BaseTensor<const void*>::BaseTensor; // Bring in the constructors and assignment operator
+    /// Brings in the constructors and assignment operator.
+    using BaseTensor<const void*>::BaseTensor;
     ConstTensor() : BaseTensor<const void*>() {} // This needs to be redefined explicitly??

-    // Can be implicitly constructed from non-const Tensor
+    /// Can be implicitly constructed from non-const Tensor.
     ConstTensor(const Tensor& other) : BaseTensor<const void*>(other.GetInfo(), other.GetMemoryArea()) {}

     /// Constructor from a backing container.
-    /// @param container An stl-like container type which implements data() and size() methods.
+    /// @param container - An stl-like container type which implements data() and size() methods.
     /// Presence of data() and size() is a strong indicator of the continuous memory layout of the container,
     /// which is a requirement for Tensor data. Tensor instances do not claim ownership of referenced memory regions,
     /// that is, no attempt will be made by ArmNN to free these memory regions automatically.
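The container constructor documented above wraps existing memory without copying it, so the container must outlive any workload that reads the tensor. A minimal sketch:

```cpp
#include <armnn/Tensor.hpp>
#include <vector>

int main()
{
    std::vector<float> weights = {0.5f, -1.0f, 2.0f, 0.25f};

    const unsigned int dims[] = {2, 2};
    armnn::TensorInfo info(armnn::TensorShape(2, dims), armnn::DataType::Float32);

    // Backed directly by the vector's storage: no copy, no ownership taken.
    armnn::ConstTensor tensor(info, weights);
    return tensor.GetNumElements() == 4 ? 0 : 1;
}
```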
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index c9a4bf13e5..fe1fcb45d2 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -22,9 +22,10 @@ enum class Status

 enum class DataType
 {
-    Float32 = 0,
-    QuantisedAsymm8 = 1,
-    Signed32 = 2
+    Float16 = 0,
+    Float32 = 1,
+    QuantisedAsymm8 = 2,
+    Signed32 = 3
 };

 enum class ActivationFunction
@@ -33,7 +34,7 @@ enum class ActivationFunction
     TanH = 1,
     Linear = 2,
     ReLu = 3,
-    BoundedReLu = 4, //< min(a, max(b, input))
+    BoundedReLu = 4, ///< min(a, max(b, input))
     SoftReLu = 5,
     LeakyReLu = 6,
     Abs = 7,
@@ -51,16 +52,18 @@ enum class PoolingAlgorithm
 ///
 /// The padding method modifies the output of pooling layers.
 /// In both supported methods, the values are ignored (they are
-/// not even zeros which would make a difference for max pooling
+/// not even zeroes, which would make a difference for max pooling
 /// a tensor with negative values). The difference between
-/// IgnoreValue and Exclude is that the former count the padding
+/// IgnoreValue and Exclude is that the former counts the padding
 /// fields in the divisor of Average and L2 pooling, while
 /// Exclude does not.
 ///
 enum class PaddingMethod
 {
-    IgnoreValue = 0, // The padding fields count, but ignored
-    Exclude = 1 // The padding fields don't count and ignored
+    /// The padding fields count, but are ignored
+    IgnoreValue = 0,
+    /// The padding fields don't count and are ignored
+    Exclude = 1
 };

 enum class NormalizationAlgorithmChannel
@@ -71,8 +74,10 @@ enum class NormalizationAlgorithmChannel

 enum class NormalizationAlgorithmMethod
 {
-    LocalBrightness = 0, /* Krichevsky 2012: Local Brightness Normalization */
-    LocalContrast = 1 /* Jarret 2009: Local Contrast Normalization */
+    /// Krichevsky 2012: Local Brightness Normalization
+    LocalBrightness = 0,
+    /// Jarret 2009: Local Contrast Normalization
+    LocalContrast = 1
 };

 enum class OutputShapeRounding
@@ -83,15 +88,20 @@ enum class OutputShapeRounding

 enum class Compute
 {
-    CpuRef = 0, // CPU Execution: Reference C++ kernels
-    CpuAcc = 1, // CPU Execution: NEON: ArmCompute
-    GpuAcc = 2, // GPU Execution: OpenCL: ArmCompute
+    /// CPU Execution: Reference C++ kernels
+    CpuRef = 0,
+    /// CPU Execution: NEON: ArmCompute
+    CpuAcc = 1,
+    /// GPU Execution: OpenCL: ArmCompute
+    GpuAcc = 2,
     Undefined = 5
 };

-struct DeviceSpec
+class IDeviceSpec
 {
-    Compute DefaultComputeDevice;
+protected:
+    IDeviceSpec() {};
+    virtual ~IDeviceSpec() {};
 };

 /// Type of identifiers for bindable layers (inputs, outputs).
@@ -105,10 +115,10 @@ public:
     using ArrayType = std::array<ValueType, MaxNumOfTensorDimensions>;
     using ConstIterator = typename ArrayType::const_iterator;

-    /// @param dimMappings Indicates how to translate tensor elements from a given source into the target destination,
+    /// @param dimMappings - Indicates how to translate tensor elements from a given source into the target destination,
     /// when source and target potentially have different memory layouts.
     ///
-    /// E.g. For a 4-d tensor laid out in memory with format (Batch Element, Height, Width, Channels),
+    /// E.g. For a 4-d tensor laid out in a memory with the format (Batch Element, Height, Width, Channels),
     /// which is to be passed as an input to ArmNN, each source dimension is mapped to the corresponding
     /// ArmNN dimension. The Batch dimension remains the same (0 -> 0). The source Height dimension is mapped
     /// to the location of the ArmNN Height dimension (1 -> 2). Similar arguments are made for the Width and
@@ -152,7 +162,7 @@ private:
     SizeType m_NumDimMappings;
 };

-// Define LayerGuid type.
+/// Define LayerGuid type.
 using LayerGuid = unsigned int;

 }
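The PaddingMethod doc comment above distinguishes IgnoreValue from Exclude by the divisor used in Average and L2 pooling. A small standalone illustration of that arithmetic (not ArmNN code):

```cpp
#include <cstddef>

// Average pooling sums only real values in both modes; the two methods
// differ only in the divisor applied to that sum.
float AveragePool(const float* realValues, size_t numReal, size_t windowSize,
                  bool ignoreValuePadding)
{
    float sum = 0.0f;
    for (size_t i = 0; i < numReal; ++i)
    {
        sum += realValues[i]; // padding fields contribute nothing either way
    }
    // IgnoreValue counts padding fields in the divisor; Exclude does not.
    return sum / static_cast<float>(ignoreValuePadding ? windowSize : numReal);
}

// Example: a 2x2 window overlapping two padding fields, real values {3, 5}:
// IgnoreValue -> 8 / 4 = 2.0, Exclude -> 8 / 2 = 4.0.
```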
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index c63b653ae3..3077ce111f 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -10,6 +10,7 @@
 #include <ostream>
 #include <boost/assert.hpp>
 #include <boost/numeric/conversion/cast.hpp>
+#include <set>

 namespace armnn
 {
@@ -89,8 +90,9 @@ constexpr unsigned int GetDataTypeSize(DataType dataType)
 {
     switch (dataType)
     {
-        case DataType::Signed32:
-        case DataType::Float32: return 4U;
+        case DataType::Float16: return 2U;
+        case DataType::Float32:
+        case DataType::Signed32: return 4U;
         case DataType::QuantisedAsymm8: return 1U;
         default: return 0U;
     }
@@ -107,17 +109,17 @@ constexpr bool StrEqual(const char* strA, const char (&strB)[N])
     return isEqual;
 }

-constexpr Compute ParseComputeDevice(const char* str)
+constexpr armnn::Compute ParseComputeDevice(const char* str)
 {
-    if (StrEqual(str, "CpuAcc"))
+    if (armnn::StrEqual(str, "CpuAcc"))
     {
         return armnn::Compute::CpuAcc;
     }
-    else if (StrEqual(str, "CpuRef"))
+    else if (armnn::StrEqual(str, "CpuRef"))
     {
         return armnn::Compute::CpuRef;
     }
-    else if (StrEqual(str, "GpuAcc"))
+    else if (armnn::StrEqual(str, "GpuAcc"))
     {
         return armnn::Compute::GpuAcc;
     }
@@ -131,59 +133,60 @@ constexpr const char* GetDataTypeName(DataType dataType)
 {
     switch (dataType)
     {
-        case DataType::Float32: return "Float32";
+        case DataType::Float16: return "Float16";
+        case DataType::Float32: return "Float32";
         case DataType::QuantisedAsymm8: return "Unsigned8";
-        case DataType::Signed32: return "Signed32";
-        default: return "Unknown";
+        case DataType::Signed32: return "Signed32";
+
+        default:
+            return "Unknown";
     }
 }

-template <typename T>
-constexpr DataType GetDataType();
-
-template <>
-constexpr DataType GetDataType<float>()
-{
-    return DataType::Float32;
-}
-template <>
-constexpr DataType GetDataType<uint8_t>()
-{
-    return DataType::QuantisedAsymm8;
-}
+template<typename T>
+struct IsHalfType
+    : std::integral_constant<bool, std::is_floating_point<T>::value && sizeof(T) == 2>
+{};
-template <>
-constexpr DataType GetDataType<int32_t>()
-{
-    return DataType::Signed32;
-}
+template<typename T, typename U=T>
+struct GetDataTypeImpl;

 template<typename T>
-constexpr bool IsQuantizedType()
+struct GetDataTypeImpl<T, typename std::enable_if_t<IsHalfType<T>::value, T>>
 {
-    return std::is_integral<T>::value;
-}
-
+    static constexpr DataType Value = DataType::Float16;
+};

-template<DataType DT>
-struct ResolveTypeImpl;
+template<>
+struct GetDataTypeImpl<float>
+{
+    static constexpr DataType Value = DataType::Float32;
+};

 template<>
-struct ResolveTypeImpl<DataType::QuantisedAsymm8>
+struct GetDataTypeImpl<uint8_t>
 {
-    using Type = uint8_t;
+    static constexpr DataType Value = DataType::QuantisedAsymm8;
 };

 template<>
-struct ResolveTypeImpl<DataType::Float32>
+struct GetDataTypeImpl<int32_t>
 {
-    using Type = float;
+    static constexpr DataType Value = DataType::Signed32;
 };

-template<DataType DT>
-using ResolveType = typename ResolveTypeImpl<DT>::Type;
+template <typename T>
+constexpr DataType GetDataType()
+{
+    return GetDataTypeImpl<T>::Value;
+}

+template<typename T>
+constexpr bool IsQuantizedType()
+{
+    return std::is_integral<T>::value;
+}

 inline std::ostream& operator<<(std::ostream& os, Status stat)
 {
@@ -191,7 +194,23 @@ inline std::ostream& operator<<(std::ostream& os, Status stat)
     return os;
 }

-inline std::ostream& operator<<(std::ostream& os, Compute compute)
+inline std::ostream& operator<<(std::ostream& os, const std::vector<Compute>& compute)
+{
+    for (const Compute& comp : compute) {
+        os << GetComputeDeviceAsCString(comp) << " ";
+    }
+    return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::set<Compute>& compute)
+{
+    for (const Compute& comp : compute) {
+        os << GetComputeDeviceAsCString(comp) << " ";
+    }
+    return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Compute& compute)
 {
     os << GetComputeDeviceAsCString(compute);
     return os;
@@ -212,11 +231,11 @@ inline std::ostream & operator<<(std::ostream & os, const armnn::TensorShape & s
     return os;
 }

-/// Quantize a floating point data type into an 8-bit data type
-/// @param value The value to quantize
-/// @param scale The scale (must be non-zero)
-/// @param offset The offset
-/// @return The quantized value calculated as round(value/scale)+offset
+/// Quantize a floating point data type into an 8-bit data type.
+/// @param value - The value to quantize.
+/// @param scale - The scale (must be non-zero).
+/// @param offset - The offset.
+/// @return - The quantized value calculated as round(value/scale)+offset.
 ///
 template<typename QuantizedType>
 inline QuantizedType Quantize(float value, float scale, int32_t offset)
@@ -234,11 +253,11 @@ inline QuantizedType Quantize(float value, float scale, int32_t offset)
     return quantizedBits;
 }

-/// Dequantize an 8-bit data type into a floating point data type
-/// @param value The value to dequantize
-/// @param scale The scale (must be non-zero)
-/// @param offset The offset
-/// @return The dequantized value calculated as (value-offset)*scale
+/// Dequantize an 8-bit data type into a floating point data type.
+/// @param value - The value to dequantize.
+/// @param scale - The scale (must be non-zero).
+/// @param offset - The offset.
+/// @return - The dequantized value calculated as (value-offset)*scale.
 ///
 template <typename QuantizedType>
 inline float Dequantize(QuantizedType value, float scale, int32_t offset)
@@ -249,4 +268,18 @@ inline float Dequantize(QuantizedType value, float scale, int32_t offset)
     return dequantized;
 }

+template <typename DataType>
+void VerifyTensorInfoDataType(const armnn::TensorInfo & info)
+{
+    auto expectedType = armnn::GetDataType<DataType>();
+    if (info.GetDataType() != expectedType)
+    {
+        std::stringstream ss;
+        ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
+           << " for tensor:" << info.GetShape()
+           << ". The type expected to be: " << armnn::GetDataTypeName(expectedType);
+        throw armnn::Exception(ss.str());
+    }
+}
+
 } //namespace armnn
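The Quantize()/Dequantize() helpers above implement the documented formulas round(value/scale)+offset and (value-offset)*scale. A round-trip sketch:

```cpp
#include <armnn/TypesUtils.hpp>
#include <cassert>
#include <cstdint>

int main()
{
    const float   scale  = 0.5f;
    const int32_t offset = 10;

    // round(2.0 / 0.5) + 10 = 14
    uint8_t q = armnn::Quantize<uint8_t>(2.0f, scale, offset);
    assert(q == 14);

    // (14 - 10) * 0.5 = 2.0
    float d = armnn::Dequantize(q, scale, offset);
    assert(d == 2.0f);
    return 0;
}
```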
diff --git a/include/armnn/Utils.hpp b/include/armnn/Utils.hpp
index 1a0c34baad..4b5cb9892d 100644
--- a/include/armnn/Utils.hpp
+++ b/include/armnn/Utils.hpp
@@ -4,6 +4,9 @@
 //
 #pragma once

+#include <vector>
+#include "armnn/TypesUtils.hpp"
+
 namespace armnn
 {

@@ -24,4 +27,4 @@ enum class LogSeverity
 /// severity: All log messages that are at this severity level or higher will be printed, others will be ignored.
 void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity);

-}
+} // namespace armnn
diff --git a/include/armnn/Version.hpp b/include/armnn/Version.hpp
index d5f794eb8b..1a290d7177 100644
--- a/include/armnn/Version.hpp
+++ b/include/armnn/Version.hpp
@@ -9,4 +9,4 @@
 // YYYY = 4-digit year number
 // MM = 2-digit month number
 // PP = 2-digit patch number
-#define ARMNN_VERSION "20180502"
+#define ARMNN_VERSION "20180800"