From ce3e84a8d449cbf31cee57e30f0eef6a96c0ce94 Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 31 Aug 2018 09:31:35 +0100 Subject: Release 18.08 --- 1.0/ArmnnDriver.hpp | 66 + 1.0/ArmnnDriverImpl.cpp | 277 ++++ 1.0/ArmnnDriverImpl.hpp | 41 + 1.1/ArmnnDriver.hpp | 103 ++ 1.1/ArmnnDriverImpl.cpp | 151 +++ 1.1/ArmnnDriverImpl.hpp | 39 + Android.bp | 29 +- Android.mk | 261 +++- ArmnnDevice.cpp | 65 + ArmnnDevice.hpp | 27 + ArmnnDriver.cpp | 453 ------- ArmnnDriver.hpp | 75 +- ArmnnPreparedModel.cpp | 113 +- ArmnnPreparedModel.hpp | 51 +- DriverOptions.cpp | 159 +++ DriverOptions.hpp | 43 + ModelToINetworkConverter.cpp | 1003 ++++++++++---- ModelToINetworkConverter.hpp | 129 +- NnapiSupport.txt | 19 +- README.md | 30 +- RequestThread.hpp | 5 +- SystemPropertiesUtils.hpp | 3 +- Utils.cpp | 45 +- Utils.hpp | 24 +- ...id.hardware.neuralnetworks@1.1-service-armnn.rc | 4 + service.cpp | 1 - test/Android.mk | 166 ++- test/Concurrent.cpp | 6 +- test/Convolution2D.cpp | 10 +- test/DriverTestHelpers.cpp | 26 +- test/DriverTestHelpers.hpp | 23 +- test/FullyConnected.cpp | 16 +- test/GenericLayerTests.cpp | 222 ++-- test/Lstm.cpp | 1397 ++++++++++++++++++++ test/Merger.cpp | 160 ++- test/Tests.cpp | 8 +- test/UtilsTests.cpp | 11 +- 37 files changed, 4045 insertions(+), 1216 deletions(-) create mode 100644 1.0/ArmnnDriver.hpp create mode 100644 1.0/ArmnnDriverImpl.cpp create mode 100644 1.0/ArmnnDriverImpl.hpp create mode 100644 1.1/ArmnnDriver.hpp create mode 100644 1.1/ArmnnDriverImpl.cpp create mode 100644 1.1/ArmnnDriverImpl.hpp create mode 100644 ArmnnDevice.cpp create mode 100644 ArmnnDevice.hpp delete mode 100644 ArmnnDriver.cpp create mode 100644 DriverOptions.cpp create mode 100644 DriverOptions.hpp create mode 100644 android.hardware.neuralnetworks@1.1-service-armnn.rc create mode 100644 test/Lstm.cpp diff --git a/1.0/ArmnnDriver.hpp b/1.0/ArmnnDriver.hpp new file mode 100644 index 00000000..83484ca9 --- /dev/null +++ b/1.0/ArmnnDriver.hpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include + +#include "ArmnnDriverImpl.hpp" +#include "ArmnnDevice.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_0 +{ + +class ArmnnDriver : public ArmnnDevice, public ::android::hardware::neuralnetworks::V1_0::IDevice +{ +public: + ArmnnDriver(DriverOptions options) + : ArmnnDevice(std::move(options)) + { + ALOGV("V1_0::ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} + +public: + Return getCapabilities( + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) + { + ALOGV("V1_0::ArmnnDriver::getCapabilities()"); + + return ArmnnDriverImpl::getCapabilities(m_Runtime, cb); + } + + Return getSupportedOperations( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) + { + ALOGV("V1_0::ArmnnDriver::getSupportedOperations()"); + + return ArmnnDriverImpl::getSupportedOperations(m_Runtime, m_Options, model, cb); + } + + Return prepareModel( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb) + { + ALOGV("V1_0::ArmnnDriver::prepareModel()"); + + return ArmnnDriverImpl::prepareModel(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getStatus() + { + ALOGV("V1_0::ArmnnDriver::getStatus()"); + + return ArmnnDriverImpl::getStatus(); + } +}; + +} // armnn_driver::namespace V1_0 +} // namespace armnn_driver diff --git a/1.0/ArmnnDriverImpl.cpp b/1.0/ArmnnDriverImpl.cpp new file mode 100644 index 00000000..5429ebed --- /dev/null +++ b/1.0/ArmnnDriverImpl.cpp @@ -0,0 +1,277 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ArmnnDriverImpl.hpp" +#include "ModelToINetworkConverter.hpp" +#include "ArmnnPreparedModel.hpp" +#include "SystemPropertiesUtils.hpp" + +#if defined(ARMNN_ANDROID_P) +// The headers of the ML framework have changed between Android O and Android P. +// The validation functions have been moved into their own header, ValidateHal.h. 
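+// (ARMNN_ANDROID_P is defined by Android.mk: unconditionally for the @1.1 build,
+// and for the @1.0 build whenever PLATFORM_VERSION is 9, so this include is only
+// pulled in when building against the Android P ML framework.)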
+#include +#endif + +#include + +using namespace std; +using namespace android; +using namespace android::nn; +using namespace android::hardware; + +namespace +{ + +const char *g_Float32PerformanceExecTimeName = "ArmNN.float32Performance.execTime"; +const char *g_Float32PerformancePowerUsageName = "ArmNN.float32Performance.powerUsage"; +const char *g_Quantized8PerformanceExecTimeName = "ArmNN.quantized8Performance.execTime"; +const char *g_Quantized8PerformancePowerUsageName = "ArmNN.quantized8Performance.powerUsage"; + +void NotifyCallbackAndCheck(const sp& callback, + ErrorStatus errorStatus, + const sp& preparedModelPtr) +{ + Return returned = callback->notify(errorStatus, preparedModelPtr); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("V1_0::ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ", + returned.description().c_str()); + } +} + +Return FailPrepareModel(ErrorStatus error, + const string& message, + const sp& callback) +{ + ALOGW("V1_0::ArmnnDriverImpl::prepareModel: %s", message.c_str()); + NotifyCallbackAndCheck(callback, error, nullptr); + return error; +} + +} // namespace + +namespace armnn_driver +{ +namespace V1_0 +{ + +Return ArmnnDriverImpl::getCapabilities( + const armnn::IRuntimePtr& runtime, + neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) +{ + ALOGV("V1_0::ArmnnDriverImpl::getCapabilities()"); + + neuralnetworks::V1_0::Capabilities capabilities; + if (runtime) + { + capabilities.float32Performance.execTime = + ParseSystemProperty(g_Float32PerformanceExecTimeName, .1f); + + capabilities.float32Performance.powerUsage = + ParseSystemProperty(g_Float32PerformancePowerUsageName, .1f); + + capabilities.quantized8Performance.execTime = + ParseSystemProperty(g_Quantized8PerformanceExecTimeName, .1f); + + capabilities.quantized8Performance.powerUsage = + ParseSystemProperty(g_Quantized8PerformancePowerUsageName, .1f); + + cb(ErrorStatus::NONE, capabilities); + } + else + { + capabilities.float32Performance.execTime = 0; + capabilities.float32Performance.powerUsage = 0; + capabilities.quantized8Performance.execTime = 0; + capabilities.quantized8Performance.powerUsage = 0; + + cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities); + } + + return Void(); +} + +Return ArmnnDriverImpl::getSupportedOperations( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const neuralnetworks::V1_0::Model& model, + neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) +{ + ALOGV("V1_0::ArmnnDriverImpl::getSupportedOperations()"); + + vector result; + + if (!runtime) + { + cb(ErrorStatus::DEVICE_UNAVAILABLE, result); + return Void(); + } + + // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway + if (!android::nn::validateModel(model)) + { + cb(ErrorStatus::INVALID_ARGUMENT, result); + return Void(); + } + + // Attempt to convert the model to an ArmNN input network (INetwork). 
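+// A conversion result of UnsupportedFeature is still acceptable at this point:
+// the per-operation support flags are reported to the callback below, so only a
+// hard conversion failure is treated as an error.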
+ ModelToINetworkConverter modelConverter(options.GetComputeDevice(), model, + options.GetForcedUnsupportedOperations()); + + if (modelConverter.GetConversionResult() != ConversionResult::Success + && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) + { + cb(ErrorStatus::GENERAL_FAILURE, result); + return Void(); + } + + // Check each operation if it was converted successfully and copy the flags + // into the result (vector) that we need to return to Android + result.reserve(model.operations.size()); + for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++) + { + bool operationSupported = modelConverter.IsOperationSupported(operationIdx); + result.push_back(operationSupported); + } + + cb(ErrorStatus::NONE, result); + return Void(); +} + +Return ArmnnDriverImpl::prepareModel( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const neuralnetworks::V1_0::Model& model, + const sp& cb, + bool float32ToFloat16) +{ + ALOGV("V1_0::ArmnnDriverImpl::prepareModel()"); + + if (cb.get() == nullptr) + { + ALOGW("V1_0::ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); + return ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, + "V1_0::ArmnnDriverImpl::prepareModel: Device unavailable", cb); + } + + if (!android::nn::validateModel(model)) + { + return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, + "V1_0::ArmnnDriverImpl::prepareModel: Invalid model passed as input", cb); + } + + // Deliberately ignore any unsupported operations requested by the options - + // at this point we're being asked to prepare a model that we've already declared support for + // and the operation indices may be different to those in getSupportedOperations anyway. + set unsupportedOperations; + ModelToINetworkConverter modelConverter(options.GetComputeDevice(), model, + unsupportedOperations); + + if (modelConverter.GetConversionResult() != ConversionResult::Success) + { + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); + return ErrorStatus::NONE; + } + + // optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptions OptOptions; + OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + + try + { + optNet = armnn::Optimize(*modelConverter.GetINetwork(), + {options.GetComputeDevice()}, + runtime->GetDeviceSpec(), + OptOptions); + } + catch (armnn::Exception &e) + { + stringstream message; + message << "armnn::Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_0::ArmnnDriverImpl::prepareModel: Invalid optimized network", cb); + return ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. 
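+// (The directory is the one given via the -d/--request-inputs-and-outputs-dump-dir
+// driver option; the export is presumably skipped when no directory has been set.)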
+ ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir(), + model); + + // load it into the runtime + armnn::NetworkId netId = 0; + try + { + if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success) + { + return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_0::ArmnnDriverImpl::prepareModel: Network could not be loaded", cb); + } + } + catch (armnn::Exception& e) + { + stringstream message; + message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork."; + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return ErrorStatus::NONE; + } + + unique_ptr preparedModel(new ArmnnPreparedModel( + netId, + runtime.get(), + model, + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled() + )); + + // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if + // this is enabled) before the first 'real' inference which removes the overhead of the first inference. + preparedModel->ExecuteWithDummyInputs(); + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (const armnn::Exception& error) + { + ALOGE("V1_0::ArmnnDriverImpl: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + + NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release()); + + return ErrorStatus::NONE; +} + +Return ArmnnDriverImpl::getStatus() +{ + ALOGV("V1_0::ArmnnDriverImpl::getStatus()"); + + return DeviceStatus::AVAILABLE; +} + +} // armnn_driver::namespace V1_0 +} // namespace armnn_driver diff --git a/1.0/ArmnnDriverImpl.hpp b/1.0/ArmnnDriverImpl.hpp new file mode 100644 index 00000000..2628682d --- /dev/null +++ b/1.0/ArmnnDriverImpl.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_0 +{ + +class ArmnnDriverImpl +{ +public: + static Return getCapabilities( + const armnn::IRuntimePtr& runtime, + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb); + static Return getSupportedOperations( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb); + static Return prepareModel( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb, + bool float32ToFloat16 = false); + static Return getStatus(); +}; + +} // namespace armnn_driver::V1_0 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriver.hpp b/1.1/ArmnnDriver.hpp new file mode 100644 index 00000000..6bd8e03c --- /dev/null +++ b/1.1/ArmnnDriver.hpp @@ -0,0 +1,103 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include + +#include "ArmnnDevice.hpp" +#include "1.0/ArmnnDriverImpl.hpp" +#include "1.1/ArmnnDriverImpl.hpp" + +#include + +namespace armnn_driver { +namespace V1_1 { + +class ArmnnDriver : public ArmnnDevice, public ::android::hardware::neuralnetworks::V1_1::IDevice +{ +public: + ArmnnDriver(DriverOptions options) + : ArmnnDevice(std::move(options)) + { + ALOGV("V1_1::ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} + +public: + Return getCapabilities( + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getCapabilities()"); + + return V1_0::ArmnnDriverImpl::getCapabilities(m_Runtime, cb); + } + + Return getSupportedOperations( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getSupportedOperations()"); + + return V1_0::ArmnnDriverImpl::getSupportedOperations(m_Runtime, m_Options, model, cb); + } + + Return prepareModel( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb) + { + ALOGV("V1_1::ArmnnDriver::prepareModel()"); + + return V1_0::ArmnnDriverImpl::prepareModel(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getCapabilities_1_1( + ::android::hardware::neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getCapabilities_1_1()"); + + return V1_1::ArmnnDriverImpl::getCapabilities_1_1(m_Runtime, cb); + } + + Return getSupportedOperations_1_1( + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getSupportedOperations_1_1()"); + + return V1_1::ArmnnDriverImpl::getSupportedOperations_1_1(m_Runtime, m_Options, model, cb); + } + + Return prepareModel_1_1( + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::ExecutionPreference preference, + const android::sp& cb) + { + using namespace ::android::hardware::neuralnetworks::V1_0; + + ALOGV("V1_1::ArmnnDriver::prepareModel_1_1()"); + + if(!(preference == ExecutionPreference::LOW_POWER || + preference == ExecutionPreference::FAST_SINGLE_ANSWER || + preference == ExecutionPreference::SUSTAINED_SPEED)) + { + ALOGV("V1_1::ArmnnDriver::prepareModel_1_1(): Invalid execution preference"); + cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); + return ErrorStatus::INVALID_ARGUMENT; + } + + return V1_1::ArmnnDriverImpl::prepareModel_1_1(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getStatus() + { + ALOGV("V1_1::ArmnnDriver::getStatus()"); + + return V1_0::ArmnnDriverImpl::getStatus(); + } +}; + +} // armnn_driver::namespace V1_1 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriverImpl.cpp b/1.1/ArmnnDriverImpl.cpp new file mode 100644 index 00000000..a5e32766 --- /dev/null +++ b/1.1/ArmnnDriverImpl.cpp @@ -0,0 +1,151 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ArmnnDriverImpl.hpp" +#include "../1.0/ArmnnDriverImpl.hpp" + +#include + +#include +#include + +#include + +using namespace std; +using namespace android; +using namespace android::nn; +using namespace android::hardware; + +namespace +{ + +void NotifyCallbackAndCheck(const sp& callback, + ErrorStatus errorStatus, + const sp& preparedModelPtr) +{ + Return returned = callback->notify(errorStatus, preparedModelPtr); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("V1_1::ArmnnDriverImpl::prepareModel_1_1: hidl callback failed to return properly: %s ", + returned.description().c_str()); + } +} + +Return FailPrepareModel(ErrorStatus error, + const string& message, + const sp& callback) +{ + ALOGW("V1_1::ArmnnDriverImpl::prepareModel_1_1: %s", message.c_str()); + NotifyCallbackAndCheck(callback, error, nullptr); + return error; +} + +} // namespace + +namespace armnn_driver +{ +namespace V1_1 +{ + +Return ArmnnDriverImpl::getCapabilities_1_1( + const armnn::IRuntimePtr& runtime, + neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::getCapabilities_1_1()"); + + neuralnetworks::V1_0::IDevice::getCapabilities_cb cb_1_0 = + [&](ErrorStatus status, const neuralnetworks::V1_0::Capabilities& capabilities) + { + BOOST_ASSERT_MSG(compliantWithV1_1(capabilities), + "V1_1::ArmnnDriverImpl: V1_0::Capabilities not compliant with V1_1::Capabilities"); + + cb(status, convertToV1_1(capabilities)); + }; + + V1_0::ArmnnDriverImpl::getCapabilities(runtime, cb_1_0); + + return Void(); +} + +Return ArmnnDriverImpl::getSupportedOperations_1_1( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const neuralnetworks::V1_1::Model& model, + neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1()"); + + if(compliantWithV1_0(model)) + { + V1_0::ArmnnDriverImpl::getSupportedOperations(runtime, options, convertToV1_0(model), cb); + } + else + { + std::vector result; + + if (!runtime) + { + ALOGW("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1: Device unavailable"); + cb(ErrorStatus::DEVICE_UNAVAILABLE, result); + return Void(); + } + + if (!android::nn::validateModel(model)) + { + ALOGW("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1: Invalid model passed as input"); + cb(ErrorStatus::INVALID_ARGUMENT, result); + return Void(); + } + + result.assign(model.operations.size(), false); + cb(ErrorStatus::NONE, result); + } + + return Void(); +} + +Return ArmnnDriverImpl::prepareModel_1_1( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const neuralnetworks::V1_1::Model& model, + const sp& cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::prepareModel_1_1()"); + + if(compliantWithV1_0(model)) + { + return V1_0::ArmnnDriverImpl::prepareModel(runtime, clTunedParameters, options, convertToV1_0(model), cb, + model.relaxComputationFloat32toFloat16 && options.GetFp16Enabled()); + } + else + { + if (cb.get() == nullptr) + { + ALOGW("V1_1::ArmnnDriverImpl::prepareModel_1_1: Invalid callback passed to prepareModel"); + return ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, + "V1_1::ArmnnDriverImpl::prepareModel_1_1: Device unavailable", cb); + } + + if (!android::nn::validateModel(model)) + { + return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, + 
"V1_1::ArmnnDriverImpl::prepareModel_1_1: Invalid model passed as input", cb); + } + + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_1::ArmnnDriverImpl::prepareModel_1_1: Unsupported model", cb); + return ErrorStatus::NONE; + } +} + +} // armnn_driver::namespace V1_1 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriverImpl.hpp b/1.1/ArmnnDriverImpl.hpp new file mode 100644 index 00000000..307d96bf --- /dev/null +++ b/1.1/ArmnnDriverImpl.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_1 +{ + +class ArmnnDriverImpl +{ +public: + static Return getCapabilities_1_1( + const armnn::IRuntimePtr& runtime, + ::android::hardware::neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb); + static Return getSupportedOperations_1_1( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb); + static Return prepareModel_1_1( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_1::Model& model, + const android::sp& cb); +}; + +} // namespace armnn_driver::V1_1 +} // namespace armnn_driver diff --git a/Android.bp b/Android.bp index 03b2ded4..e44275fa 100644 --- a/Android.bp +++ b/Android.bp @@ -37,6 +37,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLActivationLayerKernel.cpp", "clframework/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp", "clframework/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp", + "clframework/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp", "clframework/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", "clframework/src/core/CL/kernels/CLBitwiseAndKernel.cpp", "clframework/src/core/CL/kernels/CLBitwiseNotKernel.cpp", @@ -46,8 +47,10 @@ cc_library_static { "clframework/src/core/CL/kernels/CLCannyEdgeKernel.cpp", "clframework/src/core/CL/kernels/CLChannelCombineKernel.cpp", "clframework/src/core/CL/kernels/CLChannelExtractKernel.cpp", + "clframework/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp", "clframework/src/core/CL/kernels/CLCol2ImKernel.cpp", "clframework/src/core/CL/kernels/CLColorConvertKernel.cpp", + "clframework/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp", "clframework/src/core/CL/kernels/CLConvolutionKernel.cpp", "clframework/src/core/CL/kernels/CLCopyKernel.cpp", "clframework/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp", @@ -66,6 +69,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLErodeKernel.cpp", "clframework/src/core/CL/kernels/CLFastCornersKernel.cpp", "clframework/src/core/CL/kernels/CLFillBorderKernel.cpp", + "clframework/src/core/CL/kernels/CLFlattenLayerKernel.cpp", "clframework/src/core/CL/kernels/CLFloorKernel.cpp", "clframework/src/core/CL/kernels/CLGaussian3x3Kernel.cpp", "clframework/src/core/CL/kernels/CLGaussian5x5Kernel.cpp", @@ -118,6 +122,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLWarpAffineKernel.cpp", "clframework/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp", "clframework/src/core/CL/kernels/CLWeightsReshapeKernel.cpp", + "clframework/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp", 
"clframework/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp", "clframework/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp", "clframework/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp", @@ -144,6 +149,11 @@ cc_library_static { "clframework/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "clframework/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp", "clframework/src/core/NEON/kernels/convolution/common/utils.cpp", "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp", "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp", @@ -178,6 +188,7 @@ cc_library_static { "clframework/src/core/NEON/kernels/NEChannelExtractKernel.cpp", "clframework/src/core/NEON/kernels/NECol2ImKernel.cpp", "clframework/src/core/NEON/kernels/NEColorConvertKernel.cpp", + "clframework/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp", "clframework/src/core/NEON/kernels/NEConvolutionKernel.cpp", "clframework/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp", "clframework/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp", @@ -247,6 +258,7 @@ cc_library_static { "clframework/src/core/NEON/kernels/NETransposeKernel.cpp", "clframework/src/core/NEON/kernels/NEWarpKernel.cpp", "clframework/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp", + "clframework/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp", "clframework/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp", "clframework/src/core/PyramidInfo.cpp", "clframework/src/core/Rounding.cpp", @@ -282,6 +294,7 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLAccumulate.cpp", "clframework/src/runtime/CL/functions/CLActivationLayer.cpp", "clframework/src/runtime/CL/functions/CLArithmeticAddition.cpp", + "clframework/src/runtime/CL/functions/CLArithmeticDivision.cpp", "clframework/src/runtime/CL/functions/CLArithmeticSubtraction.cpp", "clframework/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp", "clframework/src/runtime/CL/functions/CLBitwiseAnd.cpp", @@ -292,7 +305,10 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLCannyEdge.cpp", "clframework/src/runtime/CL/functions/CLChannelCombine.cpp", "clframework/src/runtime/CL/functions/CLChannelExtract.cpp", + "clframework/src/runtime/CL/functions/CLChannelShuffleLayer.cpp", "clframework/src/runtime/CL/functions/CLColorConvert.cpp", + "clframework/src/runtime/CL/functions/CLConcatenateLayer.cpp", + "clframework/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp", "clframework/src/runtime/CL/functions/CLConvolution.cpp", "clframework/src/runtime/CL/functions/CLConvolutionLayer.cpp", "clframework/src/runtime/CL/functions/CLCopy.cpp", @@ -333,6 +349,7 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLLaplacianPyramid.cpp", "clframework/src/runtime/CL/functions/CLLaplacianReconstruct.cpp", "clframework/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp", + "clframework/src/runtime/CL/functions/CLLSTMLayer.cpp", 
"clframework/src/runtime/CL/functions/CLMagnitude.cpp", "clframework/src/runtime/CL/functions/CLMeanStdDev.cpp", "clframework/src/runtime/CL/functions/CLMedian3x3.cpp", @@ -362,10 +379,12 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLTranspose.cpp", "clframework/src/runtime/CL/functions/CLWarpAffine.cpp", "clframework/src/runtime/CL/functions/CLWarpPerspective.cpp", + "clframework/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp", "clframework/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp", "clframework/src/runtime/CL/functions/CLWinogradInputTransform.cpp", "clframework/src/runtime/CL/ICLSimpleFunction.cpp", "clframework/src/runtime/CL/tuners/BifrostTuner.cpp", + "clframework/src/runtime/CL/tuners/MidgardTuner.cpp", "clframework/src/runtime/CPP/CPPScheduler.cpp", "clframework/src/runtime/CPP/functions/CPPPermute.cpp", "clframework/src/runtime/CPP/functions/CPPUpsample.cpp", @@ -384,6 +403,7 @@ cc_library_static { "clframework/src/runtime/MemoryManagerOnDemand.cpp", "clframework/src/runtime/MultiHOG.cpp", "clframework/src/runtime/MultiImage.cpp", + "clframework/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp", "clframework/src/runtime/NEON/functions/NEAbsoluteDifference.cpp", "clframework/src/runtime/NEON/functions/NEAccumulate.cpp", "clframework/src/runtime/NEON/functions/NEActivationLayer.cpp", @@ -400,6 +420,8 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NEChannelExtract.cpp", "clframework/src/runtime/NEON/functions/NECol2Im.cpp", "clframework/src/runtime/NEON/functions/NEColorConvert.cpp", + "clframework/src/runtime/NEON/functions/NEConcatenateLayer.cpp", + "clframework/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp", "clframework/src/runtime/NEON/functions/NEConvolution.cpp", "clframework/src/runtime/NEON/functions/NEConvolutionLayer.cpp", "clframework/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp", @@ -421,6 +443,7 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NEGaussian3x3.cpp", "clframework/src/runtime/NEON/functions/NEGaussian5x5.cpp", "clframework/src/runtime/NEON/functions/NEGaussianPyramid.cpp", + "clframework/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp", "clframework/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp", "clframework/src/runtime/NEON/functions/NEGEMM.cpp", "clframework/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp", @@ -457,8 +480,10 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NERemap.cpp", "clframework/src/runtime/NEON/functions/NEReshapeLayer.cpp", "clframework/src/runtime/NEON/functions/NEROIPoolingLayer.cpp", + "clframework/src/runtime/NEON/functions/NERNNLayer.cpp", "clframework/src/runtime/NEON/functions/NEScale.cpp", "clframework/src/runtime/NEON/functions/NEScharr3x3.cpp", + "clframework/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp", "clframework/src/runtime/NEON/functions/NESobel3x3.cpp", "clframework/src/runtime/NEON/functions/NESobel5x5.cpp", "clframework/src/runtime/NEON/functions/NESobel7x7.cpp", @@ -468,11 +493,11 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NETranspose.cpp", "clframework/src/runtime/NEON/functions/NEWarpAffine.cpp", "clframework/src/runtime/NEON/functions/NEWarpPerspective.cpp", + "clframework/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp", "clframework/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", "clframework/src/runtime/NEON/INESimpleFunction.cpp", "clframework/src/runtime/OffsetLifetimeManager.cpp", 
"clframework/src/runtime/OffsetMemoryPool.cpp", - "clframework/src/runtime/OMP/OMPScheduler.cpp", "clframework/src/runtime/PoolManager.cpp", "clframework/src/runtime/Pyramid.cpp", "clframework/src/runtime/Scheduler.cpp", @@ -480,6 +505,7 @@ cc_library_static { "clframework/src/runtime/TensorAllocator.cpp", "clframework/src/runtime/Tensor.cpp", "clframework/src/runtime/Utils.cpp", + "clframework/utils/CommonGraphOptions.cpp", "clframework/utils/GraphUtils.cpp", "clframework/utils/Utils.cpp", ], @@ -518,6 +544,7 @@ cc_library_static { "-fexceptions", "-DEMBEDDED_KERNELS", "-DARM_COMPUTE_ASSERTS_ENABLED", + "-DARM_COMPUTE_CPP_SCHEDULER", "-Wno-unused-parameter", "-DNO_DOT_IN_TOOLCHAIN", "-no-integrated-as" diff --git a/Android.mk b/Android.mk index e69514c2..c0dd1e6d 100644 --- a/Android.mk +++ b/Android.mk @@ -12,12 +12,12 @@ ARMNN_UTILS_HEADER_PATH := $(LOCAL_PATH)/armnn/src/armnnUtils OPENCL_HEADER_PATH := $(LOCAL_PATH)/clframework/include NN_HEADER_PATH := $(LOCAL_PATH)/../../../frameworks/ml/nn/runtime/include -################### -# libarmnn-driver # -################### +####################### +# libarmnn-driver@1.0 # +####################### include $(CLEAR_VARS) -LOCAL_MODULE := libarmnn-driver +LOCAL_MODULE := libarmnn-driver@1.0 LOCAL_MODULE_TAGS := eng optional LOCAL_ARM_MODE := arm LOCAL_PROPRIETARY_MODULE := true @@ -25,16 +25,16 @@ LOCAL_PROPRIETARY_MODULE := true LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk LOCAL_C_INCLUDES := \ - $(ARMNN_HEADER_PATH) \ - $(ARMNN_UTILS_HEADER_PATH) \ - $(OPENCL_HEADER_PATH) \ - $(NN_HEADER_PATH) + $(ARMNN_HEADER_PATH) \ + $(ARMNN_UTILS_HEADER_PATH) \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions \ - -Werror \ - -Wno-format-security + -std=c++14 \ + -fexceptions \ + -Werror \ + -Wno-format-security ifeq ($(PLATFORM_VERSION),9) # Required to build with the changes made to the Android ML framework starting from Android P, # regardless of the HAL version used for the build. @@ -42,43 +42,110 @@ LOCAL_CFLAGS+= \ -DARMNN_ANDROID_P endif ifeq ($(ARMNN_DRIVER_DEBUG),1) - LOCAL_CFLAGS+= -UNDEBUG +LOCAL_CFLAGS+= \ + -UNDEBUG endif LOCAL_SRC_FILES := \ - ArmnnDriver.cpp \ - ArmnnPreparedModel.cpp \ - ModelToINetworkConverter.cpp \ - RequestThread.cpp \ - Utils.cpp + 1.0/ArmnnDriverImpl.cpp \ + DriverOptions.cpp \ + ArmnnDevice.cpp \ + ArmnnPreparedModel.cpp \ + ModelToINetworkConverter.cpp \ + RequestThread.cpp \ + Utils.cpp LOCAL_STATIC_LIBRARIES := \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_program_options \ - libboost_system \ - libboost_thread \ - armnn-arm_compute + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - liblog \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. 
LOCAL_SHARED_LIBRARIES+= \ - android.hardware.neuralnetworks@1.1 + android.hardware.neuralnetworks@1.1 +endif + +include $(BUILD_STATIC_LIBRARY) + +####################### +# libarmnn-driver@1.1 # +####################### +include $(CLEAR_VARS) + +LOCAL_MODULE := libarmnn-driver@1.1 +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_UTILS_HEADER_PATH) \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -Werror \ + -Wno-format-security \ + -DARMNN_ANDROID_P \ + -DARMNN_ANDROID_NN_V1_1 +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS+= \ + -UNDEBUG endif +LOCAL_SRC_FILES := \ + 1.0/ArmnnDriverImpl.cpp \ + 1.1/ArmnnDriverImpl.cpp \ + DriverOptions.cpp \ + ArmnnDevice.cpp \ + ArmnnPreparedModel.cpp \ + ModelToINetworkConverter.cpp \ + RequestThread.cpp \ + Utils.cpp + +LOCAL_STATIC_LIBRARIES := \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL + include $(BUILD_STATIC_LIBRARY) ##################################################### @@ -95,56 +162,110 @@ LOCAL_PROPRIETARY_MODULE := true # Mark source files as dependent on Android.mk LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk -LOCAL_C_INCLUDES := \ - $(ARMNN_HEADER_PATH) \ - $(NN_HEADER_PATH) +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(NN_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions + -std=c++14 \ + -fexceptions ifeq ($(ARMNN_DRIVER_DEBUG),1) - LOCAL_CFLAGS+= -UNDEBUG +LOCAL_CFLAGS += \ + -UNDEBUG endif LOCAL_SRC_FILES := \ - service.cpp + service.cpp LOCAL_STATIC_LIBRARIES := \ - libarmnn-driver \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_program_options \ - libboost_system \ - libboost_thread \ - armnn-arm_compute -ifeq ($(PLATFORM_VERSION),9) -# Required to build the 1.0 version of the NN Driver on Android P and later versions. -LOCAL_STATIC_LIBRARIES+= \ - libomp -endif + libarmnn-driver@1.0 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - libdl \ - libhardware \ - liblog \ - libtextclassifier_hash \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + libdl \ + libhardware \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. 
LOCAL_SHARED_LIBRARIES+= \ - android.hardware.neuralnetworks@1.1 + android.hardware.neuralnetworks@1.1 +endif + +include $(BUILD_EXECUTABLE) + +##################################################### +# android.hardware.neuralnetworks@1.1-service-armnn # +##################################################### +include $(CLEAR_VARS) + +LOCAL_MODULE := android.hardware.neuralnetworks@1.1-service-armnn +LOCAL_INIT_RC := android.hardware.neuralnetworks@1.1-service-armnn.rc +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_MODULE_RELATIVE_PATH := hw +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -DARMNN_ANDROID_NN_V1_1 +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS += \ + -UNDEBUG endif +LOCAL_SRC_FILES := \ + service.cpp + +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.1 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + libdl \ + libhardware \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL + include $(BUILD_EXECUTABLE) ########################## diff --git a/ArmnnDevice.cpp b/ArmnnDevice.cpp new file mode 100644 index 00000000..3e0b0da2 --- /dev/null +++ b/ArmnnDevice.cpp @@ -0,0 +1,65 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ArmnnDevice.hpp" + +#include + +#include + +#include + +using namespace android; + +namespace armnn_driver +{ + +ArmnnDevice::ArmnnDevice(DriverOptions options) + : m_Runtime(nullptr, nullptr) + , m_ClTunedParameters(nullptr) + , m_Options(std::move(options)) +{ + ALOGV("ArmnnDevice::ArmnnDevice()"); + + armnn::ConfigureLogging(false, m_Options.IsVerboseLoggingEnabled(), armnn::LogSeverity::Trace); + if (m_Options.IsVerboseLoggingEnabled()) + { + SetMinimumLogSeverity(base::VERBOSE); + } + else + { + SetMinimumLogSeverity(base::INFO); + } + + try + { + armnn::IRuntime::CreationOptions options; + if (!m_Options.GetClTunedParametersFile().empty()) + { + m_ClTunedParameters = armnn::IGpuAccTunedParameters::Create(m_Options.GetClTunedParametersMode()); + try + { + m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str()); + } + catch (const armnn::Exception& error) + { + // This is only a warning because the file won't exist the first time you are generating it. + ALOGW("ArmnnDevice: Failed to load CL tuned parameters file '%s': %s", + m_Options.GetClTunedParametersFile().c_str(), error.what()); + } + options.m_GpuAccTunedParameters = m_ClTunedParameters; + } + + options.m_EnableGpuProfiling = m_Options.IsGpuProfilingEnabled(); + + m_Runtime = armnn::IRuntime::Create(options); + } + catch (const armnn::ClRuntimeUnavailableException& error) + { + ALOGE("ArmnnDevice: Failed to setup CL runtime: %s. Device will be unavailable.", error.what()); + } +} + +} // namespace armnn_driver diff --git a/ArmnnDevice.hpp b/ArmnnDevice.hpp new file mode 100644 index 00000000..83414d54 --- /dev/null +++ b/ArmnnDevice.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ + +class ArmnnDevice +{ +protected: + ArmnnDevice(DriverOptions options); + virtual ~ArmnnDevice() {} + +protected: + armnn::IRuntimePtr m_Runtime; + armnn::IGpuAccTunedParametersPtr m_ClTunedParameters; + DriverOptions m_Options; +}; + +} // namespace armnn_driver diff --git a/ArmnnDriver.cpp b/ArmnnDriver.cpp deleted file mode 100644 index 4d58249e..00000000 --- a/ArmnnDriver.cpp +++ /dev/null @@ -1,453 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// See LICENSE file in the project root for full license information. -// - -#define LOG_TAG "ArmnnDriver" - -#include "ArmnnDriver.hpp" -#include "ArmnnPreparedModel.hpp" -#include "ModelToINetworkConverter.hpp" -#include "Utils.hpp" - -#include -#include "SystemPropertiesUtils.hpp" - -#include "OperationsUtils.h" - -#if defined(ARMNN_ANDROID_P) -// The headers of the ML framework have changed between Android O and Android P. -// The validation functions have been moved into their own header, ValidateHal.h. -#include -#endif - -#include -#include - -#include -#include -#include -#include - -using namespace android; -using namespace std; - -namespace -{ - -const char *g_Float32PerformanceExecTimeName = "ArmNN.float32Performance.execTime"; -const char *g_Float32PerformancePowerUsageName = "ArmNN.float32Performance.powerUsage"; -const char *g_Quantized8PerformanceExecTimeName = "ArmNN.quantized8Performance.execTime"; -const char *g_Quantized8PerformancePowerUsageName = "ArmNN.quantized8Performance.powerUsage"; - -}; //namespace - -namespace armnn_driver -{ - -DriverOptions::DriverOptions(armnn::Compute computeDevice) -: m_ComputeDevice(computeDevice) -, m_VerboseLogging(false) -, m_UseAndroidNnCpuExecutor(false) -, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters) -{ -} - -DriverOptions::DriverOptions(int argc, char** argv) -: m_ComputeDevice(armnn::Compute::GpuAcc) -, m_VerboseLogging(false) -, m_UseAndroidNnCpuExecutor(false) -, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters) -{ - namespace po = boost::program_options; - - std::string computeDeviceAsString; - std::string unsupportedOperationsAsString; - std::string clTunedParametersModeAsString; - - po::options_description optionsDesc("Options"); - optionsDesc.add_options() - ("compute,c", - po::value(&computeDeviceAsString)->default_value("GpuAcc"), - "Which device to run layers on by default. Possible values are: CpuRef, CpuAcc, GpuAcc") - - ("verbose-logging,v", - po::bool_switch(&m_VerboseLogging), - "Turns verbose logging on") - - ("use-androidnn-cpu-executor,e", - po::bool_switch(&m_UseAndroidNnCpuExecutor), - "Forces the driver to satisfy requests via the Android-provided CpuExecutor") - - ("request-inputs-and-outputs-dump-dir,d", - po::value(&m_RequestInputsAndOutputsDumpDir)->default_value(""), - "If non-empty, the directory where request inputs and outputs should be dumped") - - ("unsupported-operations,u", - po::value(&unsupportedOperationsAsString)->default_value(""), - "If non-empty, a comma-separated list of operation indices which the driver will forcibly " - "consider unsupported") - - ("cl-tuned-parameters-file,t", - po::value(&m_ClTunedParametersFile)->default_value(""), - "If non-empty, the given file will be used to load/save CL tuned parameters. 
" - "See also --cl-tuned-parameters-mode") - - ("cl-tuned-parameters-mode,m", - po::value(&clTunedParametersModeAsString)->default_value("UseTunedParameters"), - "If 'UseTunedParameters' (the default), will read CL tuned parameters from the file specified by " - "--cl-tuned-parameters-file. " - "If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update " - "the file accordingly."); - - - po::variables_map variablesMap; - try - { - po::store(po::parse_command_line(argc, argv, optionsDesc), variablesMap); - po::notify(variablesMap); - } - catch (const po::error& e) - { - ALOGW("An error occurred attempting to parse program options: %s", e.what()); - } - - if (computeDeviceAsString == "CpuRef") - { - m_ComputeDevice = armnn::Compute::CpuRef; - } - else if (computeDeviceAsString == "GpuAcc") - { - m_ComputeDevice = armnn::Compute::GpuAcc; - } - else if (computeDeviceAsString == "CpuAcc") - { - m_ComputeDevice = armnn::Compute::CpuAcc; - } - else - { - ALOGW("Requested unknown compute device %s. Defaulting to compute id %s", - computeDeviceAsString.c_str(), GetComputeDeviceAsCString(m_ComputeDevice)); - } - - if (!unsupportedOperationsAsString.empty()) - { - std::istringstream argStream(unsupportedOperationsAsString); - - std::string s; - while (!argStream.eof()) - { - std::getline(argStream, s, ','); - try - { - unsigned int operationIdx = std::stoi(s); - m_ForcedUnsupportedOperations.insert(operationIdx); - } - catch (const std::invalid_argument&) - { - ALOGW("Ignoring invalid integer argument in -u/--unsupported-operations value: %s", s.c_str()); - } - } - } - - if (!m_ClTunedParametersFile.empty()) - { - // The mode is only relevant if the file path has been provided - if (clTunedParametersModeAsString == "UseTunedParameters") - { - m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UseTunedParameters; - } - else if (clTunedParametersModeAsString == "UpdateTunedParameters") - { - m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UpdateTunedParameters; - } - else - { - ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters", - clTunedParametersModeAsString.c_str()); - } - } -} - -ArmnnDriver::ArmnnDriver(DriverOptions options) - : m_Runtime(nullptr, nullptr) - , m_ClTunedParameters(nullptr, nullptr) - , m_Options(std::move(options)) -{ - ALOGV("ArmnnDriver::ArmnnDriver()"); - - armnn::ConfigureLogging(false, m_Options.IsVerboseLoggingEnabled(), armnn::LogSeverity::Trace); - if (m_Options.IsVerboseLoggingEnabled()) - { - SetMinimumLogSeverity(base::VERBOSE); - } - else - { - SetMinimumLogSeverity(base::INFO); - } - - try - { - armnn::IRuntime::CreationOptions options(m_Options.GetComputeDevice()); - options.m_UseCpuRefAsFallback = false; - if (!m_Options.GetClTunedParametersFile().empty()) - { - m_ClTunedParameters = armnn::IClTunedParameters::Create(m_Options.GetClTunedParametersMode()); - try - { - m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str()); - } - catch (const armnn::Exception& error) - { - // This is only a warning because the file won't exist the first time you are generating it. - ALOGW("ArmnnDriver: Failed to load CL tuned parameters file '%s': %s", - m_Options.GetClTunedParametersFile().c_str(), error.what()); - } - options.m_ClTunedParameters = m_ClTunedParameters.get(); - } - m_Runtime = armnn::IRuntime::Create(options); - } - catch (const armnn::ClRuntimeUnavailableException& error) - { - ALOGE("ArmnnDriver: Failed to setup CL runtime: %s. 
Device will be unavailable.", error.what()); - } -} - -Return ArmnnDriver::getCapabilities(V1_0::IDevice::getCapabilities_cb cb) -{ - ALOGV("ArmnnDriver::getCapabilities()"); - - V1_0::Capabilities capabilities; - if (m_Runtime) - { - capabilities.float32Performance.execTime = - ParseSystemProperty(g_Float32PerformanceExecTimeName, 1.0f); - - capabilities.float32Performance.powerUsage = - ParseSystemProperty(g_Float32PerformancePowerUsageName, 1.0f); - - capabilities.quantized8Performance.execTime = - ParseSystemProperty(g_Quantized8PerformanceExecTimeName, 1.0f); - - capabilities.quantized8Performance.powerUsage = - ParseSystemProperty(g_Quantized8PerformancePowerUsageName, 1.0f); - - cb(ErrorStatus::NONE, capabilities); - } - else - { - capabilities.float32Performance.execTime = 0; - capabilities.float32Performance.powerUsage = 0; - capabilities.quantized8Performance.execTime = 0; - capabilities.quantized8Performance.powerUsage = 0; - - cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities); - } - - return Void(); -} - -Return ArmnnDriver::getSupportedOperations(const V1_0::Model& model, V1_0::IDevice::getSupportedOperations_cb cb) -{ - ALOGV("ArmnnDriver::getSupportedOperations()"); - - std::vector result; - - if (!m_Runtime) - { - cb(ErrorStatus::DEVICE_UNAVAILABLE, result); - return Void(); - } - - // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway - if (!android::nn::validateModel(model)) - { - cb(ErrorStatus::INVALID_ARGUMENT, result); - return Void(); - } - - // Attempt to convert the model to an ArmNN input network (INetwork). - ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model, - m_Options.GetForcedUnsupportedOperations()); - - if (modelConverter.GetConversionResult() != ConversionResult::Success - && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) - { - cb(ErrorStatus::GENERAL_FAILURE, result); - return Void(); - } - - // Check each operation if it was converted successfully and copy the flags - // into the result (vector) that we need to return to Android - result.reserve(model.operations.size()); - for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++) - { - bool operationSupported = modelConverter.IsOperationSupported(operationIdx); - result.push_back(operationSupported); - } - - cb(ErrorStatus::NONE, result); - return Void(); -} - -namespace -{ - -void NotifyCallbackAndCheck(const sp& callback, ErrorStatus errorStatus, - const ::android::sp& preparedModelPtr) -{ - Return returned = callback->notify(errorStatus, preparedModelPtr); - // This check is required, if the callback fails and it isn't checked it will bring down the service - if (!returned.isOk()) - { - ALOGE("ArmnnDriver::prepareModel: hidl callback failed to return properly: %s ", - returned.description().c_str()); - } -} - -Return FailPrepareModel(ErrorStatus error, - const std::string& message, - const sp& callback) -{ - ALOGW("ArmnnDriver::prepareModel: %s", message.c_str()); - NotifyCallbackAndCheck(callback, error, nullptr); - return error; -} - -} - -Return ArmnnDriver::prepareModel(const V1_0::Model& model, - const sp& cb) -{ - ALOGV("ArmnnDriver::prepareModel()"); - - if (cb.get() == nullptr) - { - ALOGW("ArmnnDriver::prepareModel: Invalid callback passed to prepareModel"); - return ErrorStatus::INVALID_ARGUMENT; - } - - if (!m_Runtime) - { - return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, "ArmnnDriver::prepareModel: Device unavailable", cb); - } - - if 
(!android::nn::validateModel(model)) - { - return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, - "ArmnnDriver::prepareModel: Invalid model passed as input", cb); - } - - if (m_Options.UseAndroidNnCpuExecutor()) - { - sp preparedModel = new AndroidNnCpuExecutorPreparedModel(model, - m_Options.GetRequestInputsAndOutputsDumpDir()); - if (preparedModel->Initialize()) - { - NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel); - return ErrorStatus::NONE; - } - else - { - NotifyCallbackAndCheck(cb, ErrorStatus::INVALID_ARGUMENT, preparedModel); - return ErrorStatus::INVALID_ARGUMENT; - } - } - - // Deliberately ignore any unsupported operations requested by the options - - // at this point we're being asked to prepare a model that we've already declared support for - // and the operation indices may be different to those in getSupportedOperations anyway. - std::set unsupportedOperations; - ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model, - unsupportedOperations); - - if (modelConverter.GetConversionResult() != ConversionResult::Success) - { - FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); - return ErrorStatus::NONE; - } - - // optimize the network - armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); - try - { - optNet = armnn::Optimize(*modelConverter.GetINetwork(), m_Runtime->GetDeviceSpec()); - } - catch (armnn::Exception& e) - { - std::stringstream message; - message << "armnn::Exception ("<LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success) - { - return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, - "ArmnnDriver::prepareModel: Network could not be loaded", cb); - } - } - catch (armnn::Exception& e) - { - std::stringstream message; - message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork."; - FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); - return ErrorStatus::NONE; - } - - std::unique_ptr preparedModel(new ArmnnPreparedModel( - netId, - m_Runtime.get(), - model, - m_Options.GetRequestInputsAndOutputsDumpDir() - )); - - // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if - // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - preparedModel->ExecuteWithDummyInputs(); - - if (m_ClTunedParameters && - m_Options.GetClTunedParametersMode() == armnn::IClTunedParameters::Mode::UpdateTunedParameters) - { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. - try - { - m_ClTunedParameters->Save(m_Options.GetClTunedParametersFile().c_str()); - } - catch (const armnn::Exception& error) - { - ALOGE("ArmnnDriver: Failed to save CL tuned parameters file '%s': %s", - m_Options.GetClTunedParametersFile().c_str(), error.what()); - } - } - - NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release()); - - return ErrorStatus::NONE; -} - -Return ArmnnDriver::getStatus() -{ - ALOGV("ArmnnDriver::getStatus()"); - return DeviceStatus::AVAILABLE; -} - -} diff --git a/ArmnnDriver.hpp b/ArmnnDriver.hpp index e8dc3bfb..32a10a47 100644 --- a/ArmnnDriver.hpp +++ b/ArmnnDriver.hpp @@ -5,61 +5,48 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include +#include -#include -#include -#include +#include -// For Android O, explicitly declare the V1_0 HAL namespace to shorten type declarations, -// as the namespace is not defined in HalInterfaces.h. 
-namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; +#if defined(ARMNN_ANDROID_NN_V1_1) // Using ::android::hardware::neuralnetworks::V1_1. + +#include "1.1/ArmnnDriver.hpp" namespace armnn_driver { -class DriverOptions +class ArmnnDriver : public V1_1::ArmnnDriver { public: - DriverOptions(armnn::Compute computeDevice); - DriverOptions(int argc, char** argv); - DriverOptions(DriverOptions&& other) = default; + ArmnnDriver(DriverOptions options) + : V1_1::ArmnnDriver(std::move(options)) + { + ALOGV("ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} +}; - armnn::Compute GetComputeDevice() const { return m_ComputeDevice; } - bool IsVerboseLoggingEnabled() const { return m_VerboseLogging; } - const std::string& GetRequestInputsAndOutputsDumpDir() const { return m_RequestInputsAndOutputsDumpDir; } - bool UseAndroidNnCpuExecutor() const { return m_UseAndroidNnCpuExecutor; } - const std::set& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; } - const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; } - armnn::IClTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; } +} // namespace armnn_driver -private: - armnn::Compute m_ComputeDevice; - bool m_VerboseLogging; - bool m_UseAndroidNnCpuExecutor; - std::string m_RequestInputsAndOutputsDumpDir; - std::set m_ForcedUnsupportedOperations; - std::string m_ClTunedParametersFile; - armnn::IClTunedParameters::Mode m_ClTunedParametersMode; -}; +#else // Fallback to ::android::hardware::neuralnetworks::V1_0. -class ArmnnDriver : public V1_0::IDevice { -public: - ArmnnDriver(DriverOptions options); - virtual ~ArmnnDriver() {} - virtual Return getCapabilities(V1_0::IDevice::getCapabilities_cb _hidl_cb) override; - virtual Return getSupportedOperations(const V1_0::Model &model, - V1_0::IDevice::getSupportedOperations_cb _hidl_cb) override; - virtual Return prepareModel(const V1_0::Model &model, - const android::sp& callback); - virtual Return getStatus() override; +#include "1.0/ArmnnDriver.hpp" + +namespace armnn_driver +{ -private: - armnn::IRuntimePtr m_Runtime; - armnn::IClTunedParametersPtr m_ClTunedParameters; - DriverOptions m_Options; +class ArmnnDriver : public V1_0::ArmnnDriver +{ +public: + ArmnnDriver(DriverOptions options) + : V1_0::ArmnnDriver(std::move(options)) + { + ALOGV("ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} }; -} +} // namespace armnn_driver + +#endif diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index 3aad955b..d338fdc8 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -83,6 +83,8 @@ inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t ind } +using namespace android::hardware; + namespace armnn_driver { @@ -106,21 +108,31 @@ void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix, } ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId, - armnn::IRuntime* runtime, - const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir) -: m_NetworkId(networkId) -, m_Runtime(runtime) -, m_Model(model) -, m_RequestCount(0) -, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + armnn::IRuntime* runtime, + const neuralnetworks::V1_0::Model& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled) + : m_NetworkId(networkId) + , m_Runtime(runtime) + , m_Model(model) + , m_RequestCount(0) + , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + , 
m_GpuProfilingEnabled(gpuProfilingEnabled) { + // Enable profiling if required. + m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); } ArmnnPreparedModel::~ArmnnPreparedModel() { - //unload the network associated with this model + // Get a hold of the profiler used by this model. + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + // Unload the network associated with this model. m_Runtime->UnloadNetwork(m_NetworkId); + + // Dump the profiling info to a file if required. + DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); } Return ArmnnPreparedModel::execute(const Request& request, @@ -275,85 +287,4 @@ void ArmnnPreparedModel::ExecuteWithDummyInputs() } } -AndroidNnCpuExecutorPreparedModel::AndroidNnCpuExecutorPreparedModel(const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir) -: m_Model(model) -, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) -, m_RequestCount(0) -{ -} - -bool AndroidNnCpuExecutorPreparedModel::Initialize() -{ - return setRunTimePoolInfosFromHidlMemories(&m_ModelPoolInfos, m_Model.pools); -} - -Return AndroidNnCpuExecutorPreparedModel::execute(const Request& request, - const ::android::sp& callback) -{ - m_RequestCount++; - std::vector requestPoolInfos; - - if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) - { - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "AndroidNnCpuExecutorPreparedModel::execute"); - return ErrorStatus::GENERAL_FAILURE; - } - - if (!m_RequestInputsAndOutputsDumpDir.empty()) - { - ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(callback.get())); - } - - DumpTensorsIfRequired( - "Input", - m_Model.inputIndexes, - request.inputs, - requestPoolInfos); - - android::nn::CpuExecutor executor; - const int n = executor.run(m_Model, request, m_ModelPoolInfos, requestPoolInfos); - ErrorStatus executionStatus = - n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; - - DumpTensorsIfRequired( - "Output", - m_Model.outputIndexes, - request.outputs, - requestPoolInfos); - - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "AndroidNnCpuExecutorPreparedModel::execute"); - return executionStatus; -} - -void AndroidNnCpuExecutorPreparedModel::DumpTensorsIfRequired( - char const* tensorNamePrefix, - const hidl_vec& operandIndices, - const hidl_vec& requestArgs, - const std::vector& requestPoolInfos) -{ - if (m_RequestInputsAndOutputsDumpDir.empty()) - { - return; - } - - for (std::size_t i = 0; i < requestArgs.size(); ++i) - { - const Operand& operand = m_Model.operands[operandIndices[i]]; - const armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand); - const armnn::Tensor tensor = GetTensorForRequestArgument(requestArgs[i], tensorInfo, requestPoolInfos); - const std::string tensorName = BuildTensorName(tensorNamePrefix, i); - if (tensor.GetMemoryArea() != nullptr) - { - std::string requestName = boost::str(boost::format("%1%_%2%.dump") % this % m_RequestCount); - DumpTensor(m_RequestInputsAndOutputsDumpDir, requestName, tensorName, tensor); - } - else - { - ALOGE("Cannot dump tensor %s. 
An error occurred converting the associated request argument to a tensor.", - tensorName.c_str()); - } - } -} - -} +} // namespace armnn_driver diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp index f7644b95..a700e54d 100644 --- a/ArmnnPreparedModel.hpp +++ b/ArmnnPreparedModel.hpp @@ -7,12 +7,11 @@ #include "RequestThread.hpp" -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include - #include "ArmnnDriver.hpp" +#include +#include + #include #include @@ -24,8 +23,9 @@ class ArmnnPreparedModel : public IPreparedModel public: ArmnnPreparedModel(armnn::NetworkId networkId, armnn::IRuntime* runtime, - const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir); + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled); virtual ~ArmnnPreparedModel(); @@ -46,40 +46,15 @@ private: template void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - V1_0::Model m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + ::android::hardware::neuralnetworks::V1_0::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; -}; - -class AndroidNnCpuExecutorPreparedModel : public IPreparedModel -{ -public: - - AndroidNnCpuExecutorPreparedModel(const V1_0::Model& model, const std::string& requestInputsAndOutputsDumpDir); - virtual ~AndroidNnCpuExecutorPreparedModel() { } - - bool Initialize(); - - virtual Return execute(const Request& request, - const ::android::sp& callback) override; - -private: - - void DumpTensorsIfRequired( - char const* tensorNamePrefix, - const hidl_vec& operandIndices, - const hidl_vec& requestArgs, - const std::vector& requestPoolInfos); - - V1_0::Model m_Model; - std::vector m_ModelPoolInfos; - const std::string& m_RequestInputsAndOutputsDumpDir; - uint32_t m_RequestCount; + static RequestThread m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; }; } diff --git a/DriverOptions.cpp b/DriverOptions.cpp new file mode 100644 index 00000000..3d397e37 --- /dev/null +++ b/DriverOptions.cpp @@ -0,0 +1,159 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
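// A sketch of the command line the parser below accepts (flag values are illustrative
// only; the service name comes from the .rc file added by this patch):
//
//     android.hardware.neuralnetworks@1.1-service-armnn -c GpuAcc -v -p \
//         -t /data/armnn/cl_tuned_params -m UpdateTunedParameters
//
// This selects the GpuAcc backend, turns on verbose logging and GPU profiling, and
// re-tunes the CL kernel parameters, saving them back to the given file once the
// first prepared model has run.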
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "DriverOptions.hpp"
+#include "Utils.hpp"
+
+#include
+#include "SystemPropertiesUtils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+using namespace android;
+using namespace std;
+
+namespace armnn_driver
+{
+
+DriverOptions::DriverOptions(armnn::Compute computeDevice)
+    : m_ComputeDevice(computeDevice)
+    , m_VerboseLogging(false)
+    , m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+    , m_EnableGpuProfiling(false)
+    , m_fp16Enabled(false)
+{
+}
+
+DriverOptions::DriverOptions(int argc, char** argv)
+    : m_ComputeDevice(armnn::Compute::GpuAcc)
+    , m_VerboseLogging(false)
+    , m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+    , m_EnableGpuProfiling(false)
+    , m_fp16Enabled(false)
+{
+    namespace po = boost::program_options;
+
+    std::string computeDeviceAsString;
+    std::string unsupportedOperationsAsString;
+    std::string clTunedParametersModeAsString;
+
+    po::options_description optionsDesc("Options");
+    optionsDesc.add_options()
+        ("compute,c",
+         po::value<std::string>(&computeDeviceAsString)->default_value("GpuAcc"),
+         "Which device to run layers on by default. Possible values are: CpuRef, CpuAcc, GpuAcc")
+
+        ("verbose-logging,v",
+         po::bool_switch(&m_VerboseLogging),
+         "Turns verbose logging on")
+
+        ("request-inputs-and-outputs-dump-dir,d",
+         po::value<std::string>(&m_RequestInputsAndOutputsDumpDir)->default_value(""),
+         "If non-empty, the directory where request inputs and outputs should be dumped")
+
+        ("unsupported-operations,u",
+         po::value<std::string>(&unsupportedOperationsAsString)->default_value(""),
+         "If non-empty, a comma-separated list of operation indices which the driver will forcibly "
+         "consider unsupported")
+
+        ("cl-tuned-parameters-file,t",
+         po::value<std::string>(&m_ClTunedParametersFile)->default_value(""),
+         "If non-empty, the given file will be used to load/save CL tuned parameters. "
+         "See also --cl-tuned-parameters-mode")
+
+        ("cl-tuned-parameters-mode,m",
+         po::value<std::string>(&clTunedParametersModeAsString)->default_value("UseTunedParameters"),
+         "If 'UseTunedParameters' (the default), will read CL tuned parameters from the file specified by "
+         "--cl-tuned-parameters-file. "
+         "If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update "
+         "the file accordingly.")
+
+        ("gpu-profiling,p",
+         po::bool_switch(&m_EnableGpuProfiling),
+         "Turns GPU profiling on")
+
+        ("fp16-enabled,f",
+         po::bool_switch(&m_fp16Enabled),
+         "Enables support for relaxed computation from Float32 to Float16");
+
+    po::variables_map variablesMap;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, optionsDesc), variablesMap);
+        po::notify(variablesMap);
+    }
+    catch (const po::error& e)
+    {
+        ALOGW("An error occurred attempting to parse program options: %s", e.what());
+    }
+
+    if (computeDeviceAsString == "CpuRef")
+    {
+        m_ComputeDevice = armnn::Compute::CpuRef;
+    }
+    else if (computeDeviceAsString == "GpuAcc")
+    {
+        m_ComputeDevice = armnn::Compute::GpuAcc;
+    }
+    else if (computeDeviceAsString == "CpuAcc")
+    {
+        m_ComputeDevice = armnn::Compute::CpuAcc;
+    }
+    else
+    {
+        ALOGW("Requested unknown compute device %s. 
Defaulting to compute id %s", + computeDeviceAsString.c_str(), GetComputeDeviceAsCString(m_ComputeDevice)); + } + + if (!unsupportedOperationsAsString.empty()) + { + std::istringstream argStream(unsupportedOperationsAsString); + + std::string s; + while (!argStream.eof()) + { + std::getline(argStream, s, ','); + try + { + unsigned int operationIdx = std::stoi(s); + m_ForcedUnsupportedOperations.insert(operationIdx); + } + catch (const std::invalid_argument&) + { + ALOGW("Ignoring invalid integer argument in -u/--unsupported-operations value: %s", s.c_str()); + } + } + } + + if (!m_ClTunedParametersFile.empty()) + { + // The mode is only relevant if the file path has been provided + if (clTunedParametersModeAsString == "UseTunedParameters") + { + m_ClTunedParametersMode = armnn::IGpuAccTunedParameters::Mode::UseTunedParameters; + } + else if (clTunedParametersModeAsString == "UpdateTunedParameters") + { + m_ClTunedParametersMode = armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters; + } + else + { + ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters", + clTunedParametersModeAsString.c_str()); + } + } +} + +} // namespace armnn_driver diff --git a/DriverOptions.hpp b/DriverOptions.hpp new file mode 100644 index 00000000..4a378b26 --- /dev/null +++ b/DriverOptions.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include +#include + +namespace armnn_driver +{ + +class DriverOptions +{ +public: + DriverOptions(armnn::Compute computeDevice); + DriverOptions(int argc, char** argv); + DriverOptions(DriverOptions&& other) = default; + + armnn::Compute GetComputeDevice() const { return m_ComputeDevice; } + bool IsVerboseLoggingEnabled() const { return m_VerboseLogging; } + const std::string& GetRequestInputsAndOutputsDumpDir() const { return m_RequestInputsAndOutputsDumpDir; } + const std::set& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; } + const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; } + armnn::IGpuAccTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; } + bool IsGpuProfilingEnabled() const { return m_EnableGpuProfiling; } + bool GetFp16Enabled() const { return m_fp16Enabled; } + +private: + armnn::Compute m_ComputeDevice; + bool m_VerboseLogging; + std::string m_RequestInputsAndOutputsDumpDir; + std::set m_ForcedUnsupportedOperations; + std::string m_ClTunedParametersFile; + armnn::IGpuAccTunedParameters::Mode m_ClTunedParametersMode; + bool m_EnableGpuProfiling; + bool m_fp16Enabled; +}; + +} // namespace armnn_driver diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp index fe4e8ac1..3da56ef8 100644 --- a/ModelToINetworkConverter.cpp +++ b/ModelToINetworkConverter.cpp @@ -6,7 +6,7 @@ #define LOG_TAG "ArmnnDriver" #include "ModelToINetworkConverter.hpp" -#include "OperationsUtils.h" +#include #include #include @@ -19,6 +19,8 @@ #include #include +using namespace android::hardware; + namespace armnn_driver { class LayerInputHandle @@ -105,45 +107,66 @@ inline bool IsOperandTypeSupportedForTensors(OperandType type) type == OperandType::TENSOR_INT32; } -void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail, - android::nn::PaddingScheme scheme) -{ - int32_t padHead; - int32_t padTail; - calculateExplicitPadding(input, stride, kernel, 
scheme, &padHead, &padTail);
-    outPadHead = boost::numeric_cast<uint32_t>(padHead);
-    outPadTail = boost::numeric_cast<uint32_t>(padTail);
-}
-
-bool ValidateBroadcast(const V1_0::Model& model, const V1_0::Operation& operation, uint32_t numInputs)
+void BroadcastTensor(LayerInputHandle& input0, LayerInputHandle& input1, armnn::IConnectableLayer* startLayer,
+                     armnn::INetwork& network)
 {
-    assert(operation.inputs.size() > 0); // This should have been validated by the caller
-    // validateModel() has been called already so we know the operation.inputs indexes are valid within model.operands.
-    const Operand& firstInput = model.operands[operation.inputs[0]];
+    BOOST_ASSERT(startLayer != nullptr);
+    const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo();
+    const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo();
 
-    // We don't support broadcasting yet - we require all input operands to have the same shape
-    for (uint32_t i = 1; i < numInputs; ++i)
+    if (inputTensorInfo0.GetNumDimensions() != inputTensorInfo1.GetNumDimensions())
     {
-        const Operand& otherInput = model.operands[operation.inputs[i]];
+        // If the number of dimensions does not match then we need to add degenerate dimensions
+        // to the "smaller" tensor using a reshape:
+        //      Small  Big
+        //        |     |
+        //     Reshape  |
+        //         \   /
+        //          Add
+        bool input0IsBigger = inputTensorInfo0.GetNumDimensions() > inputTensorInfo1.GetNumDimensions();
 
-        if (firstInput.dimensions.size() != otherInput.dimensions.size())
-        {
-            return Fail("%s: Broadcasting not supported (Input 0 dims: %i Input %i dims: %i)",
-                        __func__, firstInput.dimensions.size(), i, otherInput.dimensions.size());
-        }
+        LayerInputHandle& smallTensorHandle = input0IsBigger ? input1 : input0;
+        const armnn::TensorInfo& smallTensorDims = smallTensorHandle.GetTensorInfo();
+
+        LayerInputHandle& bigTensorHandle = input0IsBigger ? input0 : input1;
+        const armnn::TensorInfo& bigTensorDims = bigTensorHandle.GetTensorInfo();
 
-        for (unsigned int d = 0; d < firstInput.dimensions.size(); ++d)
+        const unsigned int bigTensorDimsNumber = bigTensorDims.GetNumDimensions();
+        std::vector<unsigned int> reshapedDims(bigTensorDimsNumber, 1);
+        unsigned int sizeDifference = bigTensorDimsNumber - smallTensorDims.GetNumDimensions();
+        for (unsigned i = sizeDifference; i < bigTensorDimsNumber; ++i)
         {
-            if (firstInput.dimensions[d] != otherInput.dimensions[d])
-            {
-                return Fail("%s: Broadcasting not supported (Dimension %i size mismatch. 
" - "Input 0: %i Input %i: %i)", - __func__, d, firstInput.dimensions[d], i, otherInput.dimensions[d]); - } + reshapedDims[i] = smallTensorDims.GetShape()[i-sizeDifference]; } + armnn::TensorInfo reshapedInfo = smallTensorDims; + reshapedInfo.SetShape(armnn::TensorShape{ static_cast(reshapedDims.size()), + reshapedDims.data() }); + + armnn::ReshapeDescriptor reshapeDesc; + reshapeDesc.m_TargetShape = reshapedInfo.GetShape(); + armnn::IConnectableLayer* const reshapeLayer = network.AddReshapeLayer(reshapeDesc); + smallTensorHandle.Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo); + + // Connect the outputs from new reshape and original input layer + reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0)); + bigTensorHandle.Connect(startLayer->GetInputSlot(1)); } + else + { + input0.Connect(startLayer->GetInputSlot(0)); + input1.Connect(startLayer->GetInputSlot(1)); + } +} - return true; +void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail, + android::nn::PaddingScheme scheme) +{ + int32_t padHead; + int32_t padTail; + calculateExplicitPadding(input, stride, kernel, scheme, &padHead, &padTail); + outPadHead = boost::numeric_cast(padHead); + outPadTail = boost::numeric_cast(padTail); } Shape GetOperandShape(const Operand& operand) @@ -175,11 +198,17 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo, } } -const armnn::PermutationVector IdentityPermutation({ 0U, 1U, 2U, 3U }); +// 4D Tensor Permutations +const armnn::PermutationVector IdentityPermutation4D({ 0U, 1U, 2U, 3U }); const armnn::PermutationVector NHWCToArmNN({ 0U, 2U, 3U, 1U }); const armnn::PermutationVector ArmNNToNHWC({ 0U, 3U, 1U, 2U }); const armnn::PermutationVector SwapDim1And2({ 0U, 2U, 1U, 3U }); +// 3D Permutation Vectors +const armnn::PermutationVector IdentityPermutation3D({ 0U, 1U, 2U }); +const armnn::PermutationVector RotateTensorLeft({ 2U, 0U, 1U }); +const armnn::PermutationVector RotateTensorRight({ 1U, 2U, 0U }); + template armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input, const armnn::PermutationVector& mappings) @@ -189,7 +218,7 @@ armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input assert(layer != nullptr); - // Connect intput to swizzle layer + // Connect input to swizzle layer input.Connect(layer->GetInputSlot(0)); // Setup swizzled output @@ -199,22 +228,32 @@ armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input return *layer; } -armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input, - armnn::IConnectableLayer& firstLayer, - armnn::IConnectableLayer& lastLayer) +void SwizzleIn(armnn::INetwork& network, LayerInputHandle& input, armnn::IConnectableLayer& layer, unsigned int index) { // Add swizzle layer armnn::IConnectableLayer& swizzleLayer = AddPermuteLayer(network, input, NHWCToArmNN); - // Connect swizzled input to layer - swizzleLayer.GetOutputSlot(0).Connect(firstLayer.GetInputSlot(0)); + swizzleLayer.GetOutputSlot(0).Connect(layer.GetInputSlot(index)); +} +armnn::IConnectableLayer& DeswizzleOut(armnn::INetwork& network, armnn::IConnectableLayer& layer, unsigned int index) +{ // Add deswizzle layer - armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(network, lastLayer.GetOutputSlot(0), ArmNNToNHWC); - + armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(network, layer.GetOutputSlot(index), ArmNNToNHWC); return 
deswizzleLayer; } +// only suitable for input/output slot index 0, for other slots, use SwizzleIn and DeswizzleOut directly +armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, + LayerInputHandle& input, + armnn::IConnectableLayer& firstLayer, + armnn::IConnectableLayer& lastLayer) +{ + SwizzleIn(network, input, firstLayer, 0); + return DeswizzleOut(network, lastLayer, 0); +} + +// only suitable for input/output slot index 0, for other slots, use SwizzleIn and DeswizzleOut directly armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input, armnn::IConnectableLayer& layer) { @@ -264,12 +303,34 @@ bool ValidateConcatOutputShape(const std::vector & inputShap return true; } +bool RequiresReshape(armnn::TensorShape & inputShape) +{ + return inputShape.GetNumDimensions() < 3; +} + +template +armnn::IConnectableLayer& AddReshapeLayer(armnn::INetwork& network, OSlot& inputLayer, + armnn::TensorInfo reshapeInfo) +{ + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape(); + + armnn::IConnectableLayer* reshapeLayer = network.AddReshapeLayer(reshapeDescriptor); + assert(reshapeLayer != nullptr); + + // Attach the input layer to the reshape layer + inputLayer.Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapeInfo); + + return *reshapeLayer; +} + void SwizzleInputs(armnn::INetwork& network, std::vector& inputs, std::vector& inputShapes, const armnn::PermutationVector& mapping) { - if (!mapping.IsEqual(IdentityPermutation)) + if (!mapping.IsEqual(IdentityPermutation4D)) { size_t nInputs = inputs.size(); for (size_t i=0; i & permutationPair) +{ + assert(numberOfDimensions >= 3); + + // ArmNN uses Compute Library subtensors to perform concatenation + // This only works when concatenating along dimension 0 or 1 for a 4-D tensor, + // or along dimension 0 for a 3-D tensor. + if (numberOfDimensions == 4) + { + if (concatDimension == 3) + { + concatDimension = 1; + permutationPair = std::make_pair(NHWCToArmNN, ArmNNToNHWC); + } + else if (concatDimension == 2) + { + concatDimension = 1; + permutationPair = std::make_pair(SwapDim1And2, SwapDim1And2); + } + else + { + permutationPair = std::make_pair(IdentityPermutation4D, IdentityPermutation4D); + } + + } + else if (numberOfDimensions == 3) + { + if (concatDimension == 2) + { + concatDimension = 0; + permutationPair = std::make_pair(RotateTensorRight, RotateTensorLeft); + } + else if (concatDimension == 1) + { + concatDimension = 0; + permutationPair = std::make_pair(RotateTensorLeft, RotateTensorRight); + } + else + { + permutationPair = std::make_pair(IdentityPermutation3D, IdentityPermutation3D); + } + } +} + + } // namespace namespace armnn_driver @@ -294,7 +402,8 @@ class ConstTensorPin { public: // Creates an invalid tensor pin (can be used to signal errors) - ConstTensorPin() {} + // The optional flag can be set to indicate the tensor values were missing, but it was otherwise valid + ConstTensorPin(bool optional = false) : m_Optional(optional) {} // @param tensorInfo TensorInfo associated with the tensor. // @param valueStart Start address of tensor data. 
Belongs to one of the memory pools associated with @@ -324,7 +433,17 @@ public: ConstTensorPin(ConstTensorPin&& other) = default; bool IsValid() const { return m_ConstTensor.GetMemoryArea() != nullptr; } + bool IsOptional() const { return m_Optional; } const armnn::ConstTensor& GetConstTensor() const { return m_ConstTensor; } + const armnn::ConstTensor* GetConstTensorPtr() const + { + if (IsValid() && m_ConstTensor.GetNumElements() > 0) + { + return &m_ConstTensor; + } + // tensor is either invalid, or has no elements (indicating an optional tensor that was not provided) + return nullptr; + } private: armnn::ConstTensor m_ConstTensor; @@ -332,9 +451,12 @@ private: // swizzling. Otherwise, @ref m_ConstTensor will reference memory from one of // the pools associated with the model being converted. std::vector m_SwizzledTensorData; + // optional flag to indicate that an invalid tensor pin is not an error, but the optional values were not given + bool m_Optional; }; -ModelToINetworkConverter::ModelToINetworkConverter(armnn::Compute compute, const V1_0::Model& model, +ModelToINetworkConverter::ModelToINetworkConverter(armnn::Compute compute, + const neuralnetworks::V1_0::Model& model, const std::set& forcedUnsupportedOperations) : m_Compute(compute) , m_Model(model) @@ -471,37 +593,59 @@ void ModelToINetworkConverter::Convert() } } -bool ModelToINetworkConverter::ConvertOperation(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertOperation(const neuralnetworks::V1_0::Operation& operation) { switch (operation.type) { - case V1_0::OperationType::ADD: return ConvertAdd(operation); - case V1_0::OperationType::AVERAGE_POOL_2D: return ConvertAveragePool2d(operation); - case V1_0::OperationType::CONCATENATION: return ConvertConcatenation(operation); - case V1_0::OperationType::CONV_2D: return ConvertConv2d(operation); - case V1_0::OperationType::DEPTHWISE_CONV_2D: return ConvertDepthwiseConv2d(operation); - case V1_0::OperationType::FLOOR: return ConvertFloor(operation); - case V1_0::OperationType::FULLY_CONNECTED: return ConvertFullyConnected(operation); - case V1_0::OperationType::LOCAL_RESPONSE_NORMALIZATION: return ConvertLocalResponseNormalization(operation); - case V1_0::OperationType::LOGISTIC: return ConvertLogistic(operation); - case V1_0::OperationType::L2_NORMALIZATION: return ConvertL2Normalization(operation); - case V1_0::OperationType::L2_POOL_2D: return ConvertL2Pool2d(operation); - case V1_0::OperationType::MAX_POOL_2D: return ConvertMaxPool2d(operation); - case V1_0::OperationType::MUL: return ConvertMul(operation); - case V1_0::OperationType::RELU: return ConvertReLu(operation); - case V1_0::OperationType::RELU1: return ConvertReLu1(operation); - case V1_0::OperationType::RELU6: return ConvertReLu6(operation); - case V1_0::OperationType::SOFTMAX: return ConvertSoftmax(operation); - case V1_0::OperationType::TANH: return ConvertTanH(operation); - case V1_0::OperationType::RESHAPE: return ConvertReshape(operation); - case V1_0::OperationType::RESIZE_BILINEAR: return ConvertResizeBilinear(operation); - default: return Fail("%s: Operation type %s not supported in ArmnnDriver", - __func__, toString(operation.type).c_str()); + case neuralnetworks::V1_0::OperationType::ADD: + return ConvertAdd(operation); + case neuralnetworks::V1_0::OperationType::AVERAGE_POOL_2D: + return ConvertAveragePool2d(operation); + case neuralnetworks::V1_0::OperationType::CONCATENATION: + return ConvertConcatenation(operation); + case neuralnetworks::V1_0::OperationType::CONV_2D: + return 
ConvertConv2d(operation); + case neuralnetworks::V1_0::OperationType::DEPTHWISE_CONV_2D: + return ConvertDepthwiseConv2d(operation); + case neuralnetworks::V1_0::OperationType::FLOOR: + return ConvertFloor(operation); + case neuralnetworks::V1_0::OperationType::FULLY_CONNECTED: + return ConvertFullyConnected(operation); + case neuralnetworks::V1_0::OperationType::LOCAL_RESPONSE_NORMALIZATION: + return ConvertLocalResponseNormalization(operation); + case neuralnetworks::V1_0::OperationType::LOGISTIC: + return ConvertLogistic(operation); + case neuralnetworks::V1_0::OperationType::LSTM: + return ConvertLstm(operation); + case neuralnetworks::V1_0::OperationType::L2_NORMALIZATION: + return ConvertL2Normalization(operation); + case neuralnetworks::V1_0::OperationType::L2_POOL_2D: + return ConvertL2Pool2d(operation); + case neuralnetworks::V1_0::OperationType::MAX_POOL_2D: + return ConvertMaxPool2d(operation); + case neuralnetworks::V1_0::OperationType::MUL: + return ConvertMul(operation); + case neuralnetworks::V1_0::OperationType::RELU: + return ConvertReLu(operation); + case neuralnetworks::V1_0::OperationType::RELU1: + return ConvertReLu1(operation); + case neuralnetworks::V1_0::OperationType::RELU6: + return ConvertReLu6(operation); + case neuralnetworks::V1_0::OperationType::SOFTMAX: + return ConvertSoftmax(operation); + case neuralnetworks::V1_0::OperationType::TANH: + return ConvertTanH(operation); + case neuralnetworks::V1_0::OperationType::RESHAPE: + return ConvertReshape(operation); + case neuralnetworks::V1_0::OperationType::RESIZE_BILINEAR: + return ConvertResizeBilinear(operation); + default: + return Fail("%s: Operation type %s not supported in ArmnnDriver", + __func__, toString(operation.type).c_str()); } } - -bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertAdd(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0); LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1); @@ -511,8 +655,10 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + // The FuseActivation parameter is always the input index 2 + // and it should be optional ActivationFn activationFunction; - if (!GetInputActivationFunction(operation, 2, activationFunction)) + if (!GetOptionalInputActivation(operation, 2, activationFunction)) { return Fail("%s: Operation has invalid inputs", __func__); } @@ -543,49 +689,7 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) if (endLayer != nullptr) { - // If the number of dimensions do not match then we need to add degenerate dimensions - // to the "smaller" tensor using a reshape: - // Small Big - // | | - // Reshape | - // \ / - // Add - if (inputTensorInfo0.GetNumDimensions() != inputTensorInfo1.GetNumDimensions()) - { - bool input0IsBigger = inputTensorInfo0.GetNumDimensions() > inputTensorInfo1.GetNumDimensions(); - - LayerInputHandle& smallTensorHandle = input0IsBigger ? input1 : input0; - const armnn::TensorInfo& smallTensorDims = smallTensorHandle.GetTensorInfo(); - - LayerInputHandle& bigTensorHandle = input0IsBigger ? 
input0 : input1; - const armnn::TensorInfo& bigTensorDims = bigTensorHandle.GetTensorInfo(); - - std::vector reshapedDims(bigTensorDims.GetNumDimensions(), 1); - unsigned int sizeDifference = bigTensorDims.GetNumDimensions() - smallTensorDims.GetNumDimensions(); - for (unsigned i = sizeDifference; i < bigTensorDims.GetNumDimensions(); ++i) - { - reshapedDims[i] = smallTensorDims.GetShape()[i-sizeDifference]; - } - armnn::TensorInfo reshapedInfo = smallTensorDims; - reshapedInfo.SetShape(armnn::TensorShape{ static_cast(reshapedDims.size()), - reshapedDims.data() }); - - armnn::ReshapeDescriptor reshapeDesc; - reshapeDesc.m_TargetShape = reshapedInfo.GetShape(); - armnn::IConnectableLayer* const reshapeLayer = m_Network->AddReshapeLayer(reshapeDesc); - smallTensorHandle.Connect(reshapeLayer->GetInputSlot(0)); - reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo); - - // Connect the outputs from new reshape and original input layer - reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0)); - bigTensorHandle.Connect(startLayer->GetInputSlot(1)); - } - else - { - input0.Connect(startLayer->GetInputSlot(0)); - input1.Connect(startLayer->GetInputSlot(1)); - } - + BroadcastTensor(input0, input1, startLayer, *m_Network); return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer); } else @@ -594,12 +698,12 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertAveragePool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertAveragePool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Average); } -bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertConcatenation(const neuralnetworks::V1_0::Operation& operation) { // The first N (0..N-1) inputs are tensors. The Nth input is the concatenation axis. if (operation.inputs.size() <= 1) @@ -622,6 +726,7 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera return Fail("%s: Operation has no outputs", __func__); } + armnn::TensorInfo outputInfo = GetTensorInfoForOperand(*outputOperand); armnn::TensorShape outputShape = outputInfo.GetShape(); @@ -640,41 +745,15 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera return Fail("%s: Operation has invalid concat axis: %d", __func__, concatDim); } - // ArmNN uses Compute Library subtensors to perform concatenation - // This only works when concatenating along dimension 0 or 1 for a 4-D tensor, - // or along dimension 0 for a 3-D tensor. 
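// Worked example of the permutation trick described above (shapes are illustrative):
// concatenating two NHWC tensors [1,2,2,3] and [1,2,2,5] along dimension 3 (channels)
// is rewritten as: permute both inputs with NHWCToArmNN {0,2,3,1} to get [1,3,2,2] and
// [1,5,2,2], merge along dimension 1 to get [1,8,2,2], then permute the result back
// with ArmNNToNHWC {0,3,1,2} to get [1,2,2,8] - the same result as a direct NHWC concat.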
-    const armnn::PermutationVector* permuteVectorIn = &IdentityPermutation;
-    const armnn::PermutationVector* permuteVectorOut = &IdentityPermutation;
-
-    assert(permuteVectorOut != nullptr);
-
-    if (outputShape.GetNumDimensions() == 4) {
-        if (concatDim == 3) {
-            concatDim = 1;
-            permuteVectorIn = &NHWCToArmNN;
-            permuteVectorOut = &ArmNNToNHWC;
-            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
-            outputInfo.SetShape(outputShape);
-        } else if (concatDim == 2) {
-            concatDim = 1;
-            permuteVectorIn = &SwapDim1And2;
-            permuteVectorOut = &SwapDim1And2;
-            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
-            outputInfo.SetShape(outputShape);
-        }
-    }
-    else if (!(outputShape.GetNumDimensions() == 3 && concatDim == 0))
-    {
-        // Operation unsupported
-        return false;
-    }
-
     std::vector<LayerInputHandle> inputHandles;
     std::vector<armnn::TensorShape> inputShapes;
 
     inputHandles.reserve(numInputTensors);
     inputShapes.reserve(numInputTensors);
 
+    bool inputsHaveBeenReshaped = false;
+    unsigned int tensorDimensionsAdded = 0;
+
     for (uint32_t i = 0; i < numInputTensors; ++i)
     {
         const Operand* const operand = GetInputOperand(operation, i);
@@ -683,9 +762,45 @@
             return Fail("%s: Operation has invalid inputs", __func__);
         }
 
-        inputShapes.emplace_back(GetTensorShapeForOperand(*operand));
-        inputHandles.emplace_back(ConvertToLayerInputHandle(operation, i));
+        armnn::TensorShape operandShape = GetTensorShapeForOperand(*operand);
+        LayerInputHandle operandInputHandle = ConvertToLayerInputHandle(operation, i);
+
+        if (operandShape.GetNumDimensions() == 0)
+        {
+            return Fail("%s: Operands with rank 0 are not supported", __func__);
+        }
+
+        if (RequiresReshape(operandShape))
+        {
+            inputsHaveBeenReshaped = true;
+
+            armnn::TensorInfo reshapeInfo = operandInputHandle.GetTensorInfo();
+
+            // Expand the tensor to three dimensions
+            if (operandShape.GetNumDimensions() == 2)
+            {
+                reshapeInfo.SetShape(armnn::TensorShape({1, operandShape[0], operandShape[1]}));
+                tensorDimensionsAdded = 1;
+            }
+            else
+            {
+                reshapeInfo.SetShape(armnn::TensorShape({1, 1, operandShape[0]}));
+                tensorDimensionsAdded = 2;
+            }
+
+            armnn::IConnectableLayer& newReshape = AddReshapeLayer(
+                    *m_Network,
+                    operandInputHandle,
+                    reshapeInfo
+            );
+
+            // Point to the reshape operation rather than the input operation
+            operandShape = reshapeInfo.GetShape();
+            operandInputHandle = LayerInputHandle(true, &newReshape.GetOutputSlot(0), reshapeInfo);
+        }
+
+        inputShapes.emplace_back(operandShape);
+        inputHandles.emplace_back(operandInputHandle);
 
         if (!inputHandles.back().IsValid())
         {
@@ -695,9 +810,34 @@
 
     assert(inputShapes.size() == inputHandles.size());
 
+    if (inputsHaveBeenReshaped)
+    {
+        // Adjust the concatenation dimension by the amount of dimensions added (if any)
+        concatDim += tensorDimensionsAdded;
+
+        // Add extra dimensions to the output shape to reflect the addition of the reshape layers
+        if (tensorDimensionsAdded == 1)
+        {
+            outputShape = armnn::TensorShape({1, outputShape[0], outputShape[1]});
+        }
+        else if (tensorDimensionsAdded == 2)
+        {
+            outputShape = armnn::TensorShape({1, 1, outputShape[0], outputShape[1]});
+        }
+    }
+
+    // Get the pair of permutations required for the concatenation
+    std::pair<armnn::PermutationVector, armnn::PermutationVector> permutationPair =
+        std::make_pair(IdentityPermutation4D, IdentityPermutation4D);
+
+    CreatePermutationParameters(inputShapes[0].GetNumDimensions(), concatDim, permutationPair);
+
+    outputShape = 
armnnUtils::Permuted(outputShape, permutationPair.first); + outputInfo.SetShape(outputShape); + // this is no-op for identity swizzles, otherwise it replaces both // the handles and shapes with the swizzled layer output handles and shapes - SwizzleInputs(*m_Network, inputHandles, inputShapes, *permuteVectorIn); + SwizzleInputs(*m_Network, inputHandles, inputShapes, permutationPair.first); // Create an armnn merger layer descriptor - this will also perform validation on the input shapes armnn::OriginsDescriptor mergerDescriptor; @@ -746,19 +886,39 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera inputHandles[static_cast(i)].Connect(layer->GetInputSlot(i)); } - if (permuteVectorOut != &IdentityPermutation) + // Add permutation layer and connect the output to it, the permutation becomes the output layer + armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(*m_Network, + layer->GetOutputSlot(0), + permutationPair.second); + layer = &deswizzleLayer; + + if (inputsHaveBeenReshaped) { - // Add permutation layer and connect the output to it, the permutation becomes the output layer - armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(*m_Network, - layer->GetOutputSlot(0), - *permuteVectorOut); - layer = &deswizzleLayer; + armnn::TensorInfo afterConcatInfo = layer->GetOutputSlot(0).GetTensorInfo(); + + // Undo the reshape knowing the amount of dimensions added + if (tensorDimensionsAdded == 1) + { + afterConcatInfo.SetShape(armnn::TensorShape({ afterConcatInfo.GetShape()[1], + afterConcatInfo.GetShape()[2] })); + } + else if (tensorDimensionsAdded == 2) + { + afterConcatInfo.SetShape(armnn::TensorShape({ afterConcatInfo.GetShape()[2], + afterConcatInfo.GetShape()[3] })); + } + + layer = &AddReshapeLayer( + *m_Network, + layer->GetOutputSlot(0), + afterConcatInfo + ); } return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertConv2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertConv2d(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -860,7 +1020,7 @@ bool ModelToINetworkConverter::ConvertConv2d(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -959,8 +1119,10 @@ bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& ope armnn::IsDepthwiseConvolutionSupported, m_Compute, swizzledInputInfo, + swizzledOutputInfo, desc, - weights.GetInfo())) + weights.GetInfo(), + bias.GetInfo())) { return false; } @@ -979,7 +1141,7 @@ bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& ope } } -bool ModelToINetworkConverter::ConvertFloor(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertFloor(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1009,7 +1171,7 @@ bool ModelToINetworkConverter::ConvertFloor(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertFullyConnected(const neuralnetworks::V1_0::Operation& 
operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1026,30 +1188,39 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper const armnn::TensorInfo& inputInfo = input.GetTensorInfo(); const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - armnn::TensorInfo reshapedInfo = inputInfo; + // ArmNN does not currently support non-fixed weights or bias + ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1); // 2D + ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2); // 1D + if (!weightsPin.IsValid() || !biasPin.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + armnn::ConstTensor weights = weightsPin.GetConstTensor(); + armnn::ConstTensor bias = biasPin.GetConstTensor(); + + armnn::TensorInfo reshapedInfo = inputInfo; if (inputInfo.GetNumDimensions() > 2U) { + unsigned int dim0 = inputInfo.GetShape()[0]; unsigned int dim1 = inputInfo.GetShape()[1]; + for (unsigned int i = 2U; i < inputInfo.GetNumDimensions(); ++i) { dim1 *= inputInfo.GetShape()[i]; } - reshapedInfo.SetShape(armnn::TensorShape({inputInfo.GetShape()[0], dim1})); - } - // ArmNN does not currently support non-fixed weights or bias - ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1); // 2D - ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2); // 1D + unsigned int divisor = weights.GetInfo().GetShape()[1] / dim1; + if(dim0 % divisor != 0) + { + return Fail("%s: Failed to deduce tensor shape", __func__); + } - if (!weightsPin.IsValid() || !biasPin.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); + reshapedInfo.SetShape(armnn::TensorShape({dim0 / divisor, dim1 * divisor})); } // ensuring that the bias value is within 1% of the weights input (small float differences can exist) - armnn::ConstTensor weights = weightsPin.GetConstTensor(); - armnn::ConstTensor bias = biasPin.GetConstTensor(); SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), reshapedInfo); ActivationFn activationFunction; @@ -1065,7 +1236,10 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper if (!IsLayerSupported(__func__, armnn::IsFullyConnectedSupported, m_Compute, - reshapedInfo, + inputInfo, + outputInfo, + weights.GetInfo(), + bias.GetInfo(), desc)) { return false; @@ -1100,7 +1274,7 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper } } -bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1158,7 +1332,7 @@ bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const V1_0::Ope return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer); } -bool ModelToINetworkConverter::ConvertLogistic(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertLogistic(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::Sigmoid; @@ -1166,7 +1340,7 @@ bool ModelToINetworkConverter::ConvertLogistic(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& operation) +bool 
ModelToINetworkConverter::ConvertL2Normalization(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1189,7 +1363,8 @@ bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& ope if (!IsLayerSupported(__func__, armnn::IsL2NormalizationSupported, m_Compute, - swizzledInputInfo)) + swizzledInputInfo, + swizzledOutputInfo)) { return false; } @@ -1203,17 +1378,17 @@ bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& ope return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer); } -bool ModelToINetworkConverter::ConvertL2Pool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertL2Pool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::L2); } -bool ModelToINetworkConverter::ConvertMaxPool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertMaxPool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Max); } -bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertMul(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0); LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1); @@ -1223,43 +1398,42 @@ bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + // The FuseActivation parameter is always the input index 2 + // and it should be optional ActivationFn activationFunction; - if (!GetInputActivationFunction(operation, 2, activationFunction)) + if (!GetOptionalInputActivation(operation, 2, activationFunction)) { return Fail("%s: Operation has invalid inputs", __func__); } - if (!ValidateBroadcast(m_Model, operation, 2u)) + const Operand* outputOperand = GetOutputOperand(operation, 0); + + if (outputOperand == nullptr) { - return Fail("%s is invalid due to broadcasting", __func__); + return false; } + const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); + if (!IsLayerSupported(__func__, armnn::IsMultiplicationSupported, m_Compute, input0.GetTensorInfo(), - input1.GetTensorInfo())) - { - return false; - } - - const Operand* outputOperand = GetOutputOperand(operation, 0); - - if (outputOperand == nullptr) + input1.GetTensorInfo(), + outInfo)) { return false; } - const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); - armnn::IConnectableLayer* const startLayer = m_Network->AddMultiplicationLayer(); armnn::IConnectableLayer* const endLayer = ProcessActivation(outInfo, activationFunction, startLayer); + const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo(); + const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo(); + if (endLayer != nullptr) { - input0.Connect(startLayer->GetInputSlot(0)); - input1.Connect(startLayer->GetInputSlot(1)); - + BroadcastTensor(input0, input1, startLayer, *m_Network); return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer); } else @@ -1268,7 +1442,7 @@ bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertReLu(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = 
armnn::ActivationFunction::ReLu; @@ -1276,7 +1450,7 @@ bool ModelToINetworkConverter::ConvertReLu(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReLu1(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu1(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::BoundedReLu; @@ -1286,7 +1460,7 @@ bool ModelToINetworkConverter::ConvertReLu1(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReLu6(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu6(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::BoundedReLu; @@ -1295,7 +1469,7 @@ bool ModelToINetworkConverter::ConvertReLu6(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertSoftmax(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1303,6 +1477,14 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + const Operand* outputOperand = GetOutputOperand(operation, 0); + if (!outputOperand) + { + return Fail("%s: Operation has no outputs", __func__); + } + + const armnn::TensorInfo outInfo = GetTensorInfoForOperand(*outputOperand); + armnn::SoftmaxDescriptor desc; if (!GetInputFloat32(operation, 1, desc.m_Beta)) { @@ -1313,6 +1495,7 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) armnn::IsSoftmaxSupported, m_Compute, input.GetTensorInfo(), + outInfo, desc)) { return false; @@ -1325,7 +1508,7 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertTanH(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertTanH(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::TanH; @@ -1335,7 +1518,7 @@ bool ModelToINetworkConverter::ConvertTanH(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReshape(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReshape(const neuralnetworks::V1_0::Operation& operation) { const Operand* inputOperand = GetInputOperand(operation, 0); const Operand* requestedShapeOperand = GetInputOperand(operation, 1); @@ -1403,7 +1586,7 @@ bool ModelToINetworkConverter::ConvertReshape(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertResizeBilinear(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertResizeBilinear(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1449,7 +1632,307 @@ bool ModelToINetworkConverter::ConvertResizeBilinear(const V1_0::Operation& oper } -bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operation, +bool 
ModelToINetworkConverter::ConvertLstm(const neuralnetworks::V1_0::Operation& operation) +{ + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0: input", __func__); + } + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + LayerInputHandle outputStateIn = ConvertToLayerInputHandle(operation, 18); + if (!outputStateIn.IsValid()) + { + return Fail("%s: Could not read input 18: outputStateIn", __func__); + } + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + LayerInputHandle cellStateIn = ConvertToLayerInputHandle(operation, 19); + if (!cellStateIn.IsValid()) + { + return Fail("%s: Could not read input 19: cellStateIn", __func__); + } + + // Get the mandatory input tensors: + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 2); + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + const ConstTensorPin inputToCellWeightsPin = ConvertOperationInputToConstTensorPin(operation, 3); + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 4); + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 6); + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToCellWeightsPin = ConvertOperationInputToConstTensorPin(operation, 7); + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 8); + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin forgetGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 13); + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellBiasPin = ConvertOperationInputToConstTensorPin(operation, 14); + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
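// How the tensor pins here behave for optional inputs, based on the ConstTensorPin
// additions earlier in this patch ('operation' and input index 9 are only examples):
//
//     ConstTensorPin pin = ConvertOperationInputToConstTensorPin(operation, 9);
//     if (!pin.IsValid() && !pin.IsOptional())
//     {
//         // a mandatory tensor is missing, so the operation must be rejected
//     }
//     const armnn::ConstTensor* tensor = pin.GetConstTensorPtr();
//     // nullptr here means "optional input not provided", which is the representation
//     // armnn::LstmInputParams expects for absent weights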
+ const ConstTensorPin outputGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 15); + + if (!inputToForgetWeightsPin.IsValid() || + !inputToCellWeightsPin.IsValid() || + !inputToOutputWeightsPin.IsValid() || + !recurrentToForgetWeightsPin.IsValid() || + !recurrentToCellWeightsPin.IsValid() || + !recurrentToOutputWeightsPin.IsValid() || + !forgetGateBiasPin.IsValid() || + !cellBiasPin.IsValid() || + !outputGateBiasPin.IsValid()) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the optional input tensors: + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + const ConstTensorPin inputToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 1); + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + const ConstTensorPin recurrentToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 5); + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 9); + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 10); + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 11); + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin inputGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 12); + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + const ConstTensorPin projectionWeightsPin = ConvertOperationInputToConstTensorPin(operation, 16); + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + const ConstTensorPin projectionBiasPin = ConvertOperationInputToConstTensorPin(operation, 17); + + if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) || + (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) || + (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) || + (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) || + (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) || + (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) || + (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) || + (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional())) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the mandatory input scalars (actually 1-D tensors of size 1): + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. 
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + ActivationFn activation; + float cellClip; + float projClip; + if (!GetInputActivationFunctionFromTensor(operation, 20, activation) || + !GetInputScalar(operation, 21, OperandType::FLOAT32, cellClip) || + !GetInputScalar(operation, 22, OperandType::FLOAT32, projClip)) + { + return Fail("%s: Operation has invalid scalar inputs", __func__); + } + + // Outputs: + // 00: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + const Operand* scratchBuffer = GetOutputOperand(operation, 0); + if (!scratchBuffer) + { + return Fail("%s: Could not read output 0: scratchBuffer", __func__); + } + // 01: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + const Operand* outputStateOut = GetOutputOperand(operation, 1); + if (!outputStateOut) + { + return Fail("%s: Could not read output 1: outputStateOut", __func__); + } + // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + const Operand* cellStateOut = GetOutputOperand(operation, 2); + if (!cellStateOut) + { + return Fail("%s: Could not read output 2: cellStateOut", __func__); + } + // 03: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
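// Worked shape example for the outputs documented above (values are illustrative):
// with batch_size = 2, num_units = 4, output_size = 4 and CIFG disabled, the four
// outputs are scratchBuffer [2, 16] (num_units * 4), outputStateOut [2, 4],
// cellStateOut [2, 4] and output [2, 4]; with CIFG enabled the scratch buffer
// shrinks to [2, 12] (num_units * 3).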
+ const Operand* output = GetOutputOperand(operation, 3); + if (!output) + { + return Fail("%s: Could not read output 3: output", __func__); + } + + // set the params structure for the AddLstmLayer call + armnn::LstmInputParams params; + params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); + params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); + params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); + params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); + params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr(); + params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr(); + params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr(); + params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); + params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); + params.m_CellBias = cellBiasPin.GetConstTensorPtr(); + params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); + params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr(); + params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr(); + + // set the layer descriptor + armnn::LstmDescriptor desc; + desc.m_ActivationFunc = activation; + desc.m_ClippingThresCell = cellClip; + desc.m_ClippingThresProj = projClip; + desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr || + params.m_RecurrentToInputWeights == nullptr || + params.m_InputGateBias == nullptr); + desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || + params.m_CellToOutputWeights != nullptr); + desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); + + // validate the optional input groups + if (desc.m_CifgEnabled && + (params.m_InputToInputWeights != nullptr || + params.m_RecurrentToInputWeights != nullptr || + params.m_InputGateBias != nullptr)) + { + return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights," + " and input gate bias must be provided", __func__); + } + + if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr) + { + return Fail("%s: projection bias should not be provided without projection weights", __func__); + } + + if (desc.m_PeepholeEnabled && + (params.m_CellToForgetWeights == nullptr || + params.m_CellToOutputWeights == nullptr || + (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr))) + { + return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided" + " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__); + } + + // Check if the layer is supported + // Inputs + const armnn::TensorInfo& inputInfo = input.GetTensorInfo(); + const armnn::TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo(); + const armnn::TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo(); + + // Outputs + const armnn::TensorInfo& scratchBufferInfo = GetTensorInfoForOperand(*scratchBuffer); + const armnn::TensorInfo& outputStateOutInfo = GetTensorInfoForOperand(*outputStateOut); + const armnn::TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); + const armnn::TensorInfo& outputInfo = 
GetTensorInfoForOperand(*output); + + // Basic parameters + const armnn::TensorInfo& inputToForgetWeights = params.m_InputToForgetWeights->GetInfo(); + const armnn::TensorInfo& inputToCellWeights = params.m_InputToCellWeights->GetInfo(); + const armnn::TensorInfo& inputToOutputWeights = params.m_InputToOutputWeights->GetInfo(); + const armnn::TensorInfo& recurrentToForgetWeights = params.m_RecurrentToForgetWeights->GetInfo(); + const armnn::TensorInfo& recurrentToCellWeights = params.m_RecurrentToCellWeights->GetInfo(); + const armnn::TensorInfo& recurrentToOutputWeights = params.m_RecurrentToOutputWeights->GetInfo(); + const armnn::TensorInfo& forgetGateBias = params.m_ForgetGateBias->GetInfo(); + const armnn::TensorInfo& cellBias = params.m_CellBias->GetInfo(); + const armnn::TensorInfo& outputGateBias = params.m_OutputGateBias->GetInfo(); + + //Optional parameters + const armnn::TensorInfo* inputToInputWeights = nullptr; + const armnn::TensorInfo* recurrentToInputWeights = nullptr; + const armnn::TensorInfo* cellToInputWeights = nullptr; + const armnn::TensorInfo* inputGateBias = nullptr; + const armnn::TensorInfo* projectionWeights = nullptr; + const armnn::TensorInfo* projectionBias = nullptr; + const armnn::TensorInfo* cellToForgetWeights = nullptr; + const armnn::TensorInfo* cellToOutputWeights = nullptr; + + if(!desc.m_CifgEnabled) + { + inputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + recurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); + if (params.m_CellToInputWeights != nullptr) + { + cellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); + } + inputGateBias = &(params.m_InputGateBias->GetInfo()); + } + + if(desc.m_ProjectionEnabled) + { + projectionWeights = &(params.m_ProjectionWeights->GetInfo()); + if (params.m_ProjectionBias != nullptr) + { + projectionBias = &(params.m_ProjectionBias->GetInfo()); + } + } + + if(desc.m_PeepholeEnabled) + { + cellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); + cellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); + } + + if (!IsLayerSupported(__func__, + armnn::IsLstmSupported, + m_Compute, + inputInfo, + outputStateInInfo, + cellStateInInfo, + scratchBufferInfo, + outputStateOutInfo, + cellStateOutInfo, + outputInfo, + desc, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + forgetGateBias, + cellBias, + outputGateBias, + inputToInputWeights, + recurrentToInputWeights, + cellToInputWeights, + inputGateBias, + projectionWeights, + projectionBias, + cellToForgetWeights, + cellToOutputWeights)) + { + return false; + } + + // Add the layer + armnn::IConnectableLayer* layer = m_Network->AddLstmLayer(desc, params, "Lstm"); + + input.Connect(layer->GetInputSlot(0)); + outputStateIn.Connect(layer->GetInputSlot(1)); + cellStateIn.Connect(layer->GetInputSlot(2)); + + return (SetupAndTrackLayerOutputSlot(operation, 0, *layer, 0) && + SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1) && + SetupAndTrackLayerOutputSlot(operation, 2, *layer, 2) && + SetupAndTrackLayerOutputSlot(operation, 3, *layer, 3)); +} + +bool ModelToINetworkConverter::ConvertToActivation(const neuralnetworks::V1_0::Operation& operation, const char* operationName, const armnn::ActivationDescriptor& activationDesc) { @@ -1459,10 +1942,17 @@ bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operat return Fail("%s: Input 0 is invalid", operationName); } + const Operand* 
outputOperand = GetOutputOperand(operation, 0); + if (!outputOperand) + { + return false; + } + const armnn::TensorInfo outInfo = GetTensorInfoForOperand(*outputOperand); if (!IsLayerSupported(__func__, armnn::IsActivationSupported, m_Compute, input.GetTensorInfo(), + outInfo, activationDesc)) { return false; @@ -1475,7 +1965,7 @@ bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operat return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertPooling2d(const V1_0::Operation& operation, +bool ModelToINetworkConverter::ConvertPooling2d(const neuralnetworks::V1_0::Operation& operation, const char* operationName, armnn::PoolingAlgorithm poolType) { @@ -1625,7 +2115,8 @@ const void* ModelToINetworkConverter::GetOperandValueReadOnlyAddress(const Opera return valueStart; } -const Operand* ModelToINetworkConverter::GetInputOperand(const V1_0::Operation& operation, uint32_t inputIndex) const +const Operand* ModelToINetworkConverter::GetInputOperand(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex) const { if (inputIndex >= operation.inputs.size()) { @@ -1637,7 +2128,8 @@ const Operand* ModelToINetworkConverter::GetInputOperand(const V1_0::Operation& return &m_Model.operands[operation.inputs[inputIndex]]; } -const Operand* ModelToINetworkConverter::GetOutputOperand(const V1_0::Operation& operation, uint32_t outputIndex) const +const Operand* ModelToINetworkConverter::GetOutputOperand(const neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex) const { if (outputIndex >= operation.outputs.size()) { @@ -1650,7 +2142,7 @@ const Operand* ModelToINetworkConverter::GetOutputOperand(const V1_0::Operation& } template -bool ModelToINetworkConverter::GetInputScalar(const V1_0::Operation& operation, uint32_t inputIndex, +bool ModelToINetworkConverter::GetInputScalar(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, OperandType type, T& outValue) const { const Operand* operand = GetInputOperand(operation, inputIndex); @@ -1681,33 +2173,75 @@ bool ModelToINetworkConverter::GetInputScalar(const V1_0::Operation& operation, return true; } -bool ModelToINetworkConverter::GetInputInt32(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputInt32(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, int32_t& outValue) const { return GetInputScalar(operation, inputIndex, OperandType::INT32, outValue); } -bool ModelToINetworkConverter::GetInputFloat32(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputFloat32(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, float& outValue) const { return GetInputScalar(operation, inputIndex, OperandType::FLOAT32, outValue); } -bool ModelToINetworkConverter::GetInputActivationFunction(const V1_0::Operation& operation, - uint32_t inputIndex, - ActivationFn& outActivationFunction) const +bool ModelToINetworkConverter::GetInputActivationFunctionImpl(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + OperandType type, + ActivationFn& outActivationFunction) const { + if (type != OperandType::INT32 && type != OperandType::TENSOR_INT32) + { + return Fail("%s: unexpected operand type: %s (should be %s or %s)", + __func__, + toString(type).c_str(), + toString(OperandType::INT32).c_str(), + toString(OperandType::TENSOR_INT32).c_str()); + } + int32_t activationFunctionAsInt; - if (!GetInputInt32(operation, inputIndex, activationFunctionAsInt)) + if 
(!GetInputScalar(operation, inputIndex, type, activationFunctionAsInt)) { return Fail("%s: failed to get activation input value", __func__); } - outActivationFunction = static_cast(activationFunctionAsInt); return true; } -bool ModelToINetworkConverter::GetInputPaddingScheme(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputActivationFunction(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const +{ + return GetInputActivationFunctionImpl(operation, inputIndex, OperandType::INT32, outActivationFunction); +} + +bool ModelToINetworkConverter::GetInputActivationFunctionFromTensor(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const +{ + // This only accepts a 1-D tensor of size 1 + return GetInputActivationFunctionImpl(operation, inputIndex, OperandType::INT32, outActivationFunction); +} + +bool ModelToINetworkConverter::GetOptionalInputActivation(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& activationFunction) const +{ + if (operation.inputs.size() <= inputIndex) + { + activationFunction = ActivationFn::kActivationNone; + } + else + { + if (!GetInputActivationFunction(operation, inputIndex, activationFunction)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + } + return true; +} + +bool ModelToINetworkConverter::GetInputPaddingScheme(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, android::nn::PaddingScheme& outPaddingScheme) const { @@ -1722,7 +2256,7 @@ bool ModelToINetworkConverter::GetInputPaddingScheme(const V1_0::Operation& oper } LayerInputHandle ModelToINetworkConverter::ConvertToLayerInputHandle( - const V1_0::Operation& operation, + const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex) { const Operand* operand = GetInputOperand(operation, inputIndex); @@ -1791,22 +2325,22 @@ LayerInputHandle ModelToINetworkConverter::ConvertToLayerInputHandle( } } -ConstTensorPin ModelToINetworkConverter::ConvertOperationInputToConstTensorPin(const V1_0::Operation& operation, - uint32_t inputIndex, const armnn::PermutationVector& dimensionMappings, - const armnn::TensorShape* overrideTensorShape) +ConstTensorPin ModelToINetworkConverter::ConvertOperationInputToConstTensorPin( + const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, const armnn::PermutationVector& dimensionMappings, + const armnn::TensorShape* overrideTensorShape, bool optional) { const Operand* operand = GetInputOperand(operation, inputIndex); if (!operand) { - Fail("%s: failed to get input operand", __func__); + Fail("%s: failed to get input operand: index=%u", __func__, inputIndex); return ConstTensorPin(); } - - return ConvertOperandToConstTensorPin(*operand, dimensionMappings, overrideTensorShape); + return ConvertOperandToConstTensorPin(*operand, dimensionMappings, overrideTensorShape, optional); } ConstTensorPin ModelToINetworkConverter::ConvertOperandToConstTensorPin(const Operand& operand, - const armnn::PermutationVector& dimensionMappings, const armnn::TensorShape* overrideTensorShape) + const armnn::PermutationVector& dimensionMappings, const armnn::TensorShape* overrideTensorShape, bool optional) { if (!IsOperandTypeSupportedForTensors(operand.type)) { @@ -1823,6 +2357,12 @@ ConstTensorPin ModelToINetworkConverter::ConvertOperandToConstTensorPin(const Op const void* const valueStart = GetOperandValueReadOnlyAddress(operand); if (!valueStart) { + if 
(optional) + { + // optional tensor with no values is not really an error; return it as invalid, but marked as optional + return ConstTensorPin(true); + } + // mandatory tensor with no values Fail("%s: failed to get operand address", __func__); return ConstTensorPin(); } @@ -1919,7 +2459,7 @@ armnn::IConnectableLayer* ModelToINetworkConverter::ProcessActivation(const armn } if (!IsLayerSupported(__func__, armnn::IsActivationSupported, m_Compute, - prevLayer->GetOutputSlot(0).GetTensorInfo(), activationDesc)) + prevLayer->GetOutputSlot(0).GetTensorInfo(), tensorInfo, activationDesc)) { return nullptr; } @@ -1933,19 +2473,21 @@ armnn::IConnectableLayer* ModelToINetworkConverter::ProcessActivation(const armn return activationLayer; } -bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const V1_0::Operation& operation, uint32_t outputIndex, - armnn::IConnectableLayer& layer) +bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const neuralnetworks::V1_0::Operation& operation, + uint32_t operationOutputIndex, + armnn::IConnectableLayer& layer, + uint32_t layerOutputIndex) { - const Operand* outputOperand = GetOutputOperand(operation, outputIndex); + const Operand* outputOperand = GetOutputOperand(operation, operationOutputIndex); - if ((outputOperand == nullptr) || (outputIndex >= layer.GetNumOutputSlots())) + if ((outputOperand == nullptr) || (operationOutputIndex >= layer.GetNumOutputSlots())) { return false; } - armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(outputIndex); + armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(layerOutputIndex); - const uint32_t operandIndex = operation.outputs[outputIndex]; + const uint32_t operandIndex = operation.outputs[operationOutputIndex]; m_OutputSlotForOperand[operandIndex] = &outputSlot; outputSlot.SetTensorInfo(GetTensorInfoForOperand(*outputOperand)); @@ -1953,6 +2495,13 @@ bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const V1_0::Operatio return true; } +bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex, + armnn::IConnectableLayer& layer) +{ + return SetupAndTrackLayerOutputSlot(operation, outputIndex, layer, outputIndex); +} + bool ModelToINetworkConverter::IsOperationSupported(uint32_t operationIndex) const { std::map::const_iterator it = m_OperationSupported.find(operationIndex); diff --git a/ModelToINetworkConverter.hpp b/ModelToINetworkConverter.hpp index 864a2fcc..f0e28970 100644 --- a/ModelToINetworkConverter.hpp +++ b/ModelToINetworkConverter.hpp @@ -5,12 +5,11 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include "ActivationFunctor.h" - #include "ArmnnDriver.hpp" +#include +#include + #include #include #include @@ -39,7 +38,8 @@ enum class ConversionResult class ModelToINetworkConverter { public: - ModelToINetworkConverter(armnn::Compute compute, const V1_0::Model& model, + ModelToINetworkConverter(armnn::Compute compute, + const ::android::hardware::neuralnetworks::V1_0::Model& model, const std::set& forcedUnsupportedOperations); ConversionResult GetConversionResult() const { return m_ConversionResult; } @@ -52,82 +52,108 @@ public: private: void Convert(); - bool ConvertOperation(const V1_0::Operation& operation); + bool ConvertOperation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertAdd(const V1_0::Operation& operation); + bool ConvertAdd(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertAveragePool2d(const 
V1_0::Operation& operation); + bool ConvertAveragePool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertConcatenation(const V1_0::Operation& operation); + bool ConvertConcatenation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertConv2d(const V1_0::Operation& operation); + bool ConvertConv2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertDepthwiseConv2d(const V1_0::Operation& operation); + bool ConvertDepthwiseConv2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertFloor(const V1_0::Operation& operation); + bool ConvertFloor(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertFullyConnected(const V1_0::Operation& operation); + bool ConvertFullyConnected(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertLogistic(const V1_0::Operation& operation); + bool ConvertLogistic(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertLocalResponseNormalization(const V1_0::Operation& operation); + bool ConvertLocalResponseNormalization(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertL2Normalization(const V1_0::Operation& operation); + bool ConvertL2Normalization(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertL2Pool2d(const V1_0::Operation& operation); + bool ConvertL2Pool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertMaxPool2d(const V1_0::Operation& operation); + bool ConvertMaxPool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertMul(const V1_0::Operation& operation); + bool ConvertMul(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu(const V1_0::Operation& operation); + bool ConvertReLu(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu1(const V1_0::Operation& operation); + bool ConvertReLu1(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu6(const V1_0::Operation& operation); + bool ConvertReLu6(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertSoftmax(const V1_0::Operation& operation); + bool ConvertSoftmax(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertTanH(const V1_0::Operation& operation); + bool ConvertTanH(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReshape(const V1_0::Operation& operation); + bool ConvertReshape(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertResizeBilinear(const V1_0::Operation& operation); + bool ConvertResizeBilinear(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertToActivation(const V1_0::Operation& operation, const char* operationName, + bool ConvertLstm(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); + + bool ConvertToActivation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + const char* operationName, const armnn::ActivationDescriptor& activationDesc); - bool ConvertPooling2d(const V1_0::Operation& operation, const char* name, armnn::PoolingAlgorithm poolType); + bool ConvertPooling2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + const char* name, armnn::PoolingAlgorithm poolType); 
const void* GetOperandValueReadOnlyAddress(const Operand& operand) const; - const Operand* GetInputOperand(const V1_0::Operation& operation, uint32_t inputIndex) const; + const Operand* GetInputOperand(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex) const; - const Operand* GetOutputOperand(const V1_0::Operation& operation, uint32_t outputIndex) const; + const Operand* GetOutputOperand(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex) const; template - bool GetInputScalar(const V1_0::Operation& operation, uint32_t inputIndex, OperandType type, T& outValue) const; + bool GetInputScalar(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + OperandType type, T& outValue) const; - bool GetInputInt32(const V1_0::Operation& operation, uint32_t inputIndex, int32_t& outValue) const; + bool GetInputInt32(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + int32_t& outValue) const; - bool GetInputFloat32(const V1_0::Operation& operation, uint32_t inputIndex, float& outValue) const; + bool GetInputFloat32(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + float& outValue) const; - bool GetInputActivationFunction(const V1_0::Operation& operation, uint32_t inputIndex, - ActivationFn& outActivationFunction) const; + bool GetInputActivationFunctionImpl(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + OperandType type, + ActivationFn& outActivationFunction) const; - bool GetInputPaddingScheme(const V1_0::Operation& operation, uint32_t inputIndex, - android::nn::PaddingScheme& outPaddingScheme) const; + bool GetInputActivationFunction(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const; - LayerInputHandle ConvertToLayerInputHandle(const V1_0::Operation& operation, uint32_t inputIndex); + bool GetInputActivationFunctionFromTensor(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const; - ConstTensorPin ConvertOperationInputToConstTensorPin(const V1_0::Operation& operation, uint32_t inputIndex, - const armnn::PermutationVector& dimensionMappings = g_DontPermute, - const armnn::TensorShape* overrideTensorShape = nullptr); + bool GetOptionalInputActivation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& activationFunction) const; + + bool GetInputPaddingScheme(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + android::nn::PaddingScheme& outPaddingScheme) const; + + LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex); + + ConstTensorPin ConvertOperationInputToConstTensorPin( + const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + const armnn::PermutationVector& dimensionMappings = g_DontPermute, + const armnn::TensorShape* overrideTensorShape = nullptr, bool optional = false); ConstTensorPin ConvertOperandToConstTensorPin(const Operand& operand, const armnn::PermutationVector& dimensionMappings = g_DontPermute, - const armnn::TensorShape* overrideTensorShape = nullptr); + const armnn::TensorShape* overrideTensorShape = nullptr, bool optional = false); bool 
GetTensorInt32Values(const Operand& operand, std::vector& outValues) const; @@ -135,20 +161,25 @@ private: armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo, ActivationFn activation, armnn::IConnectableLayer* prevLayer); + bool SetupAndTrackLayerOutputSlot(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t operationOutputIndex, + armnn::IConnectableLayer& layer, + uint32_t layerOutputIndex); - bool SetupAndTrackLayerOutputSlot(const V1_0::Operation& operation, uint32_t outputIndex, + bool SetupAndTrackLayerOutputSlot(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex, armnn::IConnectableLayer& layer); // Input data - armnn::Compute m_Compute; - const V1_0::Model& m_Model; - const std::set& m_ForcedUnsupportedOperations; + armnn::Compute m_Compute; + const ::android::hardware::neuralnetworks::V1_0::Model& m_Model; + const std::set& m_ForcedUnsupportedOperations; // Output data - armnn::INetworkPtr m_Network; - ConversionResult m_ConversionResult; - std::map m_OperationSupported; + armnn::INetworkPtr m_Network; + ConversionResult m_ConversionResult; + std::map m_OperationSupported; // Working/intermediate data std::vector m_OutputSlotForOperand; diff --git a/NnapiSupport.txt b/NnapiSupport.txt index 8973d901..de2e4071 100644 --- a/NnapiSupport.txt +++ b/NnapiSupport.txt @@ -1,6 +1,7 @@ ------ ArmNN for Android NNAPI supported operations ------ This release of ArmNN for Android supports use as a driver for the Android Neural Networks API. It implements the android.hardware.neuralnetworks@1.0 interface. +android.hardware.neuralnetworks@1.1 models available in Android P can be executed if the utility method compliantWithV1_0(model) returns true for them. For more information on the Android Neural Networks API, see https://developer.android.com/ndk/guides/neuralnetworks/index.html @@ -31,21 +32,35 @@ RESHAPE (FLOAT32,QUANT8_ASYMM) RESIZE_BILINEAR (FLOAT32) SOFTMAX (FLOAT32,QUANT8_ASYMM) TANH (FLOAT32) +LSTM (FLOAT32) * Depthwise convolution only supports a value of 1 for the depth multiplier. In addition, the QUANT8_ASYMM version only supports 3x3 kernels. --- Unsupported operators --- -The following AndroidNN operations are currently not supported. +The following AndroidNN 1.0 operations are currently not supported. DEPTH_TO_SPACE DEQUANTIZE EMBEDDING_LOOKUP HASHTABLE_LOOKUP LSH_PROJECTION -LSTM RNN SPACE_TO_DEPTH SVDF Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework appropriately and the framework implements those operations using a CPU implementation. + +The following AndroidNN 1.1 operations are currently not supported. + +BATCH_TO_SPACE_ND +DIV +MEAN +PAD +SPACE_TO_BATCH_ND +SQUEEZE +STRIDED_SLICE +SUB +TRANSPOSE + +Where any of these operations are present, compliantWithV1_0(model) will return false for the model and the driver will not currently be utilised. diff --git a/README.md b/README.md index 4f780e7a..9939cf1b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ArmNN Android Neural Networks driver -This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL. +This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL and the android.hardware.neuralnetworks@1.1 HAL.
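As a rough sketch of the compliantWithV1_0(model) fallback described in NnapiSupport.txt above: a 1.1 driver can answer 1.1 calls by downgrading compliant models and reusing its 1.0 code path. The snippet below is illustrative only, assuming the android::nn::compliantWithV1_0() and android::nn::convertToV1_0() helpers from the ML framework's Utils.h; it is not the literal driver source.

    // Illustrative only: route a V1_1 model through the existing V1_0 support check.
    Return<void> ArmnnDriver::getSupportedOperations_1_1(const V1_1::Model& model,
                                                         getSupportedOperations_1_1_cb cb)
    {
        if (!android::nn::compliantWithV1_0(model))
        {
            // The model uses 1.1-only operations (e.g. SUB, MEAN), so nothing is supported.
            cb(ErrorStatus::NONE, std::vector<bool>(model.operations.size(), false));
            return Void();
        }

        // Downgrade the compliant model and reuse the 1.0 implementation.
        return getSupportedOperations(android::nn::convertToV1_0(model), cb);
    }

prepareModel_1_1 can take the same route, converting the model before handing it to the 1.0 prepare path.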
For more information about supported operations and configurations, see NnapiSupport.txt @@ -20,34 +20,50 @@ is built and copied to the `system/vendor/bin/hw` directory in the Android image To update the build environment, add to the contents of the variable `PRODUCT_PACKAGES` within the device-specific makefile that is located in the `/device//` directory. This file is normally called `device.mk`: + +For Android O or Android P, using NN API version 1.0, the following should be added to `device.mk`:
 PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
 
+For Android P, a newer version of the NN API (1.1) is available, +so the following should be added to `device.mk` instead: +
+PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
+
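If a single `device.mk` is shared between Android versions, one possible approach (a sketch, not part of this patch) is to select the driver flavour with the same `PLATFORM_VERSION` test that the patch itself uses in `Android.mk`:

    # Hypothetical device.mk fragment: Android P (platform version 9) gets the 1.1
    # service, everything older falls back to the 1.0 service.
    ifeq ($(PLATFORM_VERSION),9)
    PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
    else
    PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
    endif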
+`Android.mk` contains the module definitions of both versions of the ArmNN driver. 4. Build Android as normal, i.e. run `make` in `` 5. To confirm that the ArmNN driver has been built, check for the driver service executable at
-/out/target/product//system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+/out/target/product/system/vendor/bin/hw
 
+For example, if the ArmNN driver has been built with NN API version 1.0, check for the following file: +
+/out/target/product/system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+
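A quick way to check directly on a connected device (rather than in the output tree) is to list the installed HAL binaries; the install path matches the one used by the service's .rc file:

    adb shell ls /vendor/bin/hw | grep armnn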
+ +Please note: Android O is only compatible with NN API version 1.0. ### Testing -1. Run the ArmNN driver service executable in the background +1. Run the ArmNN driver service executable in the background. +The following examples assume that the 1.0 version of the driver is being used:
 adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn &
 
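For the 1.1 driver the command is the same apart from the binary name, which matches the android.hardware.neuralnetworks@1.1-service-armnn.rc file added by this patch:

    adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn &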
2. Run some code that exercises the Android Neural Networks API, for example Android's `NeuralNetworksTest` unit tests (note this is an optional component that must be built).
-adb shell /data/nativetest/NeuralNetworksTest/NeuralNetworksTest > NeuralNetworkTest.log
+adb shell /data/nativetest/NeuralNetworksTest_static/NeuralNetworksTest_static > NeuralNetworkTest.log
 
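NeuralNetworksTest is a gtest-based binary, so when iterating on a single operation it is usually possible to run just a subset of tests with a gtest filter (the filter value below is only an example):

    adb shell /data/nativetest/NeuralNetworksTest_static/NeuralNetworksTest_static --gtest_filter=TrivialTest.* > NeuralNetworkTest.log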
3. To confirm that the ArmNN driver is being used to service the Android Neural Networks API requests, check for messages in logcat with the `ArmnnDriver` tag. -### Using ClTuner +### Using the GPU tuner -ClTuner is a feature of the Compute Library that finds optimum values for OpenCL tuning parameters. The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation: +The GPU tuner is a feature of the Compute Library that finds optimum values for GPU acceleration tuning parameters. The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation: -1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service: +1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service. +The following examples assume that the 1.0 version of the driver is being used:
 adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file <PATH_TO_TUNING_DATA> --cl-tuned-parameters-mode UpdateTunedParameters &
 
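Once tuning data has been generated, the service can be restarted pointing at the same file but without the update mode, so the tuned parameters are only read back (a sketch; this assumes the driver's default tuned-parameters mode reads rather than updates the file):

    adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file <PATH_TO_TUNING_DATA> &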
diff --git a/RequestThread.hpp b/RequestThread.hpp index 0983793e..2448dbec 100644 --- a/RequestThread.hpp +++ b/RequestThread.hpp @@ -10,8 +10,9 @@ #include #include -#include "CpuExecutor.h" -#include "HalInterfaces.h" +#include "ArmnnDriver.hpp" + +#include #include namespace armnn_driver diff --git a/SystemPropertiesUtils.hpp b/SystemPropertiesUtils.hpp index 57aa98ca..e27c5ca6 100644 --- a/SystemPropertiesUtils.hpp +++ b/SystemPropertiesUtils.hpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace { template @@ -80,4 +81,4 @@ T ParseSystemProperty(const char* name, T defaultValue) ALOGD("%s", messageBuilder.str().c_str()); return defaultValue; } -} //namespace \ No newline at end of file +} //namespace diff --git a/Utils.cpp b/Utils.cpp index 99912201..38a8cd31 100644 --- a/Utils.cpp +++ b/Utils.cpp @@ -18,6 +18,7 @@ #include using namespace android; +using namespace android::hardware; using namespace android::hidl::memory::V1_0; namespace armnn_driver @@ -111,7 +112,7 @@ std::string GetOperandSummary(const Operand& operand) toString(operand.type); } -std::string GetModelSummary(const V1_0::Model& model) +std::string GetModelSummary(const neuralnetworks::V1_0::Model& model) { std::stringstream result; @@ -280,9 +281,48 @@ void DumpTensor(const std::string& dumpDir, } } +void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled, + const std::string& dumpDir, + armnn::NetworkId networkId, + const armnn::IProfiler* profiler) +{ + // Check if profiling is required. + if (!gpuProfilingEnabled) + { + return; + } + + // The dump directory must exist in advance. + if (dumpDir.empty()) + { + return; + } + + BOOST_ASSERT(profiler); + + // Set the name of the output profiling file. + const std::string fileName = boost::str(boost::format("%1%/%2%_%3%.json") + % dumpDir + % std::to_string(networkId) + % "profiling"); + + // Open the output file for writing. + std::ofstream fileStream; + fileStream.open(fileName, std::ofstream::out | std::ofstream::trunc); + + if (!fileStream.good()) + { + ALOGW("Could not open file %s for writing", fileName.c_str()); + return; + } + + // Write the profiling info to a JSON file. + profiler->Print(fileStream); +} + void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir, - const V1_0::Model& model) + const neuralnetworks::V1_0::Model& model) { // The dump directory must exist in advance.
if (dumpDir.empty()) @@ -318,4 +358,5 @@ void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwor ALOGW("An error occurred when writing to file %s", fileName.c_str()); } } + } // namespace armnn_driver diff --git a/Utils.hpp b/Utils.hpp index 4b5066ee..e805f260 100644 --- a/Utils.hpp +++ b/Utils.hpp @@ -5,13 +5,13 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" +#include "ArmnnDriver.hpp" + +#include + #include #include -#include "ArmnnDriver.hpp" - #include #include @@ -43,14 +43,20 @@ void* GetMemoryFromPool(DataLocation location, armnn::TensorInfo GetTensorInfoForOperand(const Operand& operand); std::string GetOperandSummary(const Operand& operand); -std::string GetModelSummary(const V1_0::Model& model); +std::string GetModelSummary(const ::android::hardware::neuralnetworks::V1_0::Model& model); void DumpTensor(const std::string& dumpDir, - const std::string& requestName, - const std::string& tensorName, - const armnn::ConstTensor& tensor); + const std::string& requestName, + const std::string& tensorName, + const armnn::ConstTensor& tensor); + +void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled, + const std::string& dumpDir, + armnn::NetworkId networkId, + const armnn::IProfiler* profiler); void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir, - const V1_0::Model& model); + const ::android::hardware::neuralnetworks::V1_0::Model& model); + } diff --git a/android.hardware.neuralnetworks@1.1-service-armnn.rc b/android.hardware.neuralnetworks@1.1-service-armnn.rc new file mode 100644 index 00000000..98efaf9c --- /dev/null +++ b/android.hardware.neuralnetworks@1.1-service-armnn.rc @@ -0,0 +1,4 @@ +service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn + class hal + user system + group system diff --git a/service.cpp b/service.cpp index 4ab59c85..c2701bf9 100644 --- a/service.cpp +++ b/service.cpp @@ -11,7 +11,6 @@ #include #include -#include using namespace armnn_driver; using namespace std; diff --git a/test/Android.mk b/test/Android.mk index 97e9a903..1cef3787 100644 --- a/test/Android.mk +++ b/test/Android.mk @@ -12,55 +12,72 @@ NN_HEADER_PATH := $(LOCAL_PATH)/../../../../frameworks/ml/nn/runtime/include ARMNN_HEADER_PATH := $(LOCAL_PATH)/../armnn/include ARMNN_DRIVER_HEADER_PATH := $(LOCAL_PATH)/.. +########################## +# armnn-driver-tests@1.0 # +########################## include $(CLEAR_VARS) -LOCAL_C_INCLUDES := \ - $(OPENCL_HEADER_PATH) \ - $(NN_HEADER_PATH) \ - $(ARMNN_HEADER_PATH) \ - $(ARMNN_DRIVER_HEADER_PATH) +LOCAL_MODULE := armnn-driver-tests@1.0 +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_DRIVER_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions \ - -Werror \ - -UNDEBUG - -LOCAL_SRC_FILES := \ - Tests.cpp \ - UtilsTests.cpp \ - Concurrent.cpp \ - Convolution2D.cpp \ - FullyConnected.cpp \ - GenericLayerTests.cpp \ - DriverTestHelpers.cpp \ - SystemProperties.cpp \ - Merger.cpp \ - TestTensor.cpp + -std=c++14 \ + -fexceptions \ + -Werror \ + -O0 \ + -UNDEBUG +ifeq ($(PLATFORM_VERSION),9) +# Required to build with the changes made to the Android ML framework starting from Android P, +# regardless of the HAL version used for the build. 
+LOCAL_CFLAGS+= \ + -DARMNN_ANDROID_P +endif -LOCAL_STATIC_LIBRARIES := \ - libarmnn-driver \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_system \ - libboost_unit_test_framework \ - libboost_thread \ - armnn-arm_compute - -LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - liblog \ - libtextclassifier_hash \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL +LOCAL_SRC_FILES := \ + Tests.cpp \ + UtilsTests.cpp \ + Concurrent.cpp \ + Convolution2D.cpp \ + FullyConnected.cpp \ + GenericLayerTests.cpp \ + DriverTestHelpers.cpp \ + SystemProperties.cpp \ + Lstm.cpp \ + Merger.cpp \ + TestTensor.cpp +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.0 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_system \ + libboost_unit_test_framework \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. @@ -68,18 +85,71 @@ LOCAL_SHARED_LIBRARIES+= \ android.hardware.neuralnetworks@1.1 endif -LOCAL_MODULE := armnn-driver-tests +include $(BUILD_EXECUTABLE) -LOCAL_MODULE_TAGS := eng optional +########################## +# armnn-driver-tests@1.1 # +########################## +include $(CLEAR_VARS) +LOCAL_MODULE := armnn-driver-tests@1.1 +LOCAL_MODULE_TAGS := eng optional LOCAL_ARM_MODE := arm - +LOCAL_PROPRIETARY_MODULE := true # Mark source files as dependent on Android.mk LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk -LOCAL_PROPRIETARY_MODULE := true +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_DRIVER_HEADER_PATH) -include $(BUILD_EXECUTABLE) +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -Werror \ + -O0 \ + -UNDEBUG \ + -DARMNN_ANDROID_P \ + -DARMNN_ANDROID_NN_V1_1 + +LOCAL_SRC_FILES := \ + Tests.cpp \ + UtilsTests.cpp \ + Concurrent.cpp \ + Convolution2D.cpp \ + FullyConnected.cpp \ + GenericLayerTests.cpp \ + DriverTestHelpers.cpp \ + SystemProperties.cpp \ + Lstm.cpp \ + Merger.cpp \ + TestTensor.cpp +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.1 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_system \ + libboost_unit_test_framework \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL +include $(BUILD_EXECUTABLE) diff --git a/test/Concurrent.cpp b/test/Concurrent.cpp index c2d58bde..e4940537 100644 --- a/test/Concurrent.cpp +++ b/test/Concurrent.cpp @@ -11,7 +11,9 @@ BOOST_AUTO_TEST_SUITE(ConcurrentDriverTests) using ArmnnDriver = armnn_driver::ArmnnDriver; using DriverOptions = armnn_driver::DriverOptions; using namespace android::nn; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; // Add our own test for concurrent execution // The main point of this test 
is to check that multiple requests can be @@ -22,7 +24,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute) ALOGI("ConcurrentExecute: entry"); auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // add operands int32_t actValue = 0; @@ -37,7 +39,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute) // make the fully connected operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model.operations[0].outputs = hidl_vec{4}; diff --git a/test/Convolution2D.cpp b/test/Convolution2D.cpp index cc301bc9..fe28aa44 100644 --- a/test/Convolution2D.cpp +++ b/test/Convolution2D.cpp @@ -6,13 +6,13 @@ #include #include -#include "OperationsUtils.h" +#include BOOST_AUTO_TEST_SUITE(Convolution2DTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; namespace { @@ -20,7 +20,7 @@ namespace void PaddingTestImpl(android::nn::PaddingScheme paddingScheme) { auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; uint32_t outSize = paddingScheme == android::nn::kPaddingSame ? 2 : 1; @@ -39,7 +39,7 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme) // make the convolution operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::CONV_2D; + model.operations[0].type = neuralnetworks::V1_0::OperationType::CONV_2D; model.operations[0].inputs = hidl_vec{0, 1, 2, 3, 4, 5, 6}; model.operations[0].outputs = hidl_vec{7}; diff --git a/test/DriverTestHelpers.cpp b/test/DriverTestHelpers.cpp index d2d380a7..82087961 100644 --- a/test/DriverTestHelpers.cpp +++ b/test/DriverTestHelpers.cpp @@ -25,10 +25,12 @@ std::ostream& operator<<(std::ostream& os, ErrorStatus stat) } // namespace android::hardware } // namespace android - namespace driverTestHelpers { +using namespace android::hardware; +using namespace armnn_driver; + Return ExecutionCallback::notify(ErrorStatus status) { (void)status; @@ -107,13 +109,13 @@ void AddPoolAndSetData(uint32_t size, Request& request, const float* data) memcpy(dst, data, size * sizeof(float)); } -void AddOperand(V1_0::Model& model, const Operand& op) +void AddOperand(neuralnetworks::V1_0::Model& model, const Operand& op) { model.operands.resize(model.operands.size() + 1); model.operands[model.operands.size() - 1] = op; } -void AddIntOperand(V1_0::Model& model, int32_t value) +void AddIntOperand(neuralnetworks::V1_0::Model& model, int32_t value) { DataLocation location = {}; location.offset = model.operandValues.size(); @@ -131,10 +133,12 @@ void AddIntOperand(V1_0::Model& model, int32_t value) AddOperand(model, op); } -void AddInputOperand(V1_0::Model& model, hidl_vec dimensions) +void AddInputOperand(neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + neuralnetworks::V1_0::OperandType operandType) { Operand op = {}; - op.type = OperandType::TENSOR_FLOAT32; + op.type = operandType; op.dimensions = dimensions; op.lifetime = OperandLifeTime::MODEL_INPUT; @@ -144,10 +148,13 @@ void AddInputOperand(V1_0::Model& model, hidl_vec dimensions) model.inputIndexes[model.inputIndexes.size() - 1] = model.operands.size() - 1; } -void AddOutputOperand(V1_0::Model& model, 
hidl_vec dimensions) +void AddOutputOperand(neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + neuralnetworks::V1_0::OperandType operandType) { Operand op = {}; - op.type = OperandType::TENSOR_FLOAT32; + op.type = operandType; + op.scale = operandType == neuralnetworks::V1_0::OperandType::TENSOR_QUANT8_ASYMM ? 1.f / 255.f : 0.f; op.dimensions = dimensions; op.lifetime = OperandLifeTime::MODEL_OUTPUT; @@ -158,7 +165,7 @@ void AddOutputOperand(V1_0::Model& model, hidl_vec dimensions) } -android::sp PrepareModelWithStatus(const V1_0::Model& model, +android::sp PrepareModelWithStatus(const neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver, ErrorStatus & prepareStatus, ErrorStatus expectedStatus) @@ -176,7 +183,7 @@ android::sp PrepareModelWithStatus(const V1_0::Model& model, return cb->GetPreparedModel(); } -android::sp PrepareModel(const V1_0::Model& model, +android::sp PrepareModel(const neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver) { ErrorStatus prepareStatus = ErrorStatus::NONE; @@ -187,6 +194,7 @@ ErrorStatus Execute(android::sp preparedModel, const Request& request, ErrorStatus expectedStatus) { + BOOST_TEST(preparedModel.get() != nullptr); android::sp cb(new ExecutionCallback()); ErrorStatus execStatus = preparedModel->execute(request, cb); BOOST_TEST(execStatus == expectedStatus); diff --git a/test/DriverTestHelpers.hpp b/test/DriverTestHelpers.hpp index 57541a35..ccb6b983 100644 --- a/test/DriverTestHelpers.hpp +++ b/test/DriverTestHelpers.hpp @@ -72,9 +72,9 @@ android::sp AddPoolAndGetData(uint32_t size, Request& request); void AddPoolAndSetData(uint32_t size, Request& request, const float* data); -void AddOperand(V1_0::Model& model, const Operand& op); +void AddOperand(::android::hardware::neuralnetworks::V1_0::Model& model, const Operand& op); -void AddIntOperand(V1_0::Model& model, int32_t value); +void AddIntOperand(::android::hardware::neuralnetworks::V1_0::Model& model, int32_t value); template OperandType TypeToOperandType(); @@ -86,7 +86,10 @@ template<> OperandType TypeToOperandType(); template -void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* values) +void AddTensorOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + T* values, + OperandType operandType = OperandType::TENSOR_FLOAT32) { uint32_t totalElements = 1; for (uint32_t dim : dimensions) @@ -99,7 +102,7 @@ void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* valu location.length = totalElements * sizeof(T); Operand op = {}; - op.type = TypeToOperandType(); + op.type = operandType; op.dimensions = dimensions; op.lifetime = OperandLifeTime::CONSTANT_COPY; op.location = location; @@ -113,14 +116,18 @@ void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* valu AddOperand(model, op); } -void AddInputOperand(V1_0::Model& model, hidl_vec dimensions); +void AddInputOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + ::android::hardware::neuralnetworks::V1_0::OperandType operandType = OperandType::TENSOR_FLOAT32); -void AddOutputOperand(V1_0::Model& model, hidl_vec dimensions); +void AddOutputOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + ::android::hardware::neuralnetworks::V1_0::OperandType operandType = OperandType::TENSOR_FLOAT32); -android::sp PrepareModel(const V1_0::Model& model, +android::sp PrepareModel(const ::android::hardware::neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver); 
-android::sp PrepareModelWithStatus(const V1_0::Model& model, +android::sp PrepareModelWithStatus(const ::android::hardware::neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver, ErrorStatus & prepareStatus, ErrorStatus expectedStatus=ErrorStatus::NONE); diff --git a/test/FullyConnected.cpp b/test/FullyConnected.cpp index 4feda30b..20a350c5 100644 --- a/test/FullyConnected.cpp +++ b/test/FullyConnected.cpp @@ -8,9 +8,9 @@ BOOST_AUTO_TEST_SUITE(FullyConnectedTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; // Add our own test here since we fail the fc tests which Google supplies (because of non-const weights) BOOST_AUTO_TEST_CASE(FullyConnected) @@ -19,7 +19,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected) // but that uses slightly weird dimensions which I don't think we need to support for now auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // add operands int32_t actValue = 0; @@ -34,7 +34,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected) // make the fully connected operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model.operations[0].outputs = hidl_vec{4}; @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput) sup = supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // operands int32_t actValue = 0; @@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput) model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0,1,2,3}; model.operations[0].outputs = hidl_vec{4}; @@ -177,7 +177,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape) sup = supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // operands int32_t actValue = 0; @@ -200,7 +200,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape) model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0,1,2,3}; model.operations[0].outputs = hidl_vec{4}; diff --git a/test/GenericLayerTests.cpp b/test/GenericLayerTests.cpp index 7116f0b0..aa91ce15 100644 --- a/test/GenericLayerTests.cpp +++ b/test/GenericLayerTests.cpp @@ -8,189 +8,233 @@ BOOST_AUTO_TEST_SUITE(GenericLayerTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; BOOST_AUTO_TEST_CASE(GetSupportedOperations) { auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - ErrorStatus error; - std::vector sup; + ErrorStatus errorStatus; + std::vector supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model1 = {}; + neuralnetworks::V1_0::Model 
model0 = {}; - - // add operands + // Add operands int32_t actValue = 0; float weightValue[] = {2, 4, 1}; float biasValue[] = {4}; - AddInputOperand(model1, hidl_vec{1, 3}); + AddInputOperand (model0, hidl_vec{1, 3}); + AddTensorOperand(model0, hidl_vec{1, 3}, weightValue); + AddTensorOperand(model0, hidl_vec{1}, biasValue); + AddIntOperand (model0, actValue); + AddOutputOperand(model0, hidl_vec{1, 1}); + + model0.operations.resize(1); + + // Make a correct fully connected operation + model0.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model0.operations[0].inputs = hidl_vec{0, 1, 2, 3}; + model0.operations[0].outputs = hidl_vec{4}; + + driver->getSupportedOperations(model0, cb); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)1); + BOOST_TEST(supported[0] == true); + + neuralnetworks::V1_0::Model model1 = {}; + + AddInputOperand (model1, hidl_vec{1, 3}); AddTensorOperand(model1, hidl_vec{1, 3}, weightValue); AddTensorOperand(model1, hidl_vec{1}, biasValue); - AddIntOperand(model1, actValue); + AddIntOperand (model1, actValue); AddOutputOperand(model1, hidl_vec{1, 1}); - // make a correct fully connected operation model1.operations.resize(2); - model1.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + + // Make a correct fully connected operation + model1.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model1.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model1.operations[0].outputs = hidl_vec{4}; - // make an incorrect fully connected operation - AddIntOperand(model1, actValue); + // Add an incorrect fully connected operation + AddIntOperand (model1, actValue); AddOutputOperand(model1, hidl_vec{1, 1}); - model1.operations[1].type = V1_0::OperationType::FULLY_CONNECTED; - model1.operations[1].inputs = hidl_vec{4}; + model1.operations[1].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model1.operations[1].inputs = hidl_vec{4}; // Only 1 input operand, expected 4 model1.operations[1].outputs = hidl_vec{5}; driver->getSupportedOperations(model1, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == true); - BOOST_TEST(sup[1] == false); - // Broadcast add/mul are not supported - V1_0::Model model2 = {}; - - AddInputOperand(model2, hidl_vec{1, 1, 3, 4}); - AddInputOperand(model2, hidl_vec{4}); +#if defined(ARMNN_ANDROID_P) + // In Android P, android::nn::validateModel returns INVALID_ARGUMENT, because of the wrong number of inputs for the + // fully connected layer (1 instead of 4) + BOOST_TEST((int)errorStatus == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST(supported.empty()); +#else + // In Android O, android::nn::validateModel indicates that the second (wrong) fully connected layer is unsupported + // in the vector of flags returned by the callback + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)2); + BOOST_TEST(supported[0] == true); + BOOST_TEST(supported[1] == false); +#endif + + // Test Broadcast on add/mul operators + neuralnetworks::V1_0::Model model2 = {}; + + AddInputOperand (model2, hidl_vec{1, 1, 3, 4}); + AddInputOperand (model2, hidl_vec{4}); + AddIntOperand (model2, actValue); AddOutputOperand(model2, hidl_vec{1, 1, 3, 4}); AddOutputOperand(model2, hidl_vec{1, 1, 3, 4}); model2.operations.resize(2); - model2.operations[0].type = V1_0::OperationType::ADD; - model2.operations[0].inputs = hidl_vec{0,1}; - model2.operations[0].outputs = hidl_vec{2}; + 
model2.operations[0].type = neuralnetworks::V1_0::OperationType::ADD; + model2.operations[0].inputs = hidl_vec{0, 1, 2}; + model2.operations[0].outputs = hidl_vec{3}; - model2.operations[1].type = V1_0::OperationType::MUL; - model2.operations[1].inputs = hidl_vec{0,1}; - model2.operations[1].outputs = hidl_vec{3}; + model2.operations[1].type = neuralnetworks::V1_0::OperationType::MUL; + model2.operations[1].inputs = hidl_vec{0, 1, 2}; + model2.operations[1].outputs = hidl_vec{4}; driver->getSupportedOperations(model2, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); - BOOST_TEST(sup[1] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)2); + BOOST_TEST(supported[0] == true); + BOOST_TEST(supported[1] == true); - V1_0::Model model3 = {}; + neuralnetworks::V1_0::Model model3 = {}; - // Add unsupported operation, should return no error but we don't support it - AddInputOperand(model3, hidl_vec{1, 1, 1, 8}); - AddIntOperand(model3, 2); + AddInputOperand (model3, hidl_vec{1, 1, 1, 8}); + AddIntOperand (model3, 2); AddOutputOperand(model3, hidl_vec{1, 2, 2, 2}); + model3.operations.resize(1); - model3.operations[0].type = V1_0::OperationType::DEPTH_TO_SPACE; - model1.operations[0].inputs = hidl_vec{0, 1}; + + // Add unsupported operation, should return no error but we don't support it + model3.operations[0].type = neuralnetworks::V1_0::OperationType::DEPTH_TO_SPACE; + model3.operations[0].inputs = hidl_vec{0, 1}; model3.operations[0].outputs = hidl_vec{2}; driver->getSupportedOperations(model3, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)1); + BOOST_TEST(supported[0] == false); + + neuralnetworks::V1_0::Model model4 = {}; - // Add invalid operation - V1_0::Model model4 = {}; AddIntOperand(model4, 0); + model4.operations.resize(1); - model4.operations[0].type = static_cast(100); + + // Add invalid operation + model4.operations[0].type = static_cast(100); model4.operations[0].outputs = hidl_vec{0}; driver->getSupportedOperations(model4, cb); - BOOST_TEST((int)error == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST(supported.empty()); } // The purpose of this test is to ensure that when encountering an unsupported operation -// it is skipped and getSupportedOperations() continues (rather than failing and stopping). -// As per IVGCVSW-710. +// it is skipped and getSupportedOperations() continues (rather than failing and stopping). +// As per IVGCVSW-710. 
BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure) { auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef)); - ErrorStatus error; - std::vector<bool> sup; + ErrorStatus errorStatus; + std::vector<bool> supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector<bool>& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; - // operands + // Operands int32_t actValue = 0; float weightValue[] = {2, 4, 1}; float biasValue[] = {4}; - // broadcast add is unsupported at the time of writing this test, but any unsupported layer will do - AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); - AddInputOperand(model, hidl_vec<uint32_t>{4}); + // HASHTABLE_LOOKUP is unsupported at the time of writing this test, but any unsupported layer will do + AddInputOperand (model, hidl_vec<uint32_t>{1, 1, 3, 4}, neuralnetworks::V1_0::OperandType::TENSOR_INT32); + AddInputOperand (model, hidl_vec<uint32_t>{4}, neuralnetworks::V1_0::OperandType::TENSOR_INT32); + AddInputOperand (model, hidl_vec<uint32_t>{1, 1, 3, 4}); AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); + AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}, neuralnetworks::V1_0::OperandType::TENSOR_QUANT8_ASYMM); - // fully connected - AddInputOperand(model, hidl_vec<uint32_t>{1, 3}); + // Fully connected is supported + AddInputOperand (model, hidl_vec<uint32_t>{1, 3}); AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue); AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue); - AddIntOperand(model, actValue); + AddIntOperand (model, actValue); AddOutputOperand(model, hidl_vec<uint32_t>{1, 1}); - // broadcast mul is unsupported + // EMBEDDING_LOOKUP is unsupported AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); model.operations.resize(3); - // unsupported - model.operations[0].type = V1_0::OperationType::ADD; - model.operations[0].inputs = hidl_vec<uint32_t>{0,1}; - model.operations[0].outputs = hidl_vec<uint32_t>{2}; + // Unsupported + model.operations[0].type = neuralnetworks::V1_0::OperationType::HASHTABLE_LOOKUP; + model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2}; + model.operations[0].outputs = hidl_vec<uint32_t>{3, 4}; - // supported - model.operations[1].type = V1_0::OperationType::FULLY_CONNECTED; - model.operations[1].inputs = hidl_vec<uint32_t>{3, 4, 5, 6}; - model.operations[1].outputs = hidl_vec<uint32_t>{7}; + // Supported + model.operations[1].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model.operations[1].inputs = hidl_vec<uint32_t>{5, 6, 7, 8}; + model.operations[1].outputs = hidl_vec<uint32_t>{9}; - // unsupported - model.operations[2].type = V1_0::OperationType::MUL; - model.operations[2].inputs = hidl_vec<uint32_t>{0,1}; - model.operations[2].outputs = hidl_vec<uint32_t>{8}; + // Unsupported + model.operations[2].type = neuralnetworks::V1_0::OperationType::EMBEDDING_LOOKUP; + model.operations[2].inputs = hidl_vec<uint32_t>{1, 2}; + model.operations[2].outputs = hidl_vec<uint32_t>{10}; - // we are testing that the unsupported layers return false and the test continues - // rather than failing and stopping. 
+ // We are testing that the unsupported layers return false and the test continues rather than failing and stopping driver->getSupportedOperations(model, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); - BOOST_TEST(sup[1] == true); - BOOST_TEST(sup[2] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)3); + BOOST_TEST(supported[0] == false); + BOOST_TEST(supported[1] == true); + BOOST_TEST(supported[2] == false); } // The purpose of this test is to ensure that when encountering a failure -// during mem pool mapping we properly report an error to the framework via a callback +// during mem pool mapping we properly report an error to the framework via a callback BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail) { auto driver = std::make_unique<ArmnnDriver>(armnn::Compute::CpuRef); - ErrorStatus error; - std::vector<bool> sup; + ErrorStatus errorStatus; + std::vector<bool> supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector<bool>& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; model.pools = hidl_vec<hidl_memory>{hidl_memory("Unsupported hidl memory type", nullptr, 0)}; - // Memory pool mapping should fail, we should report an error + // Memory pool mapping should fail, we should report an error driver->getSupportedOperations(model, cb); - BOOST_TEST((int)error == (int)ErrorStatus::GENERAL_FAILURE); + BOOST_TEST((int)errorStatus != (int)ErrorStatus::NONE); + BOOST_TEST(supported.empty()); } BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Lstm.cpp b/test/Lstm.cpp new file mode 100644 index 00000000..1b6ef60d --- /dev/null +++ b/test/Lstm.cpp @@ -0,0 +1,1397 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "DriverTestHelpers.hpp" +#include <boost/test/unit_test.hpp> +#include +#include + +#include "OperationsUtils.h" + +#include <boost/math/special_functions/relative_difference.hpp> + +BOOST_AUTO_TEST_SUITE(LstmTests) + +using ArmnnDriver = armnn_driver::ArmnnDriver; +using DriverOptions = armnn_driver::DriverOptions; +using namespace driverTestHelpers; +using namespace android::hardware; + +namespace +{ + +template <typename T> +RequestArgument CreateRequestArgument(std::vector<T> value, unsigned int poolIndex) +{ + DataLocation inputInloc = {}; + inputInloc.poolIndex = poolIndex; + inputInloc.offset = 0; + inputInloc.length = value.size() * sizeof(T); + RequestArgument inputRequestArgument = {}; + inputRequestArgument.location = inputInloc; + inputRequestArgument.dimensions = hidl_vec<uint32_t>{}; + return inputRequestArgument; +} + +// Returns true if the relative difference between two float values is less than the tolerance value given. +// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work! 
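A note on the helper defined just below: the zero special-cases exist because a relative difference is undefined when either operand is exactly zero, so the comparison falls back to an absolute check in those cases. The following standalone sketch is an editorial illustration only, not part of the patch; it spells the relative difference out with std::fabs instead of calling boost::math::relative_difference, whose edge-case handling is more thorough:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // Sketch of the comparison rule used by TolerantCompareEqual below:
    // absolute check against zero operands, relative difference otherwise.
    bool TolerantCompareEqualSketch(float a, float b, float tolerance = 0.00001f)
    {
        if (a == 0.0f) { return std::fabs(b) < tolerance; } // relative difference is
        if (b == 0.0f) { return std::fabs(a) < tolerance; } // undefined against zero
        return std::fabs(a - b) / std::min(std::fabs(a), std::fabs(b)) < tolerance;
    }

    int main()
    {
        assert(TolerantCompareEqualSketch(0.0f, 0.0000001f));  // near zero: absolute check
        assert(TolerantCompareEqualSketch(1.0f, 1.0000001f));  // tiny relative difference
        assert(!TolerantCompareEqualSketch(1.0f, 1.1f));       // ~10% apart: not equal
        return 0;
    }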
+bool TolerantCompareEqual(float a, float b, float tolerance = 0.00001f) +{ + float rd; + if (a == 0.0f) + { + rd = fabs(b); + } + else if (b == 0.0f) + { + rd = fabs(a); + } + else + { + rd = boost::math::relative_difference(a, b); + } + return rd < tolerance; +} + +} // namespace + +// Add our own tests here since we fail the lstm tests which Google supplies (because of non-const weights) + +void LstmTestImpl(hidl_vec inputDimensions, + std::vector inputValue, + hidl_vec inputToInputWeightsDimensions, + float* inputToInputWeightsValue, + hidl_vec inputToForgetWeightsDimensions, + float* inputToForgetWeightsValue, + hidl_vec inputToCellWeightsDimensions, + float* inputToCellWeightsValue, + hidl_vec inputToOutputWeightsDimensions, + float* inputToOutputWeightsValue, + hidl_vec recurrentToInputWeightsDimensions, + float* recurrentToInputWeightsValue, + hidl_vec recurrentToForgetWeightsDimensions, + float* recurrentToForgetWeightsValue, + hidl_vec recurrentToCellWeightsDimensions, + float* recurrentToCellWeightsValue, + hidl_vec recurrentToOutputWeightsDimensions, + float* recurrentToOutputWeightsValue, + hidl_vec cellToInputWeightsDimensions, + float* cellToInputWeightsValue, + hidl_vec cellToForgetWeightsDimensions, + float* cellToForgetWeightsValue, + hidl_vec cellToOutputWeightsDimensions, + float* cellToOutputWeightsValue, + hidl_vec inputGateBiasDimensions, + float* inputGateBiasValue, + hidl_vec forgetGateBiasDimensions, + float* forgetGateBiasValue, + hidl_vec cellBiasDimensions, + float* cellBiasValue, + hidl_vec outputGateBiasDimensions, + float* outputGateBiasValue, + hidl_vec projectionWeightsDimensions, + float* projectionWeightsValue, + hidl_vec projectionBiasDimensions, + float* projectionBiasValue, + hidl_vec outputStateInDimensions, + std::vector outputStateInValue, + hidl_vec cellStateInDimensions, + std::vector cellStateInValue, + hidl_vec activationFunctionDimensions, + int32_t* activationFunctionValue, + hidl_vec cellClippingThresholdDimensions, + float* cellClippingThresholdValue, + hidl_vec projectionClippingThresholdDimensions, + float* projectionClippingThresholdValue, + hidl_vec scratchBufferDimensions, + std::vector scratchBufferValue, + hidl_vec outputStateOutDimensions, + std::vector outputStateOutValue, + hidl_vec cellStateOutDimensions, + std::vector cellStateOutValue, + hidl_vec outputDimensions, + std::vector outputValue) +{ + auto driver = std::make_unique(DriverOptions(armnn::Compute::GpuAcc)); + neuralnetworks::V1_0::Model model = {}; + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + AddInputOperand(model, inputDimensions); + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + AddTensorOperand(model, inputToInputWeightsDimensions, inputToInputWeightsValue); + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + AddTensorOperand(model, inputToForgetWeightsDimensions, inputToForgetWeightsValue); + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. 
+ AddTensorOperand(model, inputToCellWeightsDimensions, inputToCellWeightsValue); + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + AddTensorOperand(model, inputToOutputWeightsDimensions, inputToOutputWeightsValue); + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + AddTensorOperand(model, recurrentToInputWeightsDimensions, recurrentToInputWeightsValue); + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue); + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToCellWeightsDimensions, recurrentToCellWeightsValue); + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue); + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToInputWeightsDimensions, cellToInputWeightsValue); + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToForgetWeightsDimensions, cellToForgetWeightsValue); + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToOutputWeightsDimensions, cellToOutputWeightsValue); + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, inputGateBiasDimensions, inputGateBiasValue); + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, forgetGateBiasDimensions, forgetGateBiasValue); + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellBiasDimensions, cellBiasValue); + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, outputGateBiasDimensions, outputGateBiasValue); + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + AddTensorOperand(model, projectionWeightsDimensions, projectionWeightsValue); + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + AddTensorOperand(model, projectionBiasDimensions, projectionBiasValue); + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + AddInputOperand(model, outputStateInDimensions); + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. 
+ AddInputOperand(model, cellStateInDimensions); + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + AddTensorOperand(model, activationFunctionDimensions, + activationFunctionValue, OperandType::INT32); + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + AddTensorOperand(model, cellClippingThresholdDimensions, + cellClippingThresholdValue, OperandType::FLOAT32); + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + AddTensorOperand(model, projectionClippingThresholdDimensions, + projectionClippingThresholdValue, OperandType::FLOAT32); + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + AddOutputOperand(model, scratchBufferDimensions); + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + AddOutputOperand(model, outputStateOutDimensions); + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + AddOutputOperand(model, cellStateOutDimensions); + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + AddOutputOperand(model, outputDimensions); + + // make the lstm operation + model.operations.resize(1); + model.operations[0].type = neuralnetworks::V1_0::OperationType::LSTM; + model.operations[0].inputs = + hidl_vec {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22}; + model.operations[0].outputs = hidl_vec {23, 24, 25, 26}; + + // define the input values + hidl_vec inputArguments; + inputArguments.resize(3); + + inputArguments[0] = CreateRequestArgument(inputValue, 0); + inputArguments[1] = CreateRequestArgument(outputStateInValue, 1); + inputArguments[2] = CreateRequestArgument(cellStateInValue, 2); + + // define the expected output values + hidl_vec outputArguments; + outputArguments.resize(4); + + outputArguments[0] = CreateRequestArgument(scratchBufferValue, 3); + outputArguments[1] = CreateRequestArgument(outputStateOutValue, 4); + outputArguments[2] = CreateRequestArgument(cellStateOutValue, 5); + outputArguments[3] = CreateRequestArgument(outputValue, 6); + + Request request = {}; + request.inputs = inputArguments; + request.outputs = outputArguments; + + // set the input data + AddPoolAndSetData(inputValue.size(), request, inputValue.data()); + AddPoolAndSetData(outputStateInValue.size(), request, outputStateInValue.data()); + AddPoolAndSetData(cellStateInValue.size(), request, cellStateInValue.data()); + + // add memory for the outputs + AddPoolAndGetData(scratchBufferValue.size(), request); + android::sp outputStateOutMemory = AddPoolAndGetData(outputStateOutValue.size(), request); + float* outputStateOutData = static_cast(static_cast(outputStateOutMemory->getPointer())); + android::sp cellStateOutMemory = AddPoolAndGetData(cellStateOutValue.size(), request); + float* cellStateOutData = static_cast(static_cast(cellStateOutMemory->getPointer())); + android::sp outputMemory = 
AddPoolAndGetData(outputValue.size(), request); + float* outputData = static_cast(static_cast(outputMemory->getPointer())); + + // make the prepared model and run the execution + android::sp preparedModel = PrepareModel(model, *driver); + if (preparedModel.get() != nullptr) + { + Execute(preparedModel, request); + } + + // check the results + for (size_t i = 0; i < outputStateOutValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]), + "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]); + } + for (size_t i = 0; i < cellStateOutValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i]), + "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]); + } + for (size_t i = 0; i < outputValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]), + "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]); + } +} + +BOOST_AUTO_TEST_CASE(LstmNoCifgNoPeepholeNoProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({1, 2}); + std::vector inputValue {2.0f, 3.0f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + hidl_vec inputToInputWeightsDimensions({4, 2}); + float inputToInputWeightsValue[] = {-0.45018822f, -0.02338299f, + -0.08705890f, -0.34550029f, + 0.04266912f, -0.15680569f, + -0.34856534f, 0.43890524f}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToForgetWeightsDimensions({4, 2}); + float inputToForgetWeightsValue[] = { 0.09701663f, 0.20334584f, + -0.50592935f, -0.31343272f, + -0.40032279f, 0.44781327f, + 0.01387155f, -0.35593212f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + hidl_vec inputToCellWeightsDimensions({4, 2}); + float inputToCellWeightsValue[] = {-0.50013041f, 0.13702840f, + 0.11810488f, 0.20131630f, + -0.20583314f, 0.44344562f, + 0.22077113f, -0.29909778f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToOutputWeightsDimensions({4, 2}); + float inputToOutputWeightsValue[] = {-0.25065863f, -0.28290087f, + 0.04613829f, 0.40525138f, + 0.44272184f, 0.03897077f, + -0.15568960f, 0.19487578f}; + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. 
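One convention worth keeping in mind for the operand walkthroughs here and below: an optional LSTM operand that a test omits is still added at its fixed index, but as a tensor whose dimension list is {0} and with no data behind it. A fragment in the style of the test code (the identifiers mirror declarations that appear later in this test; hidl_vec<uint32_t> is the dimension type the helpers take):

    // How an absent optional operand is encoded in these tests:
    hidl_vec<uint32_t> cellToInputWeightsDimensions({0}); // dimension {0} == not provided
    float cellToInputWeightsValue[] = {};                 // no backing data
    AddTensorOperand(model, cellToInputWeightsDimensions, cellToInputWeightsValue);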
+ hidl_vec recurrentToInputWeightsDimensions({4, 4}); + float recurrentToInputWeightsValue[] = {-0.00635350f, -0.20423880f, 0.31454784f, -0.35746509f, + 0.28902304f, 0.08183324f, -0.16555229f, 0.02286911f, + -0.13566875f, 0.03034258f, 0.48091322f, -0.12528998f, + 0.24077177f, -0.51332325f, -0.33502164f, 0.10629296f}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({4, 4}); + float recurrentToForgetWeightsValue[] = {-0.48684245f, -0.06655136f, 0.42224967f, 0.21126390f, + 0.27654213f, 0.20864892f, -0.07646349f, 0.45877004f, + 0.00141793f, -0.14609534f, 0.36447752f, 0.09196436f, + 0.28053468f, 0.01560611f, -0.20127171f, -0.01140004f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToCellWeightsDimensions({4, 4}); + float recurrentToCellWeightsValue[] = {-0.34074140f, 0.24443203f, -0.20785320f, 0.26320225f, + 0.05695659f, -0.00123841f, -0.47447860f, -0.35869038f, + -0.06418842f, -0.13502428f, -0.50176400f, 0.22830659f, + -0.46367589f, 0.26016325f, -0.03894562f, -0.16368064f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({4, 4}); + float recurrentToOutputWeightsValue[] = { 0.43385774f, -0.17194885f, 0.27182370f, 0.09215671f, + 0.24107647f, -0.39835793f, 0.18212086f, 0.01301402f, + 0.48572797f, -0.50656658f, 0.20047462f, -0.20607421f, + -0.51818722f, -0.15390486f, 0.04681480f, 0.39922136f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({0}); + float cellToInputWeightsValue[] = {}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToForgetWeightsDimensions({0}); + float cellToForgetWeightsValue[] = {}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({0}); + float cellToOutputWeightsValue[] = {}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({4}); + float inputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({4}); + float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({4}); + float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({4}); + float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + hidl_vec projectionWeightsDimensions({0}); + float projectionWeightsValue[] = {}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. 
+ hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({1, 4}); + std::vector outputStateInValue {0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({1, 4}); + std::vector cellStateInValue {0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({1, 12}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({1, 4}); + std::vector outputStateOutValue {-0.0297319f, 0.122947f, 0.208851f, -0.153588f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({1, 4}); + std::vector cellStateOutValue {-0.145439f, 0.157475f, 0.293663f, -0.277353f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
+ hidl_vec outputDimensions({1, 4}); + std::vector outputValue {-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmCifgPeepholeNoProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm2.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm2.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({1, 2}); + std::vector inputValue {2.0f, 3.0f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + hidl_vec inputToInputWeightsDimensions({0}); + float inputToInputWeightsValue[] = {}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToForgetWeightsDimensions({4, 2}); + float inputToForgetWeightsValue[] = {-0.55291498f, -0.42866567f, + 0.13056988f, -0.36333650f, + -0.22755712f, 0.28253698f, + 0.24407166f, 0.33826375f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + hidl_vec inputToCellWeightsDimensions({4, 2}); + float inputToCellWeightsValue[] = {-0.49770179f, -0.27711356f, + -0.09624726f, 0.05100781f, + 0.04717243f, 0.48944736f, + -0.38535351f, -0.17212132f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. 
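For orientation: "CIFG" is the coupled input and forget gate variant, in which the input gate is derived from the forget gate rather than learned separately, so the CIFG test here omits the input-to-input weights, the recurrent-to-input weights, and the input gate bias, while the peephole connections contribute the cell-to-forget and cell-to-output weight vectors. A sketch of the resulting operand pattern, in the style of the test code (num_units is 4 in this test):

    // CIFG + peephole operand pattern exercised by this test:
    hidl_vec<uint32_t> inputToInputWeightsDimensions({0});     // CIFG: omitted
    hidl_vec<uint32_t> recurrentToInputWeightsDimensions({0}); // CIFG: omitted
    hidl_vec<uint32_t> inputGateBiasDimensions({0});           // CIFG: omitted
    hidl_vec<uint32_t> cellToForgetWeightsDimensions({4});     // peephole: [num_units]
    hidl_vec<uint32_t> cellToOutputWeightsDimensions({4});     // peephole: [num_units]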
+ hidl_vec inputToOutputWeightsDimensions({4, 2}); + float inputToOutputWeightsValue[] = { 0.10725588f, -0.02335852f, + -0.55932593f, -0.09426838f, + -0.44257352f, 0.54939759f, + 0.01533556f, 0.42751634f}; + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + hidl_vec recurrentToInputWeightsDimensions({0}); // VTS was {4, 4} -> {0} ? + float recurrentToInputWeightsValue[] = {}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({4, 4}); + float recurrentToForgetWeightsValue[] = {-0.13832897f, -0.05151010f, -0.23590070f, -0.16661474f, + -0.14340827f, 0.36986142f, 0.23414481f, 0.55899000f, + 0.10798943f, -0.41174671f, 0.17751795f, -0.34484994f, + -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToCellWeightsDimensions({4, 4}); + float recurrentToCellWeightsValue[] = { 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, + 0.42957711f, 0.01841056f, -0.32764608f, -0.33027974f, + -0.10826075f, 0.20675004f, 0.19069612f, -0.03026325f, + -0.54532051f, 0.33003211f, 0.44901288f, 0.21193194f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({4, 4}); + float recurrentToOutputWeightsValue[] = { 0.41613156f, 0.42610586f, -0.16495961f, -0.56638730f, + 0.30579174f, -0.05115908f, -0.33941799f, 0.23364776f, + 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f, + 0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({0}); + float cellToInputWeightsValue[] = {}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToForgetWeightsDimensions({4}); + float cellToForgetWeightsValue[] = {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({4}); + float cellToOutputWeightsValue[] = {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({0}); // VTS was {4} -> {0} ? + float inputGateBiasValue[] = {}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({4}); + float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({4}); + float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({4}); + float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 16: The projection weights: Optional. 
A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + hidl_vec projectionWeightsDimensions({0}); + float projectionWeightsValue[] = {}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({1, 4}); + std::vector outputStateInValue {0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({1, 4}); + std::vector cellStateInValue {0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({1, 16}); // VTS was {1, 12} -> {1, 16} + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({1, 4}); + std::vector outputStateOutValue {-0.364445f, -0.00352185f, 0.128866f, -0.0516365f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({1, 4}); + std::vector cellStateOutValue {-0.760444f, -0.0180416f, 0.182264f, -0.0649371f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
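A quick worked check of the scratch buffer sizing above: with batch_size = 1 and num_units = 4, the CIFG case allocates num_units * 4 = 16 elements ({1, 16}, hence the "VTS was {1, 12} -> {1, 16}" remark), while the non-CIFG test earlier in this file used num_units * 3 = 12 elements ({1, 12}). The same arithmetic as a compile-time sketch (editorial, not part of the patch):

    #include <cstdint>

    // Scratch buffer element counts for the two LSTM variants in this file.
    constexpr uint32_t batchSize = 1;
    constexpr uint32_t numUnits  = 4;
    static_assert(batchSize * numUnits * 4 == 16, "CIFG test: scratchBufferDimensions({1, 16})");
    static_assert(batchSize * numUnits * 3 == 12, "non-CIFG test: scratchBufferDimensions({1, 12})");

    int main() { return 0; }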
+ hidl_vec outputDimensions({1, 4}); + std::vector outputValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmNoCifgPeepholeProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm3.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm3.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({2, 5}); + std::vector inputValue {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f, + 0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. 
+ hidl_vec inputToInputWeightsDimensions({20, 5}); + float inputToInputWeightsValue[] = { 0.0213936830f, 0.0612455100f, 0.0469051670f, -0.0146576770f, -0.0314946300f, + 0.0917180300f, 0.1464780100f, 0.1079719300f, -0.0057968358f, 0.0019193048f, + -0.2726754000f, 0.1015402900f, -0.0185398850f, 0.0803498850f, -0.1026238500f, + -0.0225997870f, -0.0912115500f, -0.0086759670f, -0.0452061030f, -0.0821282000f, + -0.0080459520f, 0.0154780810f, 0.0552172470f, 0.0387195870f, 0.0441536270f, + -0.0645324300f, 0.0503182500f, -0.0469351080f, -0.0081644309f, 0.0145742260f, + -0.1671009000f, -0.1551955200f, -0.1681979700f, -0.1397126900f, -0.1195305900f, + 0.2500548700f, -0.2279098300f, 0.0098550870f, -0.0281409580f, -0.1120069800f, + 0.1129540800f, -0.0035217577f, 0.0544850750f, 0.0518469500f, 0.0647112060f, + 0.1098919300f, 0.1167478600f, 0.0349060700f, 0.0772735700f, 0.1139058500f, + -0.1863375000f, -0.1034451000f, -0.1394518900f, -0.0494012270f, -0.1876706300f, + 0.0424839030f, 0.1423355200f, 0.1383258100f, 0.1835016500f, 0.1454560300f, + -0.0285457040f, 0.0249395310f, 0.0509297180f, 0.0076203286f, -0.0029723682f, + -0.0424842240f, -0.1182759600f, -0.0917110400f, -0.1080862800f, -0.1632798800f, + -0.2273378000f, -0.0993647000f, -0.0171551070f, 0.0023917493f, 0.0492727640f, + 0.0038534778f, 0.0547645050f, 0.0897537840f, 0.0694723400f, 0.0801447600f, + -0.0454423400f, -0.0497073000f, -0.0713563100f, -0.0489291060f, -0.0040420120f, + -0.0092840260f, 0.0180420540f, 0.0036860977f, -0.0742730200f, -0.1143460400f, + -0.0189954560f, 0.0314875430f, 0.0128349080f, 0.0199777540f, 0.0442566540f, + -0.3929261300f, -0.1851933400f, -0.1165128100f, -0.0680989200f, 0.0113736770f}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. 
+ hidl_vec inputToForgetWeightsDimensions({20, 5}); + float inputToForgetWeightsValue[] = {-0.0018401089f, -0.0048522370f, 0.0369842400f, 0.0141817040f, 0.0282732360f, + -0.0167261940f, -0.0524975900f, -0.1020426100f, 0.0086106600f, -0.0409795050f, + -0.0098991870f, 0.0192389200f, -0.0281772690f, -0.0853510300f, -0.1458549500f, + 0.1066256700f, -0.0190973100f, -0.0178835340f, -0.0047269356f, -0.0451033230f, + 0.0030784295f, 0.0767847750f, 0.0746369600f, 0.0945313950f, 0.0814421000f, + -0.1225789900f, -0.0339457580f, -0.0313034650f, 0.0456306260f, 0.0684388700f, + -0.1349294500f, -0.0124800070f, -0.0811829000f, -0.0722449900f, -0.0962879100f, + 0.0451009460f, 0.0012300825f, 0.0139646620f, 0.0993723940f, 0.0254305900f, + 0.0695832400f, 0.0342572960f, 0.0482646000f, 0.0626799700f, 0.0526250680f, + 0.1278466600f, 0.0707789700f, 0.0257259350f, 0.0416500900f, 0.0724190500f, + 0.0186686440f, -0.0373772940f, -0.0627778300f, -0.0883363600f, -0.0401206050f, + -0.0114055860f, -0.0078083350f, -0.0103013860f, -0.0051021670f, 0.0277174640f, + 0.0548342300f, 0.1144911100f, 0.1128965200f, 0.1093983900f, 0.1339650600f, + -0.0840216600f, -0.0190146200f, -0.0446783040f, -0.0772056500f, 0.0143500630f, + -0.1175795800f, -0.0652038000f, -0.0818573300f, -0.0767543240f, -0.0926143750f, + 0.1040549100f, 0.0529603360f, 0.0357558950f, 0.0358393860f, -0.0125405530f, + 0.0368812980f, 0.0291337600f, 0.0342015900f, 0.0544844700f, -0.0545233530f, + 0.0258271500f, 0.0232735500f, -0.0118571790f, -0.0011980024f, -0.0346417170f, + -0.0261250940f, -0.1758261500f, -0.1592365700f, -0.2748677400f, -0.0006143371f, + 0.0001771948f, -8.470171e-05f, 0.0265180700f, 0.0457907650f, 0.069564960f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. 
+ hidl_vec inputToCellWeightsDimensions({20, 5}); + float inputToCellWeightsValue[] = {-0.0458028300f, -0.0954946200f, -0.0324189850f, -0.0645463300f, -0.0435284530f, + 0.0430185870f, -0.0491523440f, -0.1241814400f, -0.0789854750f, -0.0759688900f, + 0.0194843620f, -0.1143496200f, -0.0074034138f, -0.0631484400f, -0.0929814950f, + 0.0062155537f, -0.0250343380f, -0.0028890965f, 0.0489295270f, 0.0623507500f, + 0.1066591800f, -0.0320367920f, -0.0850591600f, -0.1084335800f, -0.1300243300f, + -0.0368164370f, -0.0213013400f, -0.0165182390f, 0.0047691227f, -0.0025825808f, + 0.0660178660f, 0.0299915340f, -0.1065283600f, -0.1037554000f, -0.1305607100f, + -0.0326664300f, -0.0337024140f, -0.0064734240f, -0.0461169200f, 0.0144193390f, + -0.0251743230f, 0.0396852000f, 0.0817775060f, 0.0615746800f, 0.1021009500f, + -0.0096581940f, 0.0465117170f, 0.0360390600f, 0.0069369148f, 0.0159600950f, + -0.0650766600f, 0.0955159800f, 0.0535688360f, 0.0640871400f, 0.1283566700f, + -0.0087143290f, -0.2021196600f, -0.1209367400f, 0.0294504720f, 0.2849013000f, + -0.0292279010f, 0.1164364000f, -0.0856026300f, 0.0994178600f, -0.0369995650f, + -0.0288426260f, -0.0033637602f, -0.0170129020f, -0.0972086500f, -0.1119335100f, + -0.0291551170f, -0.0179360340f, -0.0097689360f, -0.0422332400f, -0.0361596350f, + 0.0650511200f, -0.0217428920f, -0.0233772120f, -0.0722136400f, -0.0643055200f, + 0.0545386500f, 0.0911498140f, 0.0638733100f, 0.0075183930f, 0.0559609530f, + 0.0697793440f, 0.0464111680f, 0.1050991100f, 0.0746389400f, 0.0075130584f, + 0.0128509820f, 0.0455543100f, 0.0569556880f, 0.0655528500f, 0.0508014560f, + -0.0098626830f, 0.0082677200f, -0.0265556090f, -0.0073611983f, -0.0014897042f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToOutputWeightsDimensions({20, 5}); + float inputToOutputWeightsValue[] = {-0.0998932000f, -0.0720195600f, -0.0528037730f, -0.1562959300f, -0.1500191800f, + -0.0765075100f, 0.0235985500f, -0.0751553550f, -0.0803770900f, -0.1509353400f, + 0.0295175520f, -0.0475139300f, 0.0103505310f, -0.0266485100f, -0.0168397220f, + -0.0231211630f, 0.0077019283f, 0.0128512570f, -0.0504064900f, -0.0129761000f, + -0.0217377470f, -0.0383057930f, -0.0687058600f, -0.0148124700f, -0.0012853940f, + 0.1012423600f, 0.0831228350f, 0.0533130060f, -0.0622356460f, -0.0756371540f, + -0.0278339030f, 0.0297749710f, 0.1130802000f, 0.0921890600f, 0.0950613500f, + -0.0866657640f, -0.0371627060f, -0.0388809140f, -0.0358328450f, -0.0144815640f, + -0.0982500300f, -0.1204856900f, -0.0976655860f, -0.0528763300f, -0.0964047000f, + -0.1136642900f, 0.0357775050f, 0.1356881900f, 0.0524513830f, 0.0506493040f, + 0.0579895100f, -0.0218523350f, -0.0998488440f, 0.0147404750f, -0.0788979460f, + 0.0497469900f, 0.0141604730f, 0.0697393200f, 0.0496494200f, 0.0333646460f, + 0.0819012400f, 0.0255353670f, 0.0508931650f, 0.0485142540f, 0.0694581300f, + -0.0789075640f, -0.0670761600f, -0.1184450800f, -0.0998668800f, -0.0750940300f, + 0.0626322600f, 0.1492558700f, 0.2018843600f, 0.1209845100f, 0.1463941500f, + 0.0015017595f, -0.0142673820f, -0.0341725700f, 0.0127114680f, 0.0028300495f, + -0.0247584820f, -0.0509854800f, -0.0821182000f, 0.0142256720f, 0.0215441580f, + 0.0894972500f, 0.0750526800f, -0.0020780868f, 0.0490825800f, 0.0647629500f, + -0.0229070630f, 0.0275624560f, 0.0401857350f, 0.0195675770f, -0.0155987390f, + -0.0490973030f, -0.0171218660f, -0.0833682340f, -0.0233200200f, -0.084095600f}; + // 05: The recurrent-to-input weights: Optional. 
A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + hidl_vec recurrentToInputWeightsDimensions({20, 16}); + float recurrentToInputWeightsValue[] = { + -0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, // 00 + -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, + -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, + -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, + 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, // 01 + 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, + -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, + 0.14283475f, -0.07390571f, -0.06402044f, 0.062524505f, + -0.093129106f, 0.04860203f, -0.08364217f, -0.08119002f, // 02 + 0.009352075f, 0.22920375f, 0.0016303885f, 0.11583097f, + -0.13732095f, 0.012405723f, -0.07551853f, 0.06343048f, + 0.12162708f, -0.031923793f, -0.014335606f, 0.01790974f, + -0.10650317f, -0.0724401f, 0.08554849f, -0.05727212f, // 03 + 0.06556731f, -0.042729504f, -0.043227166f, 0.011683251f, + -0.013082158f, -0.029302018f, -0.010899579f, -0.062036745f, + -0.022509435f, -0.00964907f, -0.01567329f, 0.04260106f, + -0.07787477f, -0.11576462f, 0.017356863f, 0.048673786f, // 04 + -0.017577527f, -0.05527947f, -0.082487635f, -0.040137455f, + -0.10820036f, -0.04666372f, 0.022746278f, -0.07851417f, + 0.01068115f, 0.032956902f, 0.022433773f, 0.0026891115f, + 0.08944216f, -0.0685835f, 0.010513544f, 0.07228705f, // 05 + 0.02032331f, -0.059686817f, -0.0005566496f, -0.086984694f, + 0.040414046f, -0.1380399f, 0.094208956f, -0.05722982f, + 0.012092817f, -0.04989123f, -0.086576f, -0.003399834f, + -0.04696032f, -0.045747425f, 0.10091314f, 0.048676282f, // 06 + -0.029037097f, 0.031399418f, -0.0040285117f, 0.047237843f, + 0.09504992f, 0.041799378f, -0.049185462f, -0.031518843f, + -0.10516937f, 0.026374253f, 0.10058866f, -0.0033195973f, + -0.041975245f, 0.0073591834f, 0.0033782164f, -0.004325073f, // 07 + -0.10167381f, 0.042500053f, -0.01447153f, 0.06464186f, + -0.017142897f, 0.03312627f, 0.009205989f, 0.024138335f, + -0.011337001f, 0.035530265f, -0.010912711f, 0.0706555f, + -0.005894094f, 0.051841937f, -0.1401738f, -0.02351249f, // 08 + 0.0365468f, 0.07590991f, 0.08838724f, 0.021681072f, + -0.10086113f, 0.019608743f, -0.06195883f, 0.077335775f, + 0.023646897f, -0.095322326f, 0.02233014f, 0.09756986f, + -0.048691444f, -0.009579111f, 0.07595467f, 0.11480546f, // 09 + -0.09801813f, 0.019894179f, 0.08502348f, 0.004032281f, + 0.037211012f, 0.068537936f, -0.048005626f, -0.091520436f, + -0.028379958f, -0.01556313f, 0.06554592f, -0.045599163f, + -0.01672207f, -0.020169014f, -0.011877351f, -0.20212261f, // 10 + 0.010889619f, 0.0047078193f, 0.038385306f, 0.08540671f, + -0.017140968f, -0.0035865551f, 0.016678626f, 0.005633034f, + 0.015963363f, 0.00871737f, 0.060130805f, 0.028611384f, + 0.10109069f, -0.015060172f, -0.07894427f, 0.06401885f, // 11 + 0.011584063f, -0.024466386f, 0.0047652307f, -0.09041358f, + 0.030737216f, -0.0046374933f, 0.14215417f, -0.11823516f, + 0.019899689f, 0.006106124f, -0.027092824f, 0.0786356f, + 0.05052217f, -0.058925f, -0.011402121f, -0.024987547f, // 12 + -0.0013661642f, -0.06832946f, -0.015667673f, -0.1083353f, + -0.00096863037f, -0.06988685f, -0.053350925f, -0.027275559f, + -0.033664223f, -0.07978348f, -0.025200296f, -0.017207067f, + -0.058403496f, -0.055697463f, 0.005798788f, 0.12965427f, // 13 + -0.062582195f, 
0.0013350133f, -0.10482091f, 0.0379771f, + 0.072521195f, -0.0029455067f, -0.13797039f, -0.03628521f, + 0.013806405f, -0.017858358f, -0.01008298f, -0.07700066f, + -0.017081132f, 0.019358726f, 0.0027079724f, 0.004635139f, // 14 + 0.062634714f, -0.02338735f, -0.039547626f, -0.02050681f, + 0.03385117f, -0.083611414f, 0.002862572f, -0.09421313f, + 0.058618143f, -0.08598433f, 0.00972939f, 0.023867095f, + -0.053934585f, -0.023203006f, 0.07452513f, -0.048767887f, // 15 + -0.07314807f, -0.056307215f, -0.10433547f, -0.06440842f, + 0.04328182f, 0.04389765f, -0.020006588f, -0.09076438f, + -0.11652589f, -0.021705797f, 0.03345259f, -0.010329105f, + -0.025767034f, 0.013057034f, -0.07316461f, -0.10145612f, // 16 + 0.06358255f, 0.18531723f, 0.07759293f, 0.12006465f, + 0.1305557f, 0.058638252f, -0.03393652f, 0.09622831f, + -0.16253184f, -2.4580743e-06f, 0.079869635f, -0.070196845f, + -0.005644518f, 0.06857898f, -0.12598175f, -0.035084512f, // 17 + 0.03156317f, -0.12794146f, -0.031963028f, 0.04692781f, + 0.030070418f, 0.0071660685f, -0.095516115f, -0.004643372f, + 0.040170413f, -0.062104587f, -0.0037324072f, 0.0554317f, + 0.08184801f, -0.019164372f, 0.06791302f, 0.034257166f, // 18 + -0.10307039f, 0.021943003f, 0.046745934f, 0.0790918f, + -0.0265588f, -0.007824208f, 0.042546265f, -0.00977924f, + -0.0002440307f, -0.017384544f, -0.017990116f, 0.12252321f, + -0.014512694f, -0.08251313f, 0.08861942f, 0.13589665f, // 19 + 0.026351685f, 0.012641483f, 0.07466548f, 0.044301085f, + -0.045414884f, -0.051112458f, 0.03444247f, -0.08502782f, + -0.04106223f, -0.028126027f, 0.028473156f, 0.10467447f}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({20, 16}); + float recurrentToForgetWeightsValue[] = { + -0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, // 00 + 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, + -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, + 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, + 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, // 01 + -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, + -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, + 0.061878487f, -0.04729229f, 0.034919553f, -0.07585433f, + -0.04421272f, -0.044019096f, 0.085488975f, 0.04058006f, // 02 + -0.06890133f, -0.030951202f, -0.024628663f, -0.07672815f, + 0.034293607f, 0.08556707f, -0.05293577f, -0.033561368f, + -0.04899627f, 0.0241671f, 0.015736353f, -0.095442444f, + -0.029564252f, 0.016493602f, -0.035026584f, 0.022337519f, // 03 + -0.026871363f, 0.004780428f, 0.0077918363f, -0.03601621f, + 0.016435321f, -0.03263031f, -0.09543275f, -0.047392778f, + 0.013454138f, 0.028934088f, 0.01685226f, -0.086110644f, + -0.046250615f, -0.01847454f, 0.047608484f, 0.07339695f, // 04 + 0.034546845f, -0.04881143f, 0.009128804f, -0.08802852f, + 0.03761666f, 0.008096139f, -0.014454086f, 0.014361001f, + -0.023502491f, -0.0011840804f, -0.07607001f, 0.001856849f, + -0.06509276f, -0.006021153f, -0.08570962f, -0.1451793f, // 05 + 0.060212336f, 0.055259194f, 0.06974018f, 0.049454916f, + -0.027794661f, -0.08077226f, -0.016179763f, 0.1169753f, + 0.17213494f, -0.0056326236f, -0.053934924f, -0.0124349f, + -0.11520337f, 0.05409887f, 0.088759385f, 0.0019655675f, // 06 + 0.0042065294f, 0.03881498f, 0.019844765f, 0.041858196f, + -0.05695512f, 0.047233116f, 0.038937137f, -0.06542224f, + 0.014429736f, -0.09719407f, 0.13908425f, -0.05379757f, + 0.012321099f, 0.082840554f, -0.029899208f, 
0.044217527f, // 07 + 0.059855383f, 0.07711018f, -0.045319796f, 0.0948846f, + -0.011724666f, -0.0033288454f, -0.033542685f, -0.04764985f, + -0.13873616f, 0.040668588f, 0.034832682f, -0.015319203f, + -0.018715994f, 0.046002675f, 0.0599172f, -0.043107376f, // 08 + 0.0294216f, -0.002314414f, -0.022424703f, 0.0030315618f, + 0.0014641669f, 0.0029166266f, -0.11878115f, 0.013738511f, + 0.12375372f, -0.0006038222f, 0.029104086f, 0.087442465f, + 0.052958444f, 0.07558703f, 0.04817258f, 0.044462286f, // 09 + -0.015213451f, -0.08783778f, -0.0561384f, -0.003008196f, + 0.047060397f, -0.002058388f, 0.03429439f, -0.018839769f, + 0.024734668f, 0.024614193f, -0.042046934f, 0.09597743f, + -0.0043254104f, 0.04320769f, 0.0064070094f, -0.0019131786f, // 10 + -0.02558259f, -0.022822596f, -0.023273505f, -0.02464396f, + -0.10991725f, -0.006240552f, 0.0074488563f, 0.024044557f, + 0.04383914f, -0.046476185f, 0.028658995f, 0.060410924f, + 0.050786525f, 0.009452605f, -0.0073054377f, -0.024810238f, // 11 + 0.0052906186f, 0.0066939713f, -0.0020913032f, 0.014515517f, + 0.015898481f, 0.021362653f, -0.030262267f, 0.016587038f, + -0.011442813f, 0.041154444f, -0.007631438f, -0.03423484f, + -0.010977775f, 0.036152758f, 0.0066366293f, 0.11915515f, // 12 + 0.02318443f, -0.041350313f, 0.021485701f, -0.10906167f, + -0.028218046f, -0.00954771f, 0.020531068f, -0.11995105f, + -0.03672871f, 0.024019798f, 0.014255957f, -0.05221243f, + -0.00661567f, -0.04630967f, 0.033188973f, 0.10107534f, // 13 + -0.014027541f, 0.030796422f, -0.10270911f, -0.035999842f, + 0.15443139f, 0.07684145f, 0.036571592f, -0.035900835f, + -0.0034699554f, 0.06209149f, 0.015920248f, -0.031122351f, + -0.03858649f, 0.01849943f, 0.13872518f, 0.01503974f, // 14 + 0.069941424f, -0.06948533f, -0.0088794185f, 0.061282158f, + -0.047401894f, 0.03100163f, -0.041533746f, -0.10430945f, + 0.044574402f, -0.01425562f, -0.024290353f, 0.034563623f, + 0.05866852f, 0.023947537f, -0.09445152f, 0.035450947f, // 15 + 0.02247216f, -0.0042998926f, 0.061146557f, -0.10250651f, + 0.020881841f, -0.06747029f, 0.10062043f, -0.0023941975f, + 0.03532124f, -0.016341697f, 0.09685456f, -0.016764693f, + 0.051808182f, 0.05875331f, -0.04536488f, 0.001626336f, // 16 + -0.028892258f, -0.01048663f, -0.009793449f, -0.017093895f, + 0.010987891f, 0.02357273f, -0.00010856845f, 0.0099760275f, + -0.001845119f, -0.03551521f, 0.0018358806f, 0.05763657f, + -0.01769146f, 0.040995963f, 0.02235177f, -0.060430344f, // 17 + 0.11475477f, -0.023854522f, 0.10071741f, 0.0686208f, + -0.014250481f, 0.034261297f, 0.047418304f, 0.08562733f, + -0.030519066f, 0.0060542435f, 0.014653856f, -0.038836084f, + 0.04096551f, 0.032249358f, -0.08355519f, -0.026823482f, // 18 + 0.056386515f, -0.010401743f, -0.028396193f, 0.08507674f, + 0.014410365f, 0.020995233f, 0.17040324f, 0.11511526f, + 0.02459721f, 0.0066619175f, 0.025853224f, -0.023133837f, + -0.081302024f, 0.017264642f, -0.009585969f, 0.09491168f, // 19 + -0.051313367f, 0.054532815f, -0.014298593f, 0.10657464f, + 0.007076659f, 0.10964551f, 0.0409152f, 0.008275321f, + -0.07283536f, 0.07937492f, 0.04192024f, -0.1075027f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. 
+ hidl_vec recurrentToCellWeightsDimensions({20, 16}); + float recurrentToCellWeightsValue[] = { + -0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f, + 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, + 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, + -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, + 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, + 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, + -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, + -0.019443132f, -0.030755889f, -0.0040000007f, 0.04465846f, + -0.021585021f, 0.0031670958f, 0.0053199246f, -0.056117613f, + -0.10893326f, 0.076739706f, -0.08509834f, -0.027997585f, + 0.037871376f, 0.01449768f, -0.09002357f, -0.06111149f, + -0.046195522f, 0.0422062f, -0.005683705f, -0.1253618f, + -0.012925729f, -0.04890792f, 0.06985068f, 0.037654128f, + 0.03398274f, -0.004781977f, 0.007032333f, -0.031787455f, + 0.010868644f, -0.031489216f, 0.09525667f, 0.013939797f, + 0.0058680447f, 0.0167067f, 0.02668468f, -0.04797466f, + -0.048885044f, -0.12722108f, 0.035304096f, 0.06554885f, + 0.00972396f, -0.039238118f, -0.05159735f, -0.11329045f, + 0.1613692f, -0.03750952f, 0.06529313f, -0.071974665f, + -0.11769596f, 0.015524369f, -0.0013754242f, -0.12446318f, + 0.02786344f, -0.014179351f, 0.005264273f, 0.14376344f, + 0.015983658f, 0.03406988f, -0.06939408f, 0.040699873f, + 0.02111075f, 0.09669095f, 0.041345075f, -0.08316494f, + -0.07684199f, -0.045768797f, 0.032298047f, -0.041805092f, + 0.0119405f, 0.0061010392f, 0.12652606f, 0.0064572375f, + -0.024950314f, 0.11574242f, 0.04508852f, -0.04335324f, + 0.06760663f, -0.027437469f, 0.07216407f, 0.06977076f, + -0.05438599f, 0.034033038f, -0.028602652f, 0.05346137f, + 0.043184172f, -0.037189785f, 0.10420091f, 0.00882477f, + -0.054019816f, -0.074273005f, -0.030617684f, -0.0028467078f, + 0.024302477f, -0.0038869337f, 0.005332455f, 0.0013399826f, + 0.04361412f, -0.007001822f, 0.09631092f, -0.06702025f, + -0.042049985f, -0.035070654f, -0.04103342f, -0.10273396f, + 0.0544271f, 0.037184782f, -0.13150354f, -0.0058036847f, + -0.008264958f, 0.042035464f, 0.05891794f, 0.029673764f, + 0.0063542654f, 0.044788733f, 0.054816857f, 0.062257513f, + -0.00093483756f, 0.048938446f, -0.004952862f, -0.007730018f, + -0.04043371f, -0.017094059f, 0.07229206f, -0.023670016f, + -0.052195564f, -0.025616996f, -0.01520939f, 0.045104615f, + -0.007376126f, 0.003533447f, 0.006570588f, 0.056037236f, + 0.12436656f, 0.051817212f, 0.028532185f, -0.08686856f, + 0.11868599f, 0.07663395f, -0.07323171f, 0.03463402f, + -0.050708205f, -0.04458982f, -0.11590894f, 0.021273347f, + 0.1251325f, -0.15313013f, -0.12224372f, 0.17228661f, + 0.023029093f, 0.086124025f, 0.006445803f, -0.03496501f, + 0.028332196f, 0.04449512f, -0.042436164f, -0.026587414f, + -0.006041347f, -0.09292539f, -0.05678812f, 0.03897832f, + 0.09465633f, 0.008115513f, -0.02171956f, 0.08304309f, + 0.071401566f, 0.019622514f, 0.032163795f, -0.004167056f, + 0.02295182f, 0.030739572f, 0.056506045f, 0.004612461f, + 0.06524936f, 0.059999723f, 0.046395954f, -0.0045512207f, + -0.1335546f, -0.030136576f, 0.11584653f, -0.014678886f, + 0.0020118146f, -0.09688814f, -0.0790206f, 0.039770417f, + -0.0329582f, 0.07922767f, 0.029322514f, 0.026405897f, + 0.04207835f, -0.07073373f, 0.063781224f, 0.0859677f, + -0.10925287f, -0.07011058f, 0.048005477f, 0.03438226f, + -0.09606514f, -0.006669445f, -0.043381985f, 0.04240257f, + -0.06955775f, -0.06769346f, 0.043903265f, -0.026784198f, + -0.017840602f, 0.024307009f, -0.040079936f, -0.019946516f, + 0.045318738f, 
-0.12233574f, 0.026170589f, 0.0074471775f, + 0.15978073f, 0.10185836f, 0.10298046f, -0.015476589f, + -0.039390966f, -0.072174534f, 0.0739445f, -0.1211869f, + -0.0347889f, -0.07943156f, 0.014809798f, -0.12412325f, + -0.0030663363f, 0.039695457f, 0.0647603f, -0.08291318f, + -0.018529687f, -0.004423833f, 0.0037507233f, 0.084633216f, + -0.01514876f, -0.056505352f, -0.012800942f, -0.06994386f, + 0.012962922f, -0.031234352f, 0.07029052f, 0.016418684f, + 0.03618972f, 0.055686004f, -0.08663945f, -0.017404709f, + -0.054761406f, 0.029065743f, 0.052404847f, 0.020238016f, + 0.0048197987f, -0.0214882f, 0.07078733f, 0.013016777f, + 0.06262858f, 0.009184685f, 0.020785125f, -0.043904778f, + -0.0270329f, -0.03299152f, -0.060088247f, -0.015162964f, + -0.001828936f, 0.12642565f, -0.056757294f, 0.013586685f, + 0.09232601f, -0.035886683f, 0.06000002f, 0.05229691f, + -0.052580316f, -0.082029596f, -0.010794592f, 0.012947712f, + -0.036429964f, -0.085508935f, -0.13127148f, -0.017744139f, + 0.031502828f, 0.036232427f, -0.031581745f, 0.023051167f, + -0.05325106f, -0.03421577f, 0.028793324f, -0.034633752f, + -0.009881397f, -0.043551125f, -0.018609839f, 0.0019097115f, + -0.008799762f, 0.056595087f, 0.0022273948f, 0.055752404f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({20, 16}); + float recurrentToOutputWeightsValue[] = { + 0.025825322f, -0.05813119f, 0.09495884f, -0.045984812f, + -0.01255415f, -0.0026479573f, -0.08196161f, -0.054914974f, + -0.0046604523f, -0.029587349f, -0.044576716f, -0.07480124f, + -0.082868785f, 0.023254942f, 0.027502948f, -0.0039728214f, + -0.08683098f, -0.08116779f, -0.014675607f, -0.037924774f, + -0.023314456f, -0.007401714f, -0.09255757f, 0.029460307f, + -0.08829125f, -0.005139627f, -0.08989442f, -0.0555066f, + 0.13596267f, -0.025062224f, -0.048351806f, -0.03850004f, + 0.07266485f, -0.022414139f, 0.05940088f, 0.075114764f, + 0.09597592f, -0.010211725f, -0.0049794707f, -0.011523867f, + -0.025980417f, 0.072999895f, 0.11091378f, -0.081685916f, + 0.014416728f, 0.043229222f, 0.034178585f, -0.07530371f, + 0.035837382f, -0.085607f, -0.007721233f, -0.03287832f, + -0.043848954f, -0.06404588f, -0.06632928f, -0.073643476f, + 0.008214239f, -0.045984086f, 0.039764922f, 0.03474462f, + 0.060612556f, -0.080590084f, 0.049127717f, 0.04151091f, + -0.030063879f, 0.008801774f, -0.023021035f, -0.019558564f, + 0.05158114f, -0.010947698f, -0.011825728f, 0.0075720972f, + 0.0699727f, -0.0039981045f, 0.069350146f, 0.08799282f, + 0.016156472f, 0.035502106f, 0.11695009f, 0.006217345f, + 0.13392477f, -0.037875112f, 0.025745004f, 0.08940699f, + -0.00924166f, 0.0046702605f, -0.036598757f, -0.08811812f, + 0.10522024f, -0.032441203f, 0.008176899f, -0.04454919f, + 0.07058152f, 0.0067963637f, 0.039206743f, 0.03259838f, + 0.03725492f, -0.09515802f, 0.013326398f, -0.052055415f, + -0.025676316f, 0.03198509f, -0.015951829f, -0.058556724f, + 0.036879618f, 0.043357447f, 0.028362012f, -0.05908629f, + 0.0059240665f, -0.04995891f, -0.019187413f, 0.0276265f, + -0.01628143f, 0.0025863599f, 0.08800015f, 0.035250366f, + -0.022165963f, -0.07328642f, -0.009415526f, -0.07455109f, + 0.11690406f, 0.0363299f, 0.07411125f, 0.042103454f, + -0.009660886f, 0.019076364f, 0.018299393f, -0.046004917f, + 0.08891175f, 0.0431396f, -0.026327137f, -0.051502608f, + 0.08979574f, -0.051670972f, 0.04940282f, -0.07491107f, + -0.021240504f, 0.022596184f, -0.034280192f, 0.060163025f, + -0.058211457f, -0.051837247f, -0.01349775f, 
-0.04639988f, + -0.035936575f, -0.011681591f, 0.064818054f, 0.0073146066f, + -0.021745546f, -0.043124277f, -0.06471268f, -0.07053354f, + -0.029321948f, -0.05330136f, 0.016933719f, -0.053782392f, + 0.13747959f, -0.1361751f, -0.11569455f, 0.0033329215f, + 0.05693899f, -0.053219706f, 0.063698f, 0.07977434f, + -0.07924483f, 0.06936997f, 0.0034815092f, -0.007305279f, + -0.037325785f, -0.07251102f, -0.033633437f, -0.08677009f, + 0.091591336f, -0.14165086f, 0.021752775f, 0.019683983f, + 0.0011612234f, -0.058154266f, 0.049996935f, 0.0288841f, + -0.0024567875f, -0.14345716f, 0.010955264f, -0.10234828f, + 0.1183656f, -0.0010731248f, -0.023590032f, -0.072285876f, + -0.0724771f, -0.026382286f, -0.0014920527f, 0.042667855f, + 0.0018776858f, 0.02986552f, 0.009814309f, 0.0733756f, + 0.12289186f, 0.018043943f, -0.0458958f, 0.049412545f, + 0.033632483f, 0.05495232f, 0.036686596f, -0.013781798f, + -0.010036754f, 0.02576849f, -0.08307328f, 0.010112348f, + 0.042521734f, -0.05869831f, -0.071689695f, 0.03876447f, + -0.13275425f, -0.0352966f, -0.023077697f, 0.10285965f, + 0.084736146f, 0.15568255f, -0.00040734606f, 0.027835453f, + -0.10292561f, -0.032401145f, 0.10053256f, -0.026142767f, + -0.08271222f, -0.0030240538f, -0.016368777f, 0.1070414f, + 0.042672627f, 0.013456989f, -0.0437609f, -0.022309763f, + 0.11576483f, 0.04108048f, 0.061026827f, -0.0190714f, + -0.0869359f, 0.037901703f, 0.0610107f, 0.07202949f, + 0.01675338f, 0.086139716f, -0.08795751f, -0.014898893f, + -0.023771819f, -0.01965048f, 0.007955471f, -0.043740474f, + 0.03346837f, -0.10549954f, 0.090567775f, 0.042013682f, + -0.03176985f, 0.12569028f, -0.02421228f, -0.029526481f, + 0.023851605f, 0.031539805f, 0.05292009f, -0.02344001f, + -0.07811758f, -0.08834428f, 0.10094801f, 0.16594367f, + -0.06861939f, -0.021256343f, -0.041093912f, -0.06669611f, + 0.035498552f, 0.021757556f, -0.09302526f, -0.015403468f, + -0.06614931f, -0.051798206f, -0.013874718f, 0.03630673f, + 0.010412845f, -0.08077351f, 0.046185967f, 0.0035662893f, + 0.03541868f, -0.094149634f, -0.034814864f, 0.003128424f, + -0.020674974f, -0.03944324f, -0.008110165f, -0.11113267f, + 0.08484226f, 0.043586485f, 0.040582247f, 0.0968012f, + -0.065249965f, -0.028036479f, 0.0050708856f, 0.0017462453f, + 0.0326779f, 0.041296225f, 0.09164146f, -0.047743853f, + -0.015952192f, -0.034451712f, 0.084197424f, -0.05347844f, + -0.11768019f, 0.085926116f, -0.08251791f, -0.045081906f, + 0.0948852f, 0.068401024f, 0.024856757f, 0.06978981f, + -0.057309967f, -0.012775832f, -0.0032452994f, 0.01977615f, + -0.041040014f, -0.024264973f, 0.063464895f, 0.05431621f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({20}); + float cellToInputWeightsValue[] = {0.040369894f, 0.030746894f, 0.24704495f, 0.018586371f, -0.037586458f, + -0.15312155f, -0.11812848f, -0.11465643f, 0.20259799f, 0.11418174f, + -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f, -0.052169047f, + 0.21198851f, -0.38871562f, -0.09061183f, -0.09683246f, -0.21929175f}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
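+ // Note: this and the other optional cell-to-gate vectors (operands 09 and 11) are the
+ // peephole connections; each gate reads only its own cell-state element (a diagonal
+ // weighting), which is why a 1-D vector of num_units values suffices instead of a full matrix.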
+ hidl_vec cellToForgetWeightsDimensions({20}); + float cellToForgetWeightsValue[] = {-0.01998659f, -0.15568835f, -0.24248174f, -0.012770197f, 0.041331276f, + -0.072311886f, -0.052123554f, -0.0066330447f, -0.043891653f, 0.036225766f, + -0.047248036f, 0.021479502f, 0.033189066f, 0.11952997f, -0.020432774f, + 0.64658105f, -0.06650122f, -0.03467612f, 0.095340036f, 0.23647355f}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({20}); + float cellToOutputWeightsValue[] = {0.08286371f, -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f, + -0.5495371f, -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f, + -0.11940523f, 0.007358328f, 0.1890978f, 0.4833202f, -0.34441817f, + 0.36312827f, -0.26375428f, 0.1457655f, -0.19724406f, 0.15548733f}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({20}); + float inputGateBiasValue[] = {0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f, 0.053110216f, + -0.06928846f, -0.13942584f, -0.11816189f, 0.19483899f, 0.03652339f, + -0.10250295f, 0.036714908f, -0.18426876f, 0.036065217f, 0.21810818f, + 0.02383196f, -0.043370757f, 0.08690144f, -0.04444982f, 0.00030581196f}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({20}); + float forgetGateBiasValue[] = {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f, 0.11098921f, + 0.15378423f, 0.09263801f, 0.09790885f, 0.09508917f, 0.061199076f, + 0.07665568f, -0.015443159f, -0.03499149f, 0.046190713f, 0.08895977f, + 0.10899629f, 0.40694186f, 0.06030037f, 0.012413437f, -0.06108739f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({20}); + float cellBiasValue[] = {-0.024379363f, 0.0055531194f, 0.23377132f, 0.033463873f, -0.1483596f, + -0.10639995f, -0.091433935f, 0.058573797f, -0.06809782f, -0.07889636f, + -0.043246906f, -0.09829136f, -0.4279842f, 0.034901652f, 0.18797937f, + 0.0075234566f, 0.016178843f, 0.1749513f, 0.13975595f, 0.92058027f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({20}); + float outputGateBiasValue[] = {0.046159424f, -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f, + 0.35373217f, -0.018957434f, 0.008907322f, -0.0762701f, 0.12018895f, + 0.04216877f, 0.0022856654f, 0.040952638f, 0.3147856f, 0.08225149f, + -0.057416286f, -0.14995944f, -0.008040261f, 0.13208859f, 0.029760877f}; + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. 
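+ // Note: the projection layer is what lets output_size (16) differ from num_units (20);
+ // informally, output = clip(projection_weights * gated_cell_output + projection_bias).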
+ hidl_vec projectionWeightsDimensions({16, 20}); + float projectionWeightsValue[] = { + -0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, + 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f, + -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, + -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, + 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, + 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f, + 0.08682067f, 0.17240396f, 0.014975425f, 0.056431185f, 0.031037588f, + 0.16702051f, 0.0077946745f, 0.15140012f, 0.29405436f, 0.120285f, + -0.188994f, -0.027265169f, 0.043389652f, -0.022061434f, 0.014777949f, + -0.20203483f, 0.094781205f, 0.19100232f, 0.13987629f, -0.036132768f, + -0.06426278f, -0.05108664f, 0.13221376f, 0.009441198f, -0.16715929f, + 0.15859416f, -0.040437475f, 0.050779544f, -0.022187516f, 0.012166504f, + 0.027685808f, -0.07675938f, -0.0055694645f, -0.09444123f, 0.0046453946f, + 0.050794356f, 0.10770313f, -0.20790008f, -0.07149004f, -0.11425117f, + 0.008225835f, -0.035802525f, 0.14374903f, 0.15262283f, 0.048710253f, + 0.1847461f, -0.007487823f, 0.11000021f, -0.09542012f, 0.22619456f, + -0.029149994f, 0.08527916f, 0.009043713f, 0.0042746216f, 0.016261552f, + 0.022461696f, 0.12689082f, -0.043589946f, -0.12035478f, -0.08361797f, + -0.050666027f, -0.1248618f, -0.1275799f, -0.071875185f, 0.07377272f, + 0.09944291f, -0.18897448f, -0.1593054f, -0.06526116f, -0.040107165f, + -0.004618631f, -0.067624845f, -0.007576253f, 0.10727444f, 0.041546922f, + -0.20424393f, 0.06907816f, 0.050412357f, 0.00724631f, 0.039827548f, + 0.12449835f, 0.10747581f, 0.13708383f, 0.09134148f, -0.12617786f, + -0.06428341f, 0.09956831f, 0.1208086f, -0.14676677f, -0.0727722f, + 0.1126304f, 0.010139365f, 0.015571211f, -0.038128063f, 0.022913318f, + -0.042050496f, 0.16842307f, -0.060597885f, 0.10531834f, -0.06411776f, + -0.07451711f, -0.03410368f, -0.13393489f, 0.06534304f, 0.003620307f, + 0.04490757f, 0.05970546f, 0.05197996f, 0.02839995f, 0.10434969f, + -0.013699693f, -0.028353551f, -0.07260381f, 0.047201227f, -0.024575593f, + -0.036445823f, 0.07155557f, 0.009672501f, -0.02328883f, 0.009533515f, + -0.03606021f, -0.07421458f, -0.028082801f, -0.2678904f, -0.13221288f, + 0.18419984f, -0.13012612f, -0.014588381f, -0.035059117f, -0.04824723f, + 0.07830115f, -0.056184657f, 0.03277091f, 0.025466874f, 0.14494097f, + -0.12522776f, -0.098633975f, -0.10766018f, -0.08317623f, 0.08594209f, + 0.07749552f, 0.039474737f, 0.1776665f, -0.07409566f, -0.0477268f, + 0.29323658f, 0.10801441f, 0.1154011f, 0.013952499f, 0.10739139f, + 0.10708251f, -0.051456142f, 0.0074137426f, -0.10430189f, 0.10034707f, + 0.045594677f, 0.0635285f, -0.0715442f, -0.089667566f, -0.10811871f, + 0.00026344223f, 0.08298446f, -0.009525053f, 0.006585689f, -0.24567553f, + -0.09450807f, 0.09648481f, 0.026996298f, -0.06419476f, -0.04752702f, + -0.11063944f, -0.23441927f, -0.17608605f, -0.052156363f, 0.067035615f, + 0.19271925f, -0.0032889997f, -0.043264326f, 0.09663576f, -0.057112187f, + -0.10100678f, 0.0628376f, 0.04447668f, 0.017961001f, -0.10094388f, + -0.10190601f, 0.18335468f, 0.10494553f, -0.052095775f, -0.0026118709f, + 0.10539724f, -0.04383912f, -0.042349473f, 0.08438151f, -0.1947263f, + 0.02251204f, 0.11216432f, -0.10307853f, 0.17351969f, -0.039091777f, + 0.08066188f, -0.00561982f, 0.12633002f, 0.11335965f, -0.0088127935f, + -0.019777594f, 0.06864014f, -0.059751723f, 0.016233567f, -0.06894641f, + -0.28651384f, -0.004228674f, 0.019708522f, 
-0.16305895f, -0.07468996f, + -0.0855457f, 0.099339016f, -0.07580735f, -0.13775392f, 0.08434318f, + 0.08330512f, -0.12131499f, 0.031935584f, 0.09180414f, -0.08876437f, + -0.08049874f, 0.008753825f, 0.03498998f, 0.030215185f, 0.03907079f, + 0.089751154f, 0.029194152f, -0.03337423f, -0.019092513f, 0.04331237f, + 0.04299654f, -0.036394123f, -0.12915532f, 0.09793732f, 0.07512415f, + -0.11319543f, -0.032502122f, 0.15661901f, 0.07671967f, -0.005491124f, + -0.19379048f, -0.218606f, 0.21448623f, 0.017840758f, 0.1416943f, + -0.07051762f, 0.19488361f, 0.02664691f, -0.18104725f, -0.09334311f, + 0.15026465f, -0.15493552f, -0.057762887f, -0.11604192f, -0.262013f, + -0.01391798f, 0.012185008f, 0.11156489f, -0.07483202f, 0.06693364f, + -0.26151478f, 0.046425626f, 0.036540434f, -0.16435726f, 0.17338543f, + -0.21401681f, -0.11385144f, -0.08283257f, -0.069031075f, 0.030635102f, + 0.010969227f, 0.11109743f, 0.010919218f, 0.027526086f, 0.13519906f, + 0.01891392f, -0.046839405f, -0.040167913f, 0.017953383f, -0.09700955f, + 0.0061885654f, -0.07000971f, 0.026893595f, -0.038844477f, 0.14543656f}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({2, 16}); + std::vector outputStateInValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({2, 20}); + std::vector cellStateInValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({2, 60}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. 
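+ // Note: per the operand descriptions, output 3 ("The output") is effectively a copy of this
+ // output state, so the two expected arrays below agree to within rounding in the last
+ // printed digit.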
+ hidl_vec outputStateOutDimensions({2, 16}); + std::vector outputStateOutValue { + -0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835577f, -0.0211779f, 0.0283512f, -0.0114597f, + 0.00907307f, -0.0244004f, -0.0152191f, -0.0259063f, 0.00914318f, 0.00415119f, 0.017147f, 0.0134203f, + -0.013869f, 0.0287268f, -0.00334694f, 0.00733397f, -0.0287926f, -0.0186926f, 0.0193662f, -0.0115437f, + 0.00422612f, -0.0345232f, 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, 0.0216801f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({2, 20}); + std::vector cellStateOutValue { + -0.0531632f, -0.0118138f, 0.0870833f, 0.0347929f, -0.076144f, + -0.0659219f, -0.0463811f, 0.0141307f, -0.0127706f, -0.03782f, + -0.00402401f, -0.00571876f, -0.187957f, -0.0247127f, 0.0711425f, + 0.008244f, 0.0492649f, 0.126972f, 0.0933097f, 0.29848f, + -0.0966178f, -0.114417f, 0.0387229f, 0.0453255f, -0.181286f, + -0.0651251f, -0.0996879f, -0.00276995f, 0.0617558f, -0.0100728f, + 0.056304f, -0.077416f, -0.162858f, -0.0541251f, 0.0571202f, + -0.0525331f, 0.0724297f, 0.171029f, 0.141738f, 0.295483f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + hidl_vec outputDimensions({2, 16}); + std::vector outputValue { + -0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835576f, -0.0211779f, 0.0283512f, -0.0114597f, + 0.00907307f, -0.0244004f, -0.0152191f, -0.0259063f, 0.00914318f, 0.00415118f, 0.017147f, 0.0134203f, + -0.013869f, 0.0287268f, -0.00334693f, 0.00733398f, -0.0287926f, -0.0186926f, 0.0193662f, -0.0115437f, + 0.00422612f, -0.0345232f, 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, 0.02168f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm2.model.cpp + // with values from 
android/frameworks/ml/nn/runtime/test/generated/examples/lstm2.example.cpp
+ // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors).
+ // The batch size has been increased to 2 (it was 1 in the VTS test) with appropriate input and output values added.
+
+ uint32_t batchSize = 2;
+ uint32_t inputSize = 2;
+ uint32_t numUnits = 4;
+ uint32_t outputSize = numUnits;
+
+ // Inputs:
+ // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec inputDimensions({batchSize, inputSize});
+ std::vector inputValue {2.0f, 3.0f, 3.0f, 4.0f};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec inputToInputWeightsDimensions({0});
+ float inputToInputWeightsValue[] = {};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec inputToForgetWeightsDimensions({numUnits, inputSize});
+ float inputToForgetWeightsValue[] = {-0.55291498f, -0.42866567f,
+                                       0.13056988f, -0.36333650f,
+                                      -0.22755712f,  0.28253698f,
+                                       0.24407166f,  0.33826375f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec inputToCellWeightsDimensions({numUnits, inputSize});
+ float inputToCellWeightsValue[] = {-0.49770179f, -0.27711356f,
+                                    -0.09624726f,  0.05100781f,
+                                     0.04717243f,  0.48944736f,
+                                    -0.38535351f, -0.17212132f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec inputToOutputWeightsDimensions({numUnits, inputSize});
+ float inputToOutputWeightsValue[] = { 0.10725588f, -0.02335852f,
+                                      -0.55932593f, -0.09426838f,
+                                      -0.44257352f,  0.54939759f,
+                                       0.01533556f,  0.42751634f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ // Note: the VTS test declares this operand with dimensions {4, 4}, but a CIFG network has no
+ // input gate, so its recurrent weights are passed as an empty ({0}) tensor here.
+ hidl_vec recurrentToInputWeightsDimensions({0});
+ float recurrentToInputWeightsValue[] = {};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToForgetWeightsDimensions({numUnits, outputSize});
+ float recurrentToForgetWeightsValue[] = {-0.13832897f, -0.05151010f, -0.23590070f, -0.16661474f,
+                                          -0.14340827f,  0.36986142f,  0.23414481f,  0.55899000f,
+                                           0.10798943f, -0.41174671f,  0.17751795f, -0.34484994f,
+                                          -0.35874045f, -0.11352962f,  0.27268326f,  0.54058349f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToCellWeightsDimensions({numUnits, outputSize});
+ float recurrentToCellWeightsValue[] = { 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f,
+                                         0.42957711f,  0.01841056f, -0.32764608f, -0.33027974f,
+                                        -0.10826075f,  0.20675004f,  0.19069612f, -0.03026325f,
+                                        -0.54532051f,  0.33003211f,  0.44901288f,  0.21193194f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToOutputWeightsDimensions({numUnits, outputSize});
+ float recurrentToOutputWeightsValue[] = { 0.41613156f,  0.42610586f, -0.16495961f, -0.56638730f,
+                                           0.30579174f, -0.05115908f, -0.33941799f,  0.23364776f,
+                                           0.11178309f,  0.09481031f, -0.26424935f,  0.46261835f,
+                                           0.50248802f,  0.26114327f, -0.43736315f,  0.33149987f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToInputWeightsDimensions({0});
+ float cellToInputWeightsValue[] = {};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToForgetWeightsDimensions({numUnits});
+ float cellToForgetWeightsValue[] = {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToOutputWeightsDimensions({numUnits});
+ float cellToOutputWeightsValue[] = {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ // Note: the VTS test declares this operand with dimensions {4}, but a CIFG network has no
+ // input gate, so its bias is passed as an empty ({0}) tensor here.
+ hidl_vec inputGateBiasDimensions({0});
+ float inputGateBiasValue[] = {};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec forgetGateBiasDimensions({numUnits});
+ float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellBiasDimensions({numUnits});
+ float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec outputGateBiasDimensions({numUnits});
+ float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec projectionWeightsDimensions({0});
+ float projectionWeightsValue[] = {};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec projectionBiasDimensions({0});
+ float projectionBiasValue[] = {};
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec outputStateInDimensions({batchSize, outputSize});
+ std::vector outputStateInValue {0, 0, 0, 0, 0, 0, 0, 0};
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec cellStateInDimensions({batchSize, numUnits});
+ std::vector cellStateInValue {0, 0, 0, 0, 0, 0, 0, 0};
+
+ // constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec activationFunctionDimensions({});
+ int32_t activationFunctionValue[] = {4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec cellClippingThresholdDimensions({});
+ float cellClippingThresholdValue[] = {0.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
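+ // Note: clipping with a threshold t bounds a value roughly as std::min(std::max(x, -t), t);
+ // both thresholds in this test are 0.0f, so clipping is disabled (and this CIFG variant has
+ // no projection layer in any case).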
+ hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({batchSize, numUnits * 4}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({batchSize, outputSize}); + std::vector outputStateOutValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({batchSize, numUnits}); + std::vector cellStateOutValue {-0.76044439f, -0.01804161f, 0.18226376f, -0.06493707f, + -0.90477051f, -0.04355603f, 0.18475688f, -0.04158677f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + hidl_vec outputDimensions({batchSize, outputSize}); + std::vector outputValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Merger.cpp b/test/Merger.cpp index 48253604..aeaff0cb 100644 --- a/test/Merger.cpp +++ b/test/Merger.cpp @@ -4,28 +4,33 @@ // #include "DriverTestHelpers.hpp" #include "TestTensor.hpp" +#include #include +#include #include BOOST_AUTO_TEST_SUITE(MergerTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; namespace { +static const boost::array COMPUTE_DEVICES = 
{{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }}; + void MergerTestImpl(const std::vector & inputs, int32_t concatAxis, const TestTensor & expectedOutputTensor, + armnn::Compute computeDevice, ErrorStatus expectedPrepareStatus=ErrorStatus::NONE, ErrorStatus expectedExecStatus=ErrorStatus::NONE) { - std::unique_ptr driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model{}; + std::unique_ptr driver = std::make_unique(DriverOptions(computeDevice)); + neuralnetworks::V1_0::Model model{}; hidl_vec modelInputIds; modelInputIds.resize(inputs.size()+1); @@ -40,7 +45,7 @@ MergerTestImpl(const std::vector & inputs, // make the concat operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::CONCATENATION; + model.operations[0].type = neuralnetworks::V1_0::OperationType::CONCATENATION; model.operations[0].inputs = modelInputIds; model.operations[0].outputs = hidl_vec{static_cast(inputs.size()+1)}; @@ -130,7 +135,8 @@ MergerTestImpl(const std::vector & inputs, } // namespace -BOOST_AUTO_TEST_CASE(SimpleConcatAxis0) + +BOOST_DATA_TEST_CASE(SimpleConcatAxis0, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -139,10 +145,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis0) TestTensor expected{armnn::TensorShape{3,1,1,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(ConcatAxis0_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis0_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{2,1,2,1},{0, 1, @@ -159,10 +165,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis0_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis1) +BOOST_DATA_TEST_CASE(SimpleConcatAxis1, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -171,10 +177,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis1) TestTensor expected{armnn::TensorShape{1,3,1,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(ConcatAxis1_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis1_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1, @@ -191,10 +197,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis1_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis1_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis1_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{2,2,1,1},{0, 1, @@ -207,10 +213,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis1_DoInterleave) TestTensor expected{armnn::TensorShape{2,6,1,1},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis2) +BOOST_DATA_TEST_CASE(SimpleConcatAxis2, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -219,10 +225,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis2) TestTensor expected{armnn::TensorShape{1,1,3,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } 
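The hunks above and below apply one mechanical change throughout: each BOOST_AUTO_TEST_CASE becomes a BOOST_DATA_TEST_CASE parameterised over COMPUTE_DEVICES, so every concatenation test runs once per backend and receives the current backend through Boost.Test's default dataset variable, `sample`. A minimal self-contained sketch of that pattern (the enum, operator<< and test name below are hypothetical stand-ins, not armnn code, and it assumes the file is compiled into an existing Boost.Test module, as test/Merger.cpp is):

#include <boost/array.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/test/data/test_case.hpp>
#include <ostream>

namespace
{
// Hypothetical stand-in for armnn::Compute, to keep the sketch self-contained.
enum class Compute { CpuRef, GpuAcc };

// Streaming operator so Boost.Test can log which sample a failing run used.
std::ostream& operator<<(std::ostream& os, Compute c)
{
    return os << (c == Compute::CpuRef ? "CpuRef" : "GpuAcc");
}

const boost::array<Compute, 2> COMPUTE_DEVICES = {{ Compute::CpuRef, Compute::GpuAcc }};
} // namespace

// Instantiated once per element of COMPUTE_DEVICES; the element is exposed as 'sample'
// because BOOST_DATA_TEST_CASE was given no explicit variable name.
BOOST_DATA_TEST_CASE(RunsOncePerBackend, COMPUTE_DEVICES)
{
    BOOST_TEST((sample == Compute::CpuRef || sample == Compute::GpuAcc));
}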
-BOOST_AUTO_TEST_CASE(ConcatAxis2_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis2_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -239,10 +245,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis2_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis2_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis2_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1, @@ -255,10 +261,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis2_DoInterleave) TestTensor expected{armnn::TensorShape{1,2,6,1},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis3) +BOOST_DATA_TEST_CASE(SimpleConcatAxis3, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -267,10 +273,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis3) TestTensor expected{armnn::TensorShape{1,1,1,3},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis3_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis3_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -283,10 +289,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis3_DoInterleave) TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(AxisTooBig) +BOOST_DATA_TEST_CASE(AxisTooBig, COMPUTE_DEVICES) { int32_t axis = 4; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -296,10 +302,10 @@ BOOST_AUTO_TEST_CASE(AxisTooBig) // see: https://www.tensorflow.org/api_docs/python/tf/concat TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}}; ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(AxisTooSmall) +BOOST_DATA_TEST_CASE(AxisTooSmall, COMPUTE_DEVICES) { int32_t axis = -5; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -309,20 +315,20 @@ BOOST_AUTO_TEST_CASE(AxisTooSmall) // see: https://www.tensorflow.org/api_docs/python/tf/concat TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}}; ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(TooFewInputs) +BOOST_DATA_TEST_CASE(TooFewInputs, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; // We need at least two tensors to concatenate ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn}, axis, aIn, expectedParserStatus); + MergerTestImpl({&aIn}, axis, aIn, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedInputDimensions) +BOOST_DATA_TEST_CASE(MismatchedInputDimensions, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -336,10 +342,10 @@ BOOST_AUTO_TEST_CASE(MismatchedInputDimensions) // The input dimensions must 
be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &mismatched}, axis, expected, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &mismatched}, axis, expected, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedInputRanks) +BOOST_DATA_TEST_CASE(MismatchedInputRanks, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,2},{0,1}}; @@ -348,10 +354,10 @@ BOOST_AUTO_TEST_CASE(MismatchedInputRanks) // The input dimensions must be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, expected, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, expected, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedOutputDimensions) +BOOST_DATA_TEST_CASE(MismatchedOutputDimensions, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -366,10 +372,10 @@ BOOST_AUTO_TEST_CASE(MismatchedOutputDimensions) // The input and output dimensions must be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedOutputRank) +BOOST_DATA_TEST_CASE(MismatchedOutputRank, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -384,10 +390,10 @@ BOOST_AUTO_TEST_CASE(MismatchedOutputRank) // The input and output ranks must match ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(ValidNegativeAxis) +BOOST_DATA_TEST_CASE(ValidNegativeAxis, COMPUTE_DEVICES) { // this is the same as 3 // see: https://www.tensorflow.org/api_docs/python/tf/concat @@ -402,7 +408,79 @@ BOOST_AUTO_TEST_CASE(ValidNegativeAxis) TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero3D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{3,1,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisOne3D, COMPUTE_DEVICES) +{ + int32_t axis = 1; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,3,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisTwo3D, COMPUTE_DEVICES) +{ + int32_t axis = 2; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,1,3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero2D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1},{1}}; + TestTensor 
cIn{armnn::TensorShape{1,1},{2}}; + + TestTensor expected{armnn::TensorShape{3,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisOne2D, COMPUTE_DEVICES) +{ + int32_t axis = 1; + TestTensor aIn{armnn::TensorShape{1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero1D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1},{0}}; + TestTensor bIn{armnn::TensorShape{1},{1}}; + TestTensor cIn{armnn::TensorShape{1},{2}}; + + TestTensor expected{armnn::TensorShape{3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Tests.cpp b/test/Tests.cpp index 3fa8e125..df98b2ca 100644 --- a/test/Tests.cpp +++ b/test/Tests.cpp @@ -11,9 +11,9 @@ BOOST_AUTO_TEST_SUITE(DriverTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; BOOST_AUTO_TEST_CASE(Init) { @@ -31,9 +31,9 @@ BOOST_AUTO_TEST_CASE(TestCapabilities) auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); ErrorStatus error; - V1_0::Capabilities cap; + neuralnetworks::V1_0::Capabilities cap; - ArmnnDriver::getCapabilities_cb cb = [&](ErrorStatus status, const V1_0::Capabilities& capabilities) + auto cb = [&](ErrorStatus status, const neuralnetworks::V1_0::Capabilities& capabilities) { error = status; cap = capabilities; diff --git a/test/UtilsTests.cpp b/test/UtilsTests.cpp index e7e6cde7..72b6d361 100644 --- a/test/UtilsTests.cpp +++ b/test/UtilsTests.cpp @@ -16,9 +16,10 @@ BOOST_AUTO_TEST_SUITE(UtilsTests) -using namespace armnn_driver; -using namespace android::nn; using namespace android; +using namespace android::nn; +using namespace android::hardware; +using namespace armnn_driver; // The following are helpers for writing unit tests for the driver. namespace @@ -28,9 +29,9 @@ struct ExportNetworkGraphFixture { public: // Setup: set the output dump directory and an empty dummy model (as only its memory address is used). - // Defaulting the output dump directory to "/sdcard" because it should exist and be writable in all deployments. + // Defaulting the output dump directory to "/data" because it should exist and be writable in all deployments. ExportNetworkGraphFixture() - : ExportNetworkGraphFixture("/sdcard") + : ExportNetworkGraphFixture("/data") {} ExportNetworkGraphFixture(const std::string& requestInputsAndOutputsDumpDir) : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) @@ -95,7 +96,7 @@ public: } std::string m_RequestInputsAndOutputsDumpDir; - V1_0::Model m_Model; + neuralnetworks::V1_0::Model m_Model; private: std::string m_FileName; -- cgit v1.2.1