From 0a2dfabd76a45c58d0a14567f0503369c4e6fbf3 Mon Sep 17 00:00:00 2001
From: Sadik Armagan <sadik.armagan@arm.com>
Date: Wed, 6 Oct 2021 16:41:44 +0100
Subject: IVGCVSW-5636 'Implement NNAPI caching functions'

* Cached serialized ArmNN model.

!armnn:6384

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I78120a7f8ea892a28c0ff25f1b54e67a4f912574
---
 1.3/ArmnnDriver.hpp     |  86 +++++++----
 1.3/ArmnnDriverImpl.cpp | 391 ++++++++++++++++++++++++++++++++++++++++++++++--
 1.3/ArmnnDriverImpl.hpp |  29 +++-
 3 files changed, 453 insertions(+), 53 deletions(-)

(limited to '1.3')
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index fd4aa74c..6d2e0b7a 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -21,6 +21,8 @@
 #include "../1.0/ArmnnDriverImpl.hpp"
 #include "../1.0/HalPolicy.hpp"
 
+#include <armnn/BackendHelper.hpp>
+
 #include <log/log.h>
 
 namespace armnn_driver
@@ -31,6 +33,7 @@ namespace hal_1_3
 class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice
 {
 public:
+    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
 
     ArmnnDriver(DriverOptions options)
         : ArmnnDevice(std::move(options))
@@ -39,9 +42,7 @@ public:
     }
     ~ArmnnDriver() {}
 
-    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
 
-public:
     Return<void> getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override
     {
         ALOGV("hal_1_3::ArmnnDriver::getCapabilities()");
@@ -131,10 +132,13 @@ public:
                                                                                          cb);
     }
 
-    Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
-            const android::sp<V1_2::IPreparedModelCallback>& cb)
+    Return<V1_0::ErrorStatus> prepareModel_1_2(
+        const V1_2::Model& model,
+        V1_1::ExecutionPreference preference,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()");
 
@@ -151,6 +155,9 @@ public:
                                                                m_ClTunedParameters,
                                                                m_Options,
                                                                model,
+                                                               modelCacheHandle,
+                                                               dataCacheHandle,
+                                                               token,
                                                                cb,
                                                                model.relaxComputationFloat32toFloat16
                                                                && m_Options.GetFp16Enabled());
@@ -174,14 +181,15 @@ public:
                                                                                          cb);
     }
 
-    Return<V1_3::ErrorStatus> prepareModel_1_3(const V1_3::Model& model,
-                                               V1_1::ExecutionPreference preference,
-                                               V1_3::Priority priority,
-                                               const V1_3::OptionalTimePoint&,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-                                               const HidlToken&,
-                                               const android::sp<V1_3::IPreparedModelCallback>& cb)
+    Return<V1_3::ErrorStatus> prepareModel_1_3(
+        const V1_3::Model& model,
+        V1_1::ExecutionPreference preference,
+        V1_3::Priority priority,
+        const V1_3::OptionalTimePoint&,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCache,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCache,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()");
 
@@ -199,11 +207,13 @@ public:
             return V1_3::ErrorStatus::INVALID_ARGUMENT;
         }
 
-
         return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime,
                                                       m_ClTunedParameters,
                                                       m_Options,
                                                       model,
+                                                      modelCache,
+                                                      dataCache,
+                                                      token,
                                                       cb,
                                                       model.relaxComputationFloat32toFloat16
                                                       && m_Options.GetFp16Enabled(),
@@ -219,10 +229,13 @@ public:
 
     Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
     {
-        ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()");
-
-        // Set both numbers to be 0 for cache not supported.
-        cb(V1_0::ErrorStatus::NONE, 0, 0);
+        ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()");
+        unsigned int numberOfCachedModelFiles = 0;
+        for (auto& backend : m_Options.GetBackends())
+        {
+            numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+        }
+        cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles,   1ul);
         return Void();
     }
 
@@ -250,26 +263,35 @@ public:
     }
 
     Return<V1_0::ErrorStatus> prepareModelFromCache(
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const HidlToken&,
-        const android::sp<V1_2::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_0::ErrorStatus::GENERAL_FAILURE;
+        return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+                                                               m_Options,
+                                                               modelCacheHandle,
+                                                               dataCacheHandle,
+                                                               token,
+                                                               cb);
     }
 
     Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
         const V1_3::OptionalTimePoint&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const HidlToken&,
-        const android::sp<V1_3::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb)
     {
-        ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_3::ErrorStatus::GENERAL_FAILURE;
+        ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()");
+
+        return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime,
+                                                          m_Options,
+                                                          modelCacheHandle,
+                                                          dataCacheHandle,
+                                                          token,
+                                                          cb);
     }
 
     Return<void> allocate(const V1_3::BufferDesc& /*desc*/,
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 3ecd2f82..e1d65f92 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -8,8 +8,12 @@
 #include "../ModelToINetworkConverter.hpp"
 #include "../SystemPropertiesUtils.hpp"
 
+#include <armnnDeserializer/IDeserializer.hpp>
+
 #include <log/log.h>
 
+#include <sys/stat.h>
+
 namespace
 {
 const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
@@ -100,6 +104,9 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
        const DriverOptions& options,
        const V1_3::Model& model,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+       const HidlToken& token,
        const android::sp<V1_3::IPreparedModelCallback>& cb,
        bool float32ToFloat16,
        V1_3::Priority priority)
@@ -138,8 +145,13 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
 
     // Serialize the network graph to a .armnn file if an output directory
     // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
+    bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
     auto serializedNetworkFileName =
-        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         serializeToFile);
 
     // Optimize the network
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
@@ -147,12 +159,42 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
     OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
     OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
 
+    int cachedFd = -1;
+    bool saveCachedNetwork = options.SaveCachedNetwork();
+
+    unsigned int numberOfCachedModelFiles = 0;
+    if (modelCacheHandle.size() > 0)
+    {
+        // modelCacheHandle is expected to list each backend's cache files consecutively, in the same
+        // order as the backends, so 'index' is the first handle belonging to the current backend.
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                numberOfCachedModelFiles += numberOfCacheFiles;
+                // For GpuAcc numberOfCachedFiles is 1. The bounds test stops a too-short handle vector
+                // from being indexed out of range.
+                if (backend == armnn::Compute::GpuAcc &&
+                    index < modelCacheHandle.size() && modelCacheHandle[index]->numFds == 1)
+                {
+                    cachedFd = modelCacheHandle[index]->data[0];
+                    saveCachedNetwork = true;
+                }
+                // Advance by this backend's own file count, not by the running total.
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
     armnn::BackendOptions gpuAcc("GpuAcc",
     {
         { "FastMathEnabled", options.IsFastMathEnabled() },
-        { "SaveCachedNetwork", options.SaveCachedNetwork() },
+        { "SaveCachedNetwork", saveCachedNetwork },
         { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
-        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+        { "CachedFileDescriptor", cachedFd }
     });
 
     armnn::BackendOptions cpuAcc("CpuAcc",
@@ -203,7 +245,11 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
     std::string msg;
     armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                 MemorySource::Undefined,
-                                                MemorySource::Undefined);
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    auto numInputs  = getMainModel(model).inputIndexes.size();
+    auto numOutputs = getMainModel(model).outputIndexes.size();
     try
     {
         if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
@@ -239,28 +285,345 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
 
     // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
     // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
-    if (!preparedModel->ExecuteWithDummyInputs())
+    // Only run this if the GpuAcc backend has been added to options
+    if (std::find(options.GetBackends().begin(),
+                  options.GetBackends().end(),
+                  armnn::Compute::GpuAcc) != options.GetBackends().end())
     {
-        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+        {
+            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        }
+
+        if (clTunedParameters &&
+            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+        {
+            // Now that we've done one inference the CL kernel parameters will have been tuned,
+            // so save the updated file.
+            try
+            {
+                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            }
+            catch (std::exception& error)
+            {
+                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+                      options.GetClTunedParametersFile().c_str(), error.what());
+            }
+        }
+    }
+    size_t hashValue = 0;
+    // Cache the model
+    if (dataCacheHandle.size() > 0)
+    {
+        // Cache the Arm NN model
+        if (dataCacheHandle.size() != 1)
+        {
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+
+        if (dataCacheHandle[0]->numFds != 1)
+        {
+            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+        // Caching is skipped (the prepared model is still returned) unless the data cache fd is O_RDWR.
+        int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+        if (dataCacheFileAccessMode != O_RDWR)
+        {
+            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Invalid Access Mode.");
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+        write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
     }
 
-    if (clTunedParameters &&
-        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+    // Cache the model data
+    if (modelCacheHandle.size() > 0)
     {
-        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
-        try
+        if (modelCacheHandle.size() != numberOfCachedModelFiles)
         {
-            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
         }
-        catch (std::exception& error)
+
+        for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
         {
-            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
-                  options.GetClTunedParametersFile().c_str(), error.what());
+            if (modelCacheHandle[i]->numFds == 1)
+            {
+                int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDONLY)
+                {
+                    struct stat statBuffer;
+                    if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+                    {
+                        long modelDataSize = statBuffer.st_size;
+                        if (modelDataSize > 0)
+                        {
+                            std::vector<uint8_t> modelData(modelDataSize);
+                            pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+                        }
+                    }
+                }
+            }
         }
     }
+    if (hashValue != 0)
+    {
+        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+    }
 
     NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+    return V1_3::ErrorStatus::NONE;
+}
+
+Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
+    const armnn::IRuntimePtr& runtime,
+    const DriverOptions& options,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+    const HidlToken& token,
+    const android::sp<V1_3::IPreparedModelCallback>& cb)
+{
+    ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
 
+    if (cb.get() == nullptr)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
+        return V1_3::ErrorStatus::INVALID_ARGUMENT;
+    }
+    // cb is known to be valid from here on, so failures can be reported through it.
+    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    if (!runtime)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
+        return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
+    }
+
+    // DataCacheHandle size should always be 1
+    // Arm NN model
+    if (dataCacheHandle.size() != 1)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Check if model files cached they match the expected value
+    unsigned int numberOfCachedModelFiles = 0;
+    for (auto& backend : options.GetBackends())
+    {
+        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+    }
+    if (modelCacheHandle.size() != numberOfCachedModelFiles)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    if (dataCacheHandle[0]->numFds != 1)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+    if (dataCacheFileAccessMode != O_RDWR)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+    if (dataSize == 0)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    int offset = 0;
+    {
+        struct stat statBuffer;
+        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+        {
+            unsigned long bufferSize = statBuffer.st_size;
+            if (bufferSize <= 0)
+            {
+                ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+                cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                return V1_3::ErrorStatus::GENERAL_FAILURE;
+            }
+            if (bufferSize > dataSize)
+            {
+                offset = bufferSize - dataSize;
+            }
+        }
+    }
+    std::vector<uint8_t> dataCacheData(dataSize);
+    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+    int gpuAccCachedFd = -1;
+    bool saveCachedNetwork = false;
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // modelCacheHandle vector should be in same order as backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                if (modelCacheHandle[index]->numFds != 1)
+                {
+                    ALOGW(
+                       "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
+                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                    return V1_3::ErrorStatus::GENERAL_FAILURE;
+                }
+                auto cachedFd = modelCacheHandle[index]->data[0];
+
+                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDWR)
+                {
+                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                    return V1_3::ErrorStatus::GENERAL_FAILURE;
+                }
+
+                struct stat statBuffer;
+                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+                {
+                    long modelDataSize = statBuffer.st_size;
+                    if (modelDataSize > 0)
+                    {
+                        std::vector<uint8_t> modelData(modelDataSize);
+                        pread(cachedFd, modelData.data(), modelData.size(), 0);
+                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+                        // For GpuAcc numberOfCachedFiles is 1
+                        if (backend == armnn::Compute::GpuAcc)
+                        {
+                            gpuAccCachedFd = cachedFd;
+                        }
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
+    if (!CacheDataHandlerInstance().Validate(token, hashValue))
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Deserialize the network..
+    auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+    armnn::OptimizerOptions OptOptions;
+    OptOptions.m_ReduceFp32ToFp16 = options.GetFp16Enabled();
+    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+    armnn::BackendOptions gpuAcc("GpuAcc",
+                                 {
+                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
+                                         {"SaveCachedNetwork",     saveCachedNetwork},
+                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
+                                         {"CachedFileDescriptor",  gpuAccCachedFd}
+                                 });
+
+    armnn::BackendOptions cpuAcc("CpuAcc",
+                                 {
+                                         {"FastMathEnabled", options.IsFastMathEnabled()},
+                                         {"NumberOfThreads", options.GetNumberOfThreads()}
+                                 });
+    OptOptions.m_ModelOptions.push_back(gpuAcc);
+    OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+    std::vector<std::string> errMessages;
+    try
+    {
+        optNet = armnn::Optimize(*network.get(),
+                                 options.GetBackends(),
+                                 runtime->GetDeviceSpec(),
+                                 OptOptions,
+                                 errMessages);
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from optimize.";
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
+    // Check that the optimized network is valid.
+    if (!optNet)
+    {
+        std::stringstream message;
+        message << "Invalid optimized network";
+        for (const std::string& msg : errMessages)
+        {
+            message << "\n" << msg;
+        }
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
+    // Export the optimized network graph to a dot file if an output dump directory
+    // has been specified in the drivers' arguments.
+    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+                                                               options.GetRequestInputsAndOutputsDumpDir());
+
+    // Load it into the runtime.
+    armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                MemorySource::Undefined,
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    try
+    {
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+        {
+            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
+    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
+            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
+                                                           runtime.get(),
+                                                           options.GetRequestInputsAndOutputsDumpDir(),
+                                                           options.IsGpuProfilingEnabled(),
+                                                           V1_3::Priority::MEDIUM,
+                                                           options.isAsyncModelExecutionEnabled(),
+                                                           options.getNoOfArmnnThreads(),
+                                                           true));
+
+    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
     return V1_3::ErrorStatus::NONE;
 }
 
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index 3c094fe5..a482edac 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -7,6 +7,7 @@
 
 #include <HalInterfaces.h>
 
+#include "../CacheDataHandler.hpp"
 #include "../DriverOptions.hpp"
 
 #include <armnn/ArmNN.hpp>
@@ -31,13 +32,27 @@ namespace hal_1_3
 class ArmnnDriverImpl
 {
 public:
-    static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime,
-                                                           const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
-                                                           const DriverOptions& options,
-                                                           const V1_3::Model& model,
-                                                           const android::sp<V1_3::IPreparedModelCallback>& cb,
-                                                           bool float32ToFloat16 = false,
-                                                           V1_3::Priority priority = V1_3::Priority::MEDIUM);
+    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+    static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(
+        const armnn::IRuntimePtr& runtime,
+        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+        const DriverOptions& options,
+        const V1_3::Model& model,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb,
+        bool float32ToFloat16 = false,
+        V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+    static Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
+        const armnn::IRuntimePtr& runtime,
+        const DriverOptions& options,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb);
 
     static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                             V1_3::IDevice::getCapabilities_1_3_cb cb);
-- 
cgit v1.2.1